In [None]:
# Mount Google Drive at /content/drive (Colab only); every path used below lives under this mount point.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m72.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m104.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.4 tokenizers-0.13.3 transformers-4.28.1
Looking in indexes: https://pypi.org/simple, https://u

In [None]:
# Show which GPU (if any) is attached to this runtime.
!nvidia-smi

Sat Apr 22 10:34:27 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    45W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:

import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup, get_polynomial_decay_schedule_with_warmup

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Project layout on Google Drive: competition files are read from input/, artifacts are written to output/.
DIR = "/content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")
# Directory of the backbone checkpoint that CFG.model points to.
CUSTOM_MODEL_DIR = os.path.join(OUTPUT_DIR,'clrp_deberta_v3_base_epoch20')
# Keep the trailing slash: later cells build file names from this by plain string concatenation.
OUTPUT_EXP_DIR = DIR + '/output/EXP023/'
# exist_ok=True makes the cell safe to re-run without a separate existence check.
os.makedirs(OUTPUT_EXP_DIR, exist_ok=True)

In [None]:

# ====================================================
# CFG
# ====================================================
class CFG:
    # Central experiment configuration, read as class attributes (CFG.xxx) throughout the notebook.
    debug=False  # True -> the override below the class shortens the run (fewer epochs and folds)
    apex=True  # enables torch.cuda.amp autocast and GradScaler in train_fn
    print_freq=100  # progress is printed every `print_freq` steps in train_fn / valid_fn
    num_workers=4  # DataLoader worker processes
    model_name="microsoft/deberta-v3-base"  # only used to build checkpoint file names
    # Alternative backbones kept for reference (all disabled):
    # model="microsoft/deberta-v3-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    model = CUSTOM_MODEL_DIR  # path handed to AutoTokenizer / AutoConfig / AutoModel .from_pretrained
    scheduler='polynomial' # one of ['linear', 'cosine', 'polynomial'] (see get_scheduler in train_loop)
    batch_scheduler=True  # step the LR scheduler after every optimizer step
    num_cycles=0.5  # only read by the 'cosine' scheduler
    num_warmup_steps=0  # read by the 'linear' and 'cosine' schedulers; 'polynomial' computes its own warm-up
    epochs=6
    encoder_lr=2e-5  # base LR for the backbone parameter groups
    decoder_lr=2e-5  # LR for the parameters outside the backbone (pooling head)
    min_lr=1e-6  # not referenced in the visible code
    eps=1e-6  # AdamW epsilon
    betas=(0.9, 0.999)  # AdamW betas
    batch_size=16  # training batch size; validation uses twice this value
    fc_dropout=0.2  # not referenced in the visible code
    target_size=1  # output dimension of the final linear layer
    max_len=256  # placeholder; overwritten once the longest tokenized text has been measured
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000  # gradient-norm clipping threshold used in train_fn
    seed=42
    n_fold=10  # number of stratified folds
    trn_fold=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # folds that are actually trained
    train=True  # run the training loop in the final cell
    nth_awp_start_epoch=1  # not referenced in the visible code
    gradient_checkpointing = False  # True -> CustomModel calls gradient_checkpointing_enable() on the backbone
    freezing = False  # True -> CustomModel freezes the embeddings and the first two encoder layers
    num_reinit_layers = 1  # how many of the last encoder layers reinit_layers re-initialises
    is_reinit_layer = True  # switch for the re-initialisation above

# Debug mode: shorten the run to two epochs on the first two folds.
if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [None]:
def get_score(labels, outputs):
    """Print F1, recall and precision at a fixed 0.5 cut-off and return the accuracy.

    Args:
        labels: ground-truth binary labels.
        outputs: predicted scores; must support ``>`` and ``.astype`` (e.g. a numpy array).

    Returns:
        Accuracy of the predictions binarised at 0.5.
    """
    cutoff = 0.5
    # Binarise once and reuse the result for every metric.
    hard_preds = (outputs > cutoff).astype(int)
    f_score, r_score, p_score = (
        metric(labels, hard_preds)
        for metric in (f1_score, recall_score, precision_score)
    )
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return accuracy_score(labels, hard_preds)

def get_acc_score(labels, outputs):
    """Search thresholds 0.10 .. 0.79 (step 0.01) for the best accuracy.

    The winning threshold is printed and the accuracy at that threshold is returned.
    NOTE(review): the threshold is tuned on the same labels it is scored on, so
    the returned accuracy is optimistic.

    Args:
        labels: ground-truth binary labels.
        outputs: predicted scores; must support ``>`` and ``.astype``.
    """
    top_score, top_thresh = 0, 0.5
    for candidate in np.round(np.arange(0.1, 0.80, 0.01), 2):
        acc = accuracy_score(labels, (outputs > candidate).astype(int))
        # Strict comparison: the lowest threshold wins among ties.
        if acc > top_score:
            top_score, top_thresh = acc, candidate
    print(f"thresh : {top_thresh}")
    return accuracy_score(labels, (outputs > top_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    """Return a logger that writes bare messages to the console and to ``<filename>.log``.

    Safe to call repeatedly (e.g. when the cell is re-run): handlers left over
    from a previous call are removed and closed first, so each message is
    emitted once per destination instead of once per call.

    Args:
        filename: log file path without the ``.log`` extension.

    Returns:
        The configured ``logging.Logger`` for this module.
    """
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # Drop handlers from earlier calls; otherwise every re-run adds another pair.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # The recorded output shows each message a second time as "INFO:__main__:...",
    # which is the root logger echoing propagated records. Stop the propagation.
    logger.propagate = False

    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    """Seed Python, NumPy and PyTorch (CPU and all CUDA devices) for reproducible runs.

    Args:
        seed: integer seed applied to every random number generator.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Covers every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning may pick different kernels between runs; disable it for repeatability.
    torch.backends.cudnn.benchmark = False

seed_everything(seed=CFG.seed)

In [None]:
def freeze(module):
    """
    Disable gradient tracking for every parameter of ``module`` (in place).
    """
    for weight in module.parameters():
        weight.requires_grad_(False)
        
def get_freezed_parameters(module):
    """
    List the names of the parameters of ``module`` that do not require gradients,
    in ``named_parameters()`` order.
    """
    return [
        param_name
        for param_name, weight in module.named_parameters()
        if not weight.requires_grad
    ]

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    Register an ``optim_bits`` optimizer override for the ``weight`` of each
    word / position / token-type embedding found on ``embeddings_path``.

    Reference:
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930

    NOTE(review): ``bnb`` is not imported anywhere in the visible part of the
    notebook, so this raises NameError as soon as a matching embedding exists —
    confirm the import before using this helper.
    """
    for prefix in ("word", "position", "token_type"):
        embedding_name = f"{prefix}_embeddings"
        # Embedding kinds that are absent from this module are skipped.
        if not hasattr(embeddings_path, embedding_name):
            continue
        bnb.optim.GlobalOptimManager.get_instance().register_module_override(
            getattr(embeddings_path, embedding_name), 'weight', {'optim_bits': optim_bits}
        )

In [None]:
import pandas as pd
import numpy as np

# Read the competition files: labelled training set, unlabelled test set and the submission template.
train = pd.read_csv(os.path.join(INPUT_DIR,"train_data.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test_data.csv"))
sample_sub = pd.read_csv(os.path.join(INPUT_DIR,"submission.csv"))

# Quick sanity check: shape followed by the first three rows of each frame.
for _frame in (train, test, sample_sub):
    print(_frame.shape)
    display(_frame.head(3))

(4974, 6)


Unnamed: 0,id,title,year,abstract,keywords,y
0,1,Hierarchical Adversarially Learned Inference,2018,We propose a novel hierarchical generative mod...,"generative, hierarchical, unsupervised, semisu...",0
1,2,Learning to Compute Word Embeddings On the Fly,2018,Words in natural language follow a Zipfian dis...,"NLU, word embeddings, representation learning",0
2,3,Graph2Seq: Scalable Learning Dynamics for Graphs,2018,Neural networks are increasingly used as a gen...,,0


(6393, 5)


Unnamed: 0,id,title,year,abstract,keywords
0,1,StyleAlign: Analysis and Applications of Align...,2022,"In this paper, we perform an in-depth study of...","StyleGAN, transfer learning, fine tuning, mode..."
1,2,Embedding a random graph via GNN: mean-field i...,2021,We develop a theory for embedding a random gra...,"Graph neural network, graph embedding, multi-r..."
2,3,BBRefinement: an universal scheme to improve p...,2021,We present a conceptually simple yet powerful ...,"object detection, deep neural networks, refine..."


(6393, 2)


Unnamed: 0,id,y
0,1,0
1,2,0
2,3,0


In [None]:
# Model input: title and abstract joined by the literal "[SEP]" marker.
# A missing title or abstract is replaced by "" first; otherwise the string
# concatenation yields NaN for the whole row, and TrainDataset hands that NaN
# straight to the tokenizer.
train["texts"] = train["title"].fillna("") + "[SEP]" + train["abstract"].fillna("")

In [None]:
# Assign every training row to one of CFG.n_fold validation folds, stratified on the label y.
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train.y)):
    # val_ holds positional indices; using them with .loc assumes train still
    # carries the default RangeIndex from read_csv — TODO confirm if train is re-indexed earlier.
    train.loc[val_ , "kfold"] = int(fold)
    
# The column is created by the .loc assignments above, so cast it to int explicitly.
train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    # Debug mode: show the fold sizes, keep a fixed random sample of 500 rows, show the sizes again.
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [None]:
# ====================================================
# tokenizer
# ====================================================
# Load the tokenizer stored with the backbone in CFG.model, save a copy under the
# experiment directory, and expose it on CFG so prepare_input can reach it via cfg.tokenizer.
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

In [None]:
# ====================================================
# Define max_len
# ====================================================
# Token count of every training text, measured without special tokens (missing texts count as "").
tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
lengths = [
    len(tokenizer(text, add_special_tokens=False)['input_ids'])
    for text in tk0
]
# Longest text plus a margin of 3 for the special tokens added at encoding time.
CFG.max_len = max(lengths) + 3 # cls
LOGGER.info(f"max_len: {CFG.max_len}")

100%|██████████| 4974/4974 [00:03<00:00, 1300.49it/s]
max_len: 522
INFO:__main__:max_len: 522


In [None]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    """Tokenize ``text`` to exactly ``cfg.max_len`` tokens and convert each field to a long tensor.

    Args:
        cfg: object exposing ``tokenizer`` (callable) and ``max_len``.
        text: the raw string to encode.

    Returns:
        The tokenizer's own output mapping, with every value replaced by a
        ``torch.long`` tensor.
    """
    encoded = cfg.tokenizer(
        text,
        add_special_tokens=True,
        max_length=cfg.max_len,
        padding="max_length",
        return_offsets_mapping=False,
        truncation=True,
    )
    # Convert in place so the mapping type returned by the tokenizer is preserved.
    for field in list(encoded.keys()):
        encoded[field] = torch.tensor(encoded[field], dtype=torch.long)
    return encoded


class TrainDataset(Dataset):
    """Dataset yielding ``(tokenized text, float label)`` pairs from the ``texts`` and ``y`` columns."""

    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        # Tokenization happens lazily, once per requested sample.
        text, target = self.inputs[item], self.labels[item]
        return prepare_input(self.cfg, text), torch.tensor(target, dtype=torch.float)

def collate(inputs):
    """Trim every tensor in the batch to the longest attended sequence.

    The mapping is modified in place and returned.
    """
    # Number of attended tokens in the longest sample of the batch.
    longest = int(inputs["attention_mask"].sum(axis=1).max())
    for field in list(inputs.keys()):
        inputs[field] = inputs[field][:, :longest]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
def reinit_layers(model, num_layers=None):
    """Re-initialise the weights of the last ``num_layers`` encoder layers of ``model``.

    Linear and Embedding weights are redrawn from N(0, ``model.config.initializer_range``),
    biases are zeroed and LayerNorm is reset to weight 1 / bias 0.

    Args:
        model: backbone exposing ``encoder.layer`` (sliceable) and ``config.initializer_range``.
        num_layers: number of trailing layers to reset; defaults to ``CFG.num_reinit_layers``.

    Returns:
        The same ``model`` instance (modified in place).
    """
    if num_layers is None:
        num_layers = CFG.num_reinit_layers
    # layer[-0:] would select EVERY layer, so a count of zero (or less) must be a no-op.
    if num_layers <= 0:
        return model

    std = model.config.initializer_range
    # The backbone is passed in directly, hence model.encoder rather than model.model.encoder.
    for layer in model.encoder.layer[-num_layers:]:
        for module in layer.modules():
            if isinstance(module, nn.Linear):
                module.weight.data.normal_(mean=0.0, std=std)
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.Embedding):
                module.weight.data.normal_(mean=0.0, std=std)
                if module.padding_idx is not None:
                    module.weight.data[module.padding_idx].zero_()
            elif isinstance(module, nn.LayerNorm):
                module.bias.data.zero_()
                module.weight.data.fill_(1.0)

    return model

In [None]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    """Average the token embeddings over the positions where ``attention_mask`` is non-zero."""

    def __init__(self):
        super(MeanPooling, self).__init__()

    def forward(self, last_hidden_state, attention_mask):
        # Broadcast the mask over the hidden dimension so masked tokens contribute zero.
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        summed = (last_hidden_state * mask).sum(1)
        # Clamp the token count so a fully masked row does not divide by zero.
        token_count = torch.clamp(mask.sum(1), min=1e-9)
        return summed / token_count

class MaxPooling(nn.Module):
    """Element-wise maximum of the token embeddings over the attended positions."""

    def __init__(self):
        super(MaxPooling, self).__init__()

    def forward(self, last_hidden_state, attention_mask):
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        # Masked positions are pushed to -1e4 on a copy, leaving the input tensor untouched.
        masked = last_hidden_state.masked_fill(mask == 0, -1e4)
        return torch.max(masked, dim=1)[0]
    

class CustomModel(nn.Module):
    """Transformer backbone with masked mean pooling, LayerNorm and a multi-sample-dropout linear head.

    Args:
        cfg: configuration object (``model``, ``target_size``, ``gradient_checkpointing``, ``freezing``).
        config_path: optional path to a backbone config saved with ``torch.save``;
            when None the config is loaded from ``cfg.model`` with all dropout set to 0.
        pretrained: load backbone weights from ``cfg.model`` when True, otherwise
            build the architecture from the config with fresh weights.
    """
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            # Disable every dropout inside the backbone.
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            # torch.load unpickles the file: only point this at configs written by this notebook.
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            # AutoModel cannot be instantiated directly; from_config builds the
            # architecture described by the config without loading weights.
            self.model = AutoModel.from_config(self.config)
        if CFG.is_reinit_layer:
            self.model = reinit_layers(self.model)
            print(f'Reinitializing Last {CFG.num_reinit_layers} Layers.')
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            # NOTE: filter() returns a one-shot iterator; it is exhausted after a single pass.
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())
        
        self.high_dropout = nn.Dropout(p=0.5)

        self.pool = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        self._init_weights(self.layer_norm1)
        self.sig = nn.Sigmoid()  # defined but not applied in feature()/forward()
        
    def _init_weights(self, module):
        """Initialise ``module`` using the backbone's ``initializer_range`` as the normal std."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        """Return ``(pooled_feature, backbone_outputs)`` for a batch of tokenizer inputs."""
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        feature = self.layer_norm1(feature)
        #feature = self.sig(feature)
        return feature, outputs

    def forward(self, inputs=None, labels=None):
        """Compute the head output and, when ``labels`` is given, the MSE loss.

        Returns:
            ``(loss, logits, *extra)`` when ``labels`` is provided, otherwise
            ``(logits, *extra)``; ``extra`` is ``outputs[2:]`` of the backbone.

        NOTE(review): the loss is MSE on the raw head output, while valid_fn and
        inference_fn pass that output through a sigmoid — confirm this mismatch
        is intended.
        """
        feature, outputs = self.feature(inputs)
        # Multi-sample dropout: average the head over 5 independent dropout masks.
        logits = torch.mean(
            torch.stack(
                [self.fc(self.high_dropout(feature)) for _ in range(5)],
                dim=0,
            ),
            dim=0,
        )
        # calculate loss
        loss = None
        if labels is not None:
            loss_fn = nn.MSELoss()
            loss = loss_fn(logits.view(-1, 1), labels.view(-1, 1))
        
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

In [None]:
class Focal_MultiLabel_Loss(nn.Module):
    """Focal loss on top of BCE-with-logits.

    Each element's BCE is down-weighted by ``(1 - exp(-bce)) ** gamma`` before
    averaging, so well-classified elements contribute less.

    Args:
        gamma: focusing exponent; 0 reduces to plain mean BCE.
    """
    def __init__(self, gamma):
        super(Focal_MultiLabel_Loss, self).__init__()
        self.gamma = gamma
        # reduction='none' keeps one BCE value per element. With the default
        # mean reduction the focal factor would act on the batch mean only.
        self.bceloss = nn.BCEWithLogitsLoss(reduction='none')

    def forward(self, outputs, targets):
        """Return the scalar mean focal loss for raw ``outputs`` (logits) and 0/1 ``targets``."""
        bce = self.bceloss(outputs.view(-1, 1), targets.view(-1, 1))
        # exp(-bce) is the probability the model assigns to the true class.
        bce_exp = torch.exp(-bce)
        focal_loss = (1-bce_exp)**self.gamma * bce
        return focal_loss.mean()

In [None]:
def calculate_loss(inputs, labels, model, criterion, is_valid=True, device="cpu"):    
    """Run ``model(inputs, labels)`` and unpack its first two outputs as ``(loss, logits)``.

    ``criterion`` and ``device`` are accepted for call compatibility but are
    not used here; the loss comes from the model itself.

    Returns:
        ``(loss, logits)`` when ``is_valid`` is true, otherwise ``loss`` only.
    """
    loss, logits = model(inputs, labels)[:2]
    if is_valid:
        return loss, logits
    return loss

In [None]:

# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Track the latest value and the running weighted average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration in seconds as ``'<minutes>m <seconds>s'`` (both truncated to integers)."""
    minutes = math.floor(s / 60)
    seconds = s - minutes * 60
    return '%dm %ds' % (minutes, seconds)


def timeSince(since, percent):
    """Return ``'<elapsed> (remain <remaining>)'`` for work started at ``since``.

    Args:
        since: start timestamp as returned by ``time.time()``.
        percent: fraction of the work completed so far (must be non-zero).
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the fraction completed.
    estimated_total = elapsed / (percent)
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(estimated_total - elapsed))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """Train ``model`` for one epoch with mixed precision and return the average loss.

    Args:
        fold: current fold index (unused here, kept for the caller's signature).
        train_loader: DataLoader yielding ``(inputs, labels)`` batches.
        model: model whose forward returns the loss as its first output.
        criterion: forwarded to ``calculate_loss`` (which does not use it).
        optimizer: optimizer to step.
        epoch: zero-based epoch index, used only for the progress line.
        scheduler: LR scheduler, stepped after every optimizer step when ``CFG.batch_scheduler``.
        device: device the batch is moved to.

    Returns:
        Sample-weighted mean of the per-batch losses (after accumulation scaling).
    """
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = time.time()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()

        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            # Unscale and clip once per optimizer step. GradScaler.unscale_ may be
            # called only once between updates, so doing it on every micro-batch
            # breaks gradient accumulation.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if CFG.batch_scheduler:
                scheduler.step()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          #grad_norm=grad_norm,
                          lr=scheduler.get_last_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """Evaluate ``model`` on ``valid_loader``.

    Args:
        valid_loader: DataLoader yielding ``(inputs, labels)`` batches.
        model: model whose forward returns ``(loss, logits, ...)`` when given labels.
        criterion: forwarded to ``calculate_loss`` (which does not use it).
        device: device the batch is moved to.

    Returns:
        ``(average_loss, predictions)`` where ``predictions`` is a flat numpy
        array of sigmoid-transformed model outputs, in loader order.

    NOTE(review): the model is trained with MSE on its raw output, yet a sigmoid
    is applied here — confirm that is intended.
    """
    losses = AverageMeter()
    model.eval()
    preds = []
    start = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            loss, y_preds = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=True, device=device)
        # No division by gradient_accumulation_steps: nothing is accumulated
        # during evaluation, so scaling would only distort the reported loss.
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    # Stack the per-batch arrays and flatten them to one score per sample.
    predictions = np.concatenate(preds).reshape(-1)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    """Predict on ``test_loader`` and return the sigmoid-transformed model outputs.

    Args:
        test_loader: DataLoader yielding tokenizer-input mappings (no labels).
        model: model called as ``model(inputs)``.
        device: device the model and batches are moved to.

    Returns:
        Numpy array made by concatenating the per-batch predictions along axis 0.
    """
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            outputs = model(inputs)
        # CustomModel.forward returns a tuple whose first element is the logits
        # when no labels are passed; a bare tensor is accepted as well.
        y_preds = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [None]:

# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    """Train and validate one cross-validation fold.

    Rows with ``kfold != fold`` are used for training and rows with
    ``kfold == fold`` for validation. After every epoch the model is scored with
    ``get_acc_score``; whenever the score improves, the model weights and the
    validation predictions are saved to ``OUTPUT_EXP_DIR``.

    Args:
        folds: dataframe with ``texts``, ``y`` and ``kfold`` columns.
        fold: index of the fold held out for validation.

    Returns:
        The validation rows with an added ``pred`` column holding the
        predictions of the best epoch.
    """
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds['y'].values
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    # The backbone config is saved next to the checkpoints (same file for every fold).
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr=5e-6, decoder_lr=1e-4, weight_decay=0.0):
        # Builds parameter groups with layer-wise learning rates for the backbone:
        # layers 0-3 use encoder_lr/2.6, layers 4-7 encoder_lr, layers 8-11 encoder_lr*2.6.
        # Backbone parameters outside the numbered layers get no explicit 'lr' and
        # therefore use the optimizer default. Names matching `no_decay` get weight_decay 0.
        # The layer lists are hard-coded for a 12-layer encoder.
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", 
                    "LayerNorm.weight"]
        group1=['layer.0.','layer.1.','layer.2.','layer.3.']
        group2=['layer.4.','layer.5.','layer.6.','layer.7.']    
        group3=['layer.8.','layer.9.','layer.10.','layer.11.']
        group_all=['layer.0.','layer.1.','layer.2.','layer.3.','layer.4.','layer.5.','layer.6.','layer.7.','layer.8.','layer.9.','layer.10.','layer.11.']
        optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': weight_decay, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': weight_decay, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': weight_decay, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': 0.0},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': 0.0, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': 0.0, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': 0.0, 'lr': encoder_lr*2.6},
        # Everything outside the backbone (names without "model"), i.e. the pooling head.
        # NOTE(review): "momentum" is not an AdamW hyper-parameter and presumably has no effect — confirm.
        {'params': [p for n, p in model.named_parameters() if "model" not in n], 'lr':decoder_lr, "momentum" : 0.99},
    ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    # NOTE(review): AdamW is imported from both torch.optim and transformers in the
    # import cell; the later (transformers) import is the one bound here.
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        # Select the LR schedule named by cfg.scheduler. An unknown name leaves
        # `scheduler` unbound and raises at the return statement.
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        elif cfg.scheduler == 'polynomial':
            # Warm up for 10% of one epoch's batches.
            warmup_steps = int(len(train_folds) / CFG.batch_size * 0.1)
            scheduler = get_polynomial_decay_schedule_with_warmup(
                optimizer, warmup_steps, num_train_steps, lr_end=7e-7, power=3.0)
        return scheduler
    
    # NOTE(review): the loader uses drop_last=True, so the real number of steps is
    # len(train_loader) * epochs, which can be slightly lower than this estimate.
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    # Passed through to train_fn/valid_fn; calculate_loss does not use it (the model computes its own loss).
    criterion = nn.MSELoss()
    
    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        
        # scoring
        # get_score prints F1/recall/precision at 0.5; model selection uses the
        # threshold-searched accuracy from get_acc_score.
        score_05 = get_score(valid_labels, predictions)
        score = get_acc_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        
        if best_score < score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth")

    # Reload the predictions stored with the best checkpoint so the out-of-fold
    # column reflects the best epoch rather than the last one.
    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth", 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [None]:
if __name__ == '__main__':

    def get_result(oof_df):
        """Log the fixed-threshold accuracy and the best-threshold accuracy of ``oof_df``."""
        y_true, y_prob = oof_df['y'].values, oof_df['pred'].values
        score = get_score(y_true, y_prob)
        acc_score = get_acc_score(y_true, y_prob)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'ACC BEST Score: {acc_score:<.4f}')

    if CFG.train:
        # Start from an empty frame and collect the out-of-fold predictions of each trained fold.
        fold_frames = [pd.DataFrame()]
        for fold in range(CFG.n_fold):
            if fold not in CFG.trn_fold:
                continue
            _oof_df = train_loop(train, fold)
            fold_frames.append(_oof_df)
            LOGGER.info(f"========== fold: {fold} result ==========")
            get_result(_oof_df)
        oof_df = pd.concat(fold_frames).reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dt

Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 3s (remain 16m 41s) Loss: 0.5521(0.5521) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 15s (remain 0m 26s) Loss: 0.3362(0.3035) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 26s (remain 0m 10s) Loss: 0.2266(0.2760) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 35s (remain 0m 0s) Loss: 0.1974(0.2624) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.0713(0.0713) 


Epoch 1 - avg_train_loss: 0.2624  avg_val_loss: 0.1978  time: 39s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2624  avg_val_loss: 0.1978  time: 39s
Epoch 1 - Score: 0.7008
INFO:__main__:Epoch 1 - Score: 0.7008
Epoch 1 - Save Best Score: 0.7008 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7008 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4485(0.1978) 
f1 score : 0.46841294298921415
recall score : 1.0
precision score : 0.3058350100603622
thresh : 0.6
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.1127(0.1127) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.3336(0.2115) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2384(0.2083) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2189(0.2081) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1200(0.1200) 


Epoch 2 - avg_train_loss: 0.2081  avg_val_loss: 0.1971  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2081  avg_val_loss: 0.1971  time: 36s
Epoch 2 - Score: 0.7028
INFO:__main__:Epoch 2 - Score: 0.7028
Epoch 2 - Save Best Score: 0.7028 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7028 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3075(0.1971) 
f1 score : 0.47058823529411764
recall score : 1.0
precision score : 0.3076923076923077
thresh : 0.64
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.1899(0.1899) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1909(0.1935) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1789(0.1898) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1776(0.1902) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0977(0.0977) 


Epoch 3 - avg_train_loss: 0.1902  avg_val_loss: 0.1923  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1902  avg_val_loss: 0.1923  time: 36s
Epoch 3 - Score: 0.7088
INFO:__main__:Epoch 3 - Score: 0.7088
Epoch 3 - Save Best Score: 0.7088 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7088 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3539(0.1923) 
f1 score : 0.4794952681388012
recall score : 1.0
precision score : 0.3153526970954357
thresh : 0.64
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.1337(0.1337) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.0922(0.1690) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1572(0.1697) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2188(0.1704) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 5s) Loss: 0.1000(0.1000) 


Epoch 4 - avg_train_loss: 0.1704  avg_val_loss: 0.1964  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1704  avg_val_loss: 0.1964  time: 36s
Epoch 4 - Score: 0.7108
INFO:__main__:Epoch 4 - Score: 0.7108
Epoch 4 - Save Best Score: 0.7108 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7108 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3475(0.1964) 
f1 score : 0.49185667752442996
recall score : 0.993421052631579
precision score : 0.3268398268398268
thresh : 0.63
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.1190(0.1190) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1208(0.1610) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1214(0.1565) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1880(0.1544) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0886(0.0886) 


Epoch 5 - avg_train_loss: 0.1544  avg_val_loss: 0.2015  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1544  avg_val_loss: 0.2015  time: 36s
Epoch 5 - Score: 0.7068
INFO:__main__:Epoch 5 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3916(0.2015) 
f1 score : 0.49662162162162166
recall score : 0.9671052631578947
precision score : 0.3340909090909091
thresh : 0.64
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 24s) Loss: 0.2218(0.2218) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1028(0.1391) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1091(0.1419) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1697(0.1435) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0731(0.0731) 


Epoch 6 - avg_train_loss: 0.1435  avg_val_loss: 0.2056  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1435  avg_val_loss: 0.2056  time: 36s
Epoch 6 - Score: 0.7129
INFO:__main__:Epoch 6 - Score: 0.7129
Epoch 6 - Save Best Score: 0.7129 Model
INFO:__main__:Epoch 6 - Save Best Score: 0.7129 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4388(0.2056) 
f1 score : 0.5025817555938038
recall score : 0.9605263157894737
precision score : 0.34032634032634035
thresh : 0.64


Score: 0.4197
INFO:__main__:Score: 0.4197
ACC BEST Score: 0.7129
INFO:__main__:ACC BEST Score: 0.7129
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.5025817555938038
recall score : 0.9605263157894737
precision score : 0.34032634032634035
thresh : 0.64


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.5626(0.5626) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3046(0.3136) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2958(0.2731) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3320(0.2568) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0147(0.0147) 


Epoch 1 - avg_train_loss: 0.2568  avg_val_loss: 0.2358  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2568  avg_val_loss: 0.2358  time: 36s
Epoch 1 - Score: 0.7149
INFO:__main__:Epoch 1 - Score: 0.7149
Epoch 1 - Save Best Score: 0.7149 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7149 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7312(0.2358) 
f1 score : 0.5017667844522968
recall score : 0.9281045751633987
precision score : 0.34382566585956414
thresh : 0.56
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.2484(0.2484) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1131(0.2087) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2818(0.2063) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1411(0.2051) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0250(0.0250) 


Epoch 2 - avg_train_loss: 0.2051  avg_val_loss: 0.2183  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2051  avg_val_loss: 0.2183  time: 36s
Epoch 2 - Score: 0.7088
INFO:__main__:Epoch 2 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6439(0.2183) 
f1 score : 0.4899328859060402
recall score : 0.954248366013072
precision score : 0.3295711060948081
thresh : 0.57
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.1274(0.1274) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1481(0.1820) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2673(0.1848) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1918(0.1843) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0819(0.0819) 


Epoch 3 - avg_train_loss: 0.1843  avg_val_loss: 0.1902  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1843  avg_val_loss: 0.1902  time: 36s
Epoch 3 - Score: 0.7249
INFO:__main__:Epoch 3 - Score: 0.7249
Epoch 3 - Save Best Score: 0.7249 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7249 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3947(0.1902) 
f1 score : 0.4757433489827857
recall score : 0.9934640522875817
precision score : 0.31275720164609055
thresh : 0.6
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 43s) Loss: 0.1343(0.1343) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2355(0.1789) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2063(0.1737) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1770(0.1717) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0955(0.0955) 


Epoch 4 - avg_train_loss: 0.1717  avg_val_loss: 0.1902  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1717  avg_val_loss: 0.1902  time: 36s
Epoch 4 - Score: 0.7369
INFO:__main__:Epoch 4 - Score: 0.7369
Epoch 4 - Save Best Score: 0.7369 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7369 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3560(0.1902) 
f1 score : 0.47634069400630913
recall score : 0.9869281045751634
precision score : 0.31392931392931395
thresh : 0.62
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.1840(0.1840) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2428(0.1579) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1586(0.1572) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1808(0.1586) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1412(0.1412) 


Epoch 5 - avg_train_loss: 0.1586  avg_val_loss: 0.2009  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1586  avg_val_loss: 0.2009  time: 36s
Epoch 5 - Score: 0.7390
INFO:__main__:Epoch 5 - Score: 0.7390
Epoch 5 - Save Best Score: 0.7390 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7390 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2714(0.2009) 
f1 score : 0.471875
recall score : 0.9869281045751634
precision score : 0.31006160164271046
thresh : 0.64
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.1544(0.1544) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1297(0.1516) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2291(0.1509) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1669(0.1498) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0838(0.0838) 


Epoch 6 - avg_train_loss: 0.1498  avg_val_loss: 0.1944  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1498  avg_val_loss: 0.1944  time: 36s
Epoch 6 - Score: 0.7349
INFO:__main__:Epoch 6 - Score: 0.7349


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3918(0.1944) 
f1 score : 0.49337748344370863
recall score : 0.9738562091503268
precision score : 0.3303769401330377
thresh : 0.62


Score: 0.3213
INFO:__main__:Score: 0.3213
ACC BEST Score: 0.7390
INFO:__main__:ACC BEST Score: 0.7390
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.471875
recall score : 0.9869281045751634
precision score : 0.31006160164271046
thresh : 0.64


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.4299(0.4299) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.2763(0.2731) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2415(0.2571) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2365(0.2492) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0215(0.0215) 


Epoch 1 - avg_train_loss: 0.2492  avg_val_loss: 0.2240  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2492  avg_val_loss: 0.2240  time: 36s
Epoch 1 - Score: 0.7088
INFO:__main__:Epoch 1 - Score: 0.7088
Epoch 1 - Save Best Score: 0.7088 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7088 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7025(0.2240) 
f1 score : 0.48465266558966075
recall score : 0.9803921568627451
precision score : 0.3218884120171674
thresh : 0.58
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.1936(0.1936) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1396(0.2098) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1925(0.2108) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2164(0.2094) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0550(0.0550) 


Epoch 2 - avg_train_loss: 0.2094  avg_val_loss: 0.1931  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2094  avg_val_loss: 0.1931  time: 36s
Epoch 2 - Score: 0.7129
INFO:__main__:Epoch 2 - Score: 0.7129
Epoch 2 - Save Best Score: 0.7129 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7129 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5246(0.1931) 
f1 score : 0.4801271860095389
recall score : 0.9869281045751634
precision score : 0.3172268907563025
thresh : 0.61
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.2361(0.2361) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1550(0.1959) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1463(0.1903) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1243(0.1888) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0637(0.0637) 


Epoch 3 - avg_train_loss: 0.1888  avg_val_loss: 0.1888  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1888  avg_val_loss: 0.1888  time: 36s
Epoch 3 - Score: 0.7108
INFO:__main__:Epoch 3 - Score: 0.7108


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4879(0.1888) 
f1 score : 0.4856230031948881
recall score : 0.9934640522875817
precision score : 0.321353065539112
thresh : 0.61
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.1876(0.1876) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1387(0.1753) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1484(0.1705) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1347(0.1704) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1005(0.1005) 


Epoch 4 - avg_train_loss: 0.1704  avg_val_loss: 0.1845  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1704  avg_val_loss: 0.1845  time: 36s
Epoch 4 - Score: 0.7129
INFO:__main__:Epoch 4 - Score: 0.7129


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4032(0.1845) 
f1 score : 0.4872611464968153
recall score : 1.0
precision score : 0.32210526315789473
thresh : 0.64
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.1466(0.1466) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2253(0.1613) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1486(0.1591) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1518(0.1593) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1148(0.1148) 


Epoch 5 - avg_train_loss: 0.1593  avg_val_loss: 0.1863  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1593  avg_val_loss: 0.1863  time: 36s
Epoch 5 - Score: 0.7088
INFO:__main__:Epoch 5 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3863(0.1863) 
f1 score : 0.4903225806451613
recall score : 0.9934640522875817
precision score : 0.32548179871520344
thresh : 0.62
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 35s) Loss: 0.1159(0.1159) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1448(0.1503) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2098(0.1537) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1340(0.1517) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1111(0.1111) 


Epoch 6 - avg_train_loss: 0.1517  avg_val_loss: 0.1875  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1517  avg_val_loss: 0.1875  time: 36s
Epoch 6 - Score: 0.7249
INFO:__main__:Epoch 6 - Score: 0.7249
Epoch 6 - Save Best Score: 0.7249 Model
INFO:__main__:Epoch 6 - Save Best Score: 0.7249 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4082(0.1875) 
f1 score : 0.5024793388429752
recall score : 0.9934640522875817
precision score : 0.336283185840708
thresh : 0.62


Score: 0.3956
INFO:__main__:Score: 0.3956
ACC BEST Score: 0.7249
INFO:__main__:ACC BEST Score: 0.7249
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.5024793388429752
recall score : 0.9934640522875817
precision score : 0.336283185840708
thresh : 0.62


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 1.2187(1.2187) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2265(0.3691) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1373(0.3027) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2183(0.2830) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0739(0.0739) 


Epoch 1 - avg_train_loss: 0.2830  avg_val_loss: 0.2028  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2830  avg_val_loss: 0.2028  time: 36s
Epoch 1 - Score: 0.7048
INFO:__main__:Epoch 1 - Score: 0.7048
Epoch 1 - Save Best Score: 0.7048 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7048 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4719(0.2028) 
f1 score : 0.47149460708782737
recall score : 1.0
precision score : 0.3084677419354839
thresh : 0.59
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.2017(0.2017) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1581(0.2101) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2707(0.2099) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2477(0.2090) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0674(0.0674) 


Epoch 2 - avg_train_loss: 0.2090  avg_val_loss: 0.2008  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2090  avg_val_loss: 0.2008  time: 36s
Epoch 2 - Score: 0.7068
INFO:__main__:Epoch 2 - Score: 0.7068
Epoch 2 - Save Best Score: 0.7068 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7068 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4823(0.2008) 
f1 score : 0.47663551401869164
recall score : 1.0
precision score : 0.3128834355828221
thresh : 0.61
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.1957(0.1957) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2266(0.1943) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1901(0.1936) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1505(0.1905) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0676(0.0676) 


Epoch 3 - avg_train_loss: 0.1905  avg_val_loss: 0.1974  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1905  avg_val_loss: 0.1974  time: 36s
Epoch 3 - Score: 0.7068
INFO:__main__:Epoch 3 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4541(0.1974) 
f1 score : 0.48253968253968255
recall score : 0.9934640522875817
precision score : 0.31865828092243187
thresh : 0.6
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.2095(0.2095) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1363(0.1714) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2817(0.1738) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1919(0.1743) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0523(0.0523) 


Epoch 4 - avg_train_loss: 0.1743  avg_val_loss: 0.2052  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1743  avg_val_loss: 0.2052  time: 36s
Epoch 4 - Score: 0.7129
INFO:__main__:Epoch 4 - Score: 0.7129
Epoch 4 - Save Best Score: 0.7129 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7129 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5228(0.2052) 
f1 score : 0.5017182130584193
recall score : 0.954248366013072
precision score : 0.34032634032634035
thresh : 0.61
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.1289(0.1289) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1240(0.1652) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0651(0.1621) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2728(0.1617) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0913(0.0913) 


Epoch 5 - avg_train_loss: 0.1617  avg_val_loss: 0.1945  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1617  avg_val_loss: 0.1945  time: 36s
Epoch 5 - Score: 0.7189
INFO:__main__:Epoch 5 - Score: 0.7189
Epoch 5 - Save Best Score: 0.7189 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7189 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3780(0.1945) 
f1 score : 0.48445171849427165
recall score : 0.9673202614379085
precision score : 0.3231441048034934
thresh : 0.64
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.1352(0.1352) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1150(0.1480) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.2099(0.1561) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1387(0.1546) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0594(0.0594) 


Epoch 6 - avg_train_loss: 0.1546  avg_val_loss: 0.2052  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1546  avg_val_loss: 0.2052  time: 36s
Epoch 6 - Score: 0.7249
INFO:__main__:Epoch 6 - Score: 0.7249
Epoch 6 - Save Best Score: 0.7249 Model
INFO:__main__:Epoch 6 - Save Best Score: 0.7249 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4927(0.2052) 
f1 score : 0.5089605734767026
recall score : 0.9281045751633987
precision score : 0.3506172839506173
thresh : 0.62


Score: 0.4498
INFO:__main__:Score: 0.4498
ACC BEST Score: 0.7249
INFO:__main__:ACC BEST Score: 0.7249
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.5089605734767026
recall score : 0.9281045751633987
precision score : 0.3506172839506173
thresh : 0.62


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.3482(0.3482) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2897(0.2964) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1082(0.2757) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3314(0.2615) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0088(0.0088) 


Epoch 1 - avg_train_loss: 0.2615  avg_val_loss: 0.2934  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2615  avg_val_loss: 0.2934  time: 36s
Epoch 1 - Score: 0.7042
INFO:__main__:Epoch 1 - Score: 0.7042
Epoch 1 - Save Best Score: 0.7042 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7042 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9263(0.2934) 
f1 score : 0.5050505050505051
recall score : 0.6578947368421053
precision score : 0.4098360655737705
thresh : 0.54
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.1753(0.1753) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1678(0.2111) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2449(0.2069) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2304(0.2058) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1121(0.1121) 


Epoch 2 - avg_train_loss: 0.2058  avg_val_loss: 0.1942  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2058  avg_val_loss: 0.1942  time: 36s
Epoch 2 - Score: 0.7062
INFO:__main__:Epoch 2 - Score: 0.7062
Epoch 2 - Save Best Score: 0.7062 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7062 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3270(0.1942) 
f1 score : 0.46986089644513135
recall score : 1.0
precision score : 0.30707070707070705
thresh : 0.61
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.1921(0.1921) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2820(0.1941) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2031(0.1902) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2165(0.1872) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1213(0.1213) 


Epoch 3 - avg_train_loss: 0.1872  avg_val_loss: 0.1963  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1872  avg_val_loss: 0.1963  time: 36s
Epoch 3 - Score: 0.7143
INFO:__main__:Epoch 3 - Score: 0.7143
Epoch 3 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2990(0.1963) 
f1 score : 0.47574334898278564
recall score : 1.0
precision score : 0.31211498973305957
thresh : 0.63
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.0869(0.0869) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1934(0.1751) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1645(0.1741) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1626(0.1707) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0839(0.0839) 


Epoch 4 - avg_train_loss: 0.1707  avg_val_loss: 0.1947  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1707  avg_val_loss: 0.1947  time: 36s
Epoch 4 - Score: 0.7264
INFO:__main__:Epoch 4 - Score: 0.7264
Epoch 4 - Save Best Score: 0.7264 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7264 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3845(0.1947) 
f1 score : 0.48788368336025845
recall score : 0.993421052631579
precision score : 0.3233404710920771
thresh : 0.62
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 43s) Loss: 0.1666(0.1666) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1338(0.1554) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1474(0.1557) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1941(0.1546) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0858(0.0858) 


Epoch 5 - avg_train_loss: 0.1546  avg_val_loss: 0.1987  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1546  avg_val_loss: 0.1987  time: 36s
Epoch 5 - Score: 0.7324
INFO:__main__:Epoch 5 - Score: 0.7324
Epoch 5 - Save Best Score: 0.7324 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7324 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3901(0.1987) 
f1 score : 0.48701298701298706
recall score : 0.9868421052631579
precision score : 0.3232758620689655
thresh : 0.63
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.1823(0.1823) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1212(0.1413) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1233(0.1408) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1579(0.1420) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1278(0.1278) 


Epoch 6 - avg_train_loss: 0.1420  avg_val_loss: 0.2051  time: 37s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1420  avg_val_loss: 0.2051  time: 37s
Epoch 6 - Score: 0.7304
INFO:__main__:Epoch 6 - Score: 0.7304


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3054(0.2051) 
f1 score : 0.47846889952153104
recall score : 0.9868421052631579
precision score : 0.3157894736842105
thresh : 0.65


Score: 0.3642
INFO:__main__:Score: 0.3642
ACC BEST Score: 0.7324
INFO:__main__:ACC BEST Score: 0.7324
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.48701298701298706
recall score : 0.9868421052631579
precision score : 0.3232758620689655
thresh : 0.63


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.4645(0.4645) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2546(0.3595) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1671(0.2995) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3059(0.2795) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0146(0.0146) 


Epoch 1 - avg_train_loss: 0.2795  avg_val_loss: 0.2470  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2795  avg_val_loss: 0.2470  time: 36s
Epoch 1 - Score: 0.6942
INFO:__main__:Epoch 1 - Score: 0.6942
Epoch 1 - Save Best Score: 0.6942 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6942 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7401(0.2470) 
f1 score : 0.4956672443674177
recall score : 0.9407894736842105
precision score : 0.33647058823529413
thresh : 0.57
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.3017(0.3017) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2041(0.2053) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1234(0.2046) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1356(0.2078) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1166(0.1166) 


Epoch 2 - avg_train_loss: 0.2078  avg_val_loss: 0.2032  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2078  avg_val_loss: 0.2032  time: 36s
Epoch 2 - Score: 0.6942
INFO:__main__:Epoch 2 - Score: 0.6942


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3840(0.2032) 
f1 score : 0.4713178294573644
recall score : 1.0
precision score : 0.30831643002028397
thresh : 0.66
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.2068(0.2068) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1787(0.1956) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1748(0.1905) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1389(0.1889) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1171(0.1171) 


Epoch 3 - avg_train_loss: 0.1889  avg_val_loss: 0.2040  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1889  avg_val_loss: 0.2040  time: 36s
Epoch 3 - Score: 0.6942
INFO:__main__:Epoch 3 - Score: 0.6942


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3740(0.2040) 
f1 score : 0.4740973312401883
recall score : 0.993421052631579
precision score : 0.311340206185567
thresh : 0.65
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.1868(0.1868) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1180(0.1795) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0787(0.1719) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1358(0.1713) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1720(0.1720) 


Epoch 4 - avg_train_loss: 0.1713  avg_val_loss: 0.2133  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1713  avg_val_loss: 0.2133  time: 36s
Epoch 4 - Score: 0.6962
INFO:__main__:Epoch 4 - Score: 0.6962
Epoch 4 - Save Best Score: 0.6962 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.6962 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2869(0.2133) 
f1 score : 0.4711388455538222
recall score : 0.993421052631579
precision score : 0.30879345603271985
thresh : 0.71
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.1415(0.1415) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1611(0.1629) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1771(0.1599) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1939(0.1581) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1284(0.1284) 


Epoch 5 - avg_train_loss: 0.1581  avg_val_loss: 0.2097  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1581  avg_val_loss: 0.2097  time: 36s
Epoch 5 - Score: 0.7002
INFO:__main__:Epoch 5 - Score: 0.7002
Epoch 5 - Save Best Score: 0.7002 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7002 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3551(0.2097) 
f1 score : 0.4868421052631579
recall score : 0.9736842105263158
precision score : 0.32456140350877194
thresh : 0.68
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.1576(0.1576) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0946(0.1422) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2080(0.1490) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1552(0.1471) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1116(0.1116) 


Epoch 6 - avg_train_loss: 0.1471  avg_val_loss: 0.2117  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1471  avg_val_loss: 0.2117  time: 36s
Epoch 6 - Score: 0.6962
INFO:__main__:Epoch 6 - Score: 0.6962


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3860(0.2117) 
f1 score : 0.48747913188647746
recall score : 0.9605263157894737
precision score : 0.32662192393736017
thresh : 0.68


Score: 0.3722
INFO:__main__:Score: 0.3722
ACC BEST Score: 0.7002
INFO:__main__:ACC BEST Score: 0.7002
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.4868421052631579
recall score : 0.9736842105263158
precision score : 0.32456140350877194
thresh : 0.68


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.1803(0.1803) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2236(0.3039) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2024(0.2710) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2186(0.2604) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0937(0.0937) 


Epoch 1 - avg_train_loss: 0.2604  avg_val_loss: 0.1975  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2604  avg_val_loss: 0.1975  time: 36s
Epoch 1 - Score: 0.7042
INFO:__main__:Epoch 1 - Score: 0.7042
Epoch 1 - Save Best Score: 0.7042 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7042 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4232(0.1975) 
f1 score : 0.46841294298921415
recall score : 1.0
precision score : 0.3058350100603622
thresh : 0.62
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.2360(0.2360) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1961(0.2090) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1900(0.2072) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1184(0.2061) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0695(0.0695) 


Epoch 2 - avg_train_loss: 0.2061  avg_val_loss: 0.1933  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2061  avg_val_loss: 0.1933  time: 37s
Epoch 2 - Score: 0.7082
INFO:__main__:Epoch 2 - Score: 0.7082
Epoch 2 - Save Best Score: 0.7082 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7082 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4571(0.1933) 
f1 score : 0.4720496894409938
recall score : 1.0
precision score : 0.3089430894308943
thresh : 0.6
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.2330(0.2330) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1465(0.1846) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.1946(0.1857) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.2025(0.1881) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0913(0.0913) 


Epoch 3 - avg_train_loss: 0.1881  avg_val_loss: 0.1923  time: 37s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1881  avg_val_loss: 0.1923  time: 37s
Epoch 3 - Score: 0.7082
INFO:__main__:Epoch 3 - Score: 0.7082


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4006(0.1923) 
f1 score : 0.4735202492211838
recall score : 1.0
precision score : 0.31020408163265306
thresh : 0.62
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.1670(0.1670) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1176(0.1675) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1931(0.1721) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1714(0.1733) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0717(0.0717) 


Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1934  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1733  avg_val_loss: 0.1934  time: 36s
Epoch 4 - Score: 0.7062
INFO:__main__:Epoch 4 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4601(0.1934) 
f1 score : 0.49586776859504134
recall score : 0.9868421052631579
precision score : 0.33112582781456956
thresh : 0.61
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 55s) Loss: 0.1339(0.1339) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1884(0.1589) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1651(0.1600) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1109(0.1618) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1208(0.1208) 


Epoch 5 - avg_train_loss: 0.1618  avg_val_loss: 0.1963  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1618  avg_val_loss: 0.1963  time: 36s
Epoch 5 - Score: 0.7123
INFO:__main__:Epoch 5 - Score: 0.7123
Epoch 5 - Save Best Score: 0.7123 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7123 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3478(0.1963) 
f1 score : 0.47936507936507944
recall score : 0.993421052631579
precision score : 0.3158995815899582
thresh : 0.65
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.1661(0.1661) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1314(0.1486) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1520(0.1529) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1689(0.1541) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1078(0.1078) 


Epoch 6 - avg_train_loss: 0.1541  avg_val_loss: 0.1952  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1541  avg_val_loss: 0.1952  time: 36s
Epoch 6 - Score: 0.7103
INFO:__main__:Epoch 6 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3877(0.1952) 
f1 score : 0.48387096774193544
recall score : 0.9868421052631579
precision score : 0.32051282051282054
thresh : 0.64


Score: 0.3400
INFO:__main__:Score: 0.3400
ACC BEST Score: 0.7123
INFO:__main__:ACC BEST Score: 0.7123
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.47936507936507944
recall score : 0.993421052631579
precision score : 0.3158995815899582
thresh : 0.65


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 50s) Loss: 0.2180(0.2180) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2128(0.3060) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3123(0.2831) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0864(0.2689) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0226(0.0226) 


Epoch 1 - avg_train_loss: 0.2689  avg_val_loss: 0.2363  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2689  avg_val_loss: 0.2363  time: 36s
Epoch 1 - Score: 0.7103
INFO:__main__:Epoch 1 - Score: 0.7103
Epoch 1 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7023(0.2363) 
f1 score : 0.48287671232876705
recall score : 0.9276315789473685
precision score : 0.3263888888888889
thresh : 0.57
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.1786(0.1786) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2531(0.2054) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2856(0.2024) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1834(0.2054) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0553(0.0553) 


Epoch 2 - avg_train_loss: 0.2054  avg_val_loss: 0.2032  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2054  avg_val_loss: 0.2032  time: 36s
Epoch 2 - Score: 0.7103
INFO:__main__:Epoch 2 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5132(0.2032) 
f1 score : 0.47770700636942676
recall score : 0.9868421052631579
precision score : 0.31512605042016806
thresh : 0.59
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.1560(0.1560) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2615(0.2025) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1290(0.1919) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1753(0.1914) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1144(0.1144) 


Epoch 3 - avg_train_loss: 0.1914  avg_val_loss: 0.1986  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1914  avg_val_loss: 0.1986  time: 36s
Epoch 3 - Score: 0.7143
INFO:__main__:Epoch 3 - Score: 0.7143
Epoch 3 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3536(0.1986) 
f1 score : 0.4735202492211838
recall score : 1.0
precision score : 0.31020408163265306
thresh : 0.63
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 37s) Loss: 0.2331(0.2331) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1880(0.1791) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2059(0.1767) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1451(0.1763) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0941(0.0941) 


Epoch 4 - avg_train_loss: 0.1763  avg_val_loss: 0.1970  time: 37s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1763  avg_val_loss: 0.1970  time: 37s
Epoch 4 - Score: 0.7183
INFO:__main__:Epoch 4 - Score: 0.7183
Epoch 4 - Save Best Score: 0.7183 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7183 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3940(0.1970) 
f1 score : 0.48242811501597443
recall score : 0.993421052631579
precision score : 0.31856540084388185
thresh : 0.64
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.1066(0.1066) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1888(0.1694) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1136(0.1684) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1897(0.1650) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0993(0.0993) 


Epoch 5 - avg_train_loss: 0.1650  avg_val_loss: 0.1969  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1650  avg_val_loss: 0.1969  time: 36s
Epoch 5 - Score: 0.7203
INFO:__main__:Epoch 5 - Score: 0.7203
Epoch 5 - Save Best Score: 0.7203 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7203 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3768(0.1969) 
f1 score : 0.48000000000000004
recall score : 0.9868421052631579
precision score : 0.3171247357293869
thresh : 0.65
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.2340(0.2340) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1875(0.1550) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1393(0.1560) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0860(0.1567) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0909(0.0909) 


Epoch 6 - avg_train_loss: 0.1567  avg_val_loss: 0.1968  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1567  avg_val_loss: 0.1968  time: 36s
Epoch 6 - Score: 0.7223
INFO:__main__:Epoch 6 - Score: 0.7223
Epoch 6 - Save Best Score: 0.7223 Model
INFO:__main__:Epoch 6 - Save Best Score: 0.7223 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3999(0.1968) 
f1 score : 0.4813008130081301
recall score : 0.9736842105263158
precision score : 0.31965442764578833
thresh : 0.64


Score: 0.3581
INFO:__main__:Score: 0.3581
ACC BEST Score: 0.7223
INFO:__main__:ACC BEST Score: 0.7223
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.4813008130081301
recall score : 0.9736842105263158
precision score : 0.31965442764578833
thresh : 0.64


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.8139(0.8139) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2947(0.3878) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2011(0.3183) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.2614(0.2912) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0830(0.0830) 


Epoch 1 - avg_train_loss: 0.2912  avg_val_loss: 0.1947  time: 37s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2912  avg_val_loss: 0.1947  time: 37s
Epoch 1 - Score: 0.7123
INFO:__main__:Epoch 1 - Score: 0.7123
Epoch 1 - Save Best Score: 0.7123 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7123 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4170(0.1947) 
f1 score : 0.46841294298921415
recall score : 1.0
precision score : 0.3058350100603622
thresh : 0.59
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 34s) Loss: 0.1758(0.1758) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2217(0.2155) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2162(0.2146) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0745(0.2122) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0516(0.0516) 


Epoch 2 - avg_train_loss: 0.2122  avg_val_loss: 0.1953  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2122  avg_val_loss: 0.1953  time: 37s
Epoch 2 - Score: 0.7163
INFO:__main__:Epoch 2 - Score: 0.7163
Epoch 2 - Save Best Score: 0.7163 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7163 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4729(0.1953) 
f1 score : 0.4720496894409938
recall score : 1.0
precision score : 0.3089430894308943
thresh : 0.58
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.2425(0.2425) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2534(0.1919) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2631(0.1941) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1880(0.1931) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1663(0.1663) 


Epoch 3 - avg_train_loss: 0.1931  avg_val_loss: 0.1933  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1931  avg_val_loss: 0.1933  time: 36s
Epoch 3 - Score: 0.7183
INFO:__main__:Epoch 3 - Score: 0.7183
Epoch 3 - Save Best Score: 0.7183 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7183 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2180(0.1933) 
f1 score : 0.46841294298921415
recall score : 1.0
precision score : 0.3058350100603622
thresh : 0.63
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.2544(0.2544) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1352(0.1823) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1881(0.1740) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1545(0.1716) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.1129(0.1129) 


Epoch 4 - avg_train_loss: 0.1716  avg_val_loss: 0.1845  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1716  avg_val_loss: 0.1845  time: 36s
Epoch 4 - Score: 0.7183
INFO:__main__:Epoch 4 - Score: 0.7183


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2799(0.1845) 
f1 score : 0.488673139158576
recall score : 0.993421052631579
precision score : 0.3240343347639485
thresh : 0.62
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.1656(0.1656) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1127(0.1627) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1887(0.1619) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1401(0.1617) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1213(0.1213) 


Epoch 5 - avg_train_loss: 0.1617  avg_val_loss: 0.1865  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1617  avg_val_loss: 0.1865  time: 36s
Epoch 5 - Score: 0.7223
INFO:__main__:Epoch 5 - Score: 0.7223
Epoch 5 - Save Best Score: 0.7223 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7223 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2544(0.1865) 
f1 score : 0.4894651539708266
recall score : 0.993421052631579
precision score : 0.3247311827956989
thresh : 0.62
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.1852(0.1852) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0236(0.1513) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1098(0.1543) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1258(0.1547) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1355(0.1355) 


Epoch 6 - avg_train_loss: 0.1547  avg_val_loss: 0.1908  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1547  avg_val_loss: 0.1908  time: 36s
Epoch 6 - Score: 0.7304
INFO:__main__:Epoch 6 - Score: 0.7304
Epoch 6 - Save Best Score: 0.7304 Model
INFO:__main__:Epoch 6 - Save Best Score: 0.7304 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2389(0.1908) 
f1 score : 0.49185667752442996
recall score : 0.993421052631579
precision score : 0.3268398268398268
thresh : 0.63


Score: 0.3722
INFO:__main__:Score: 0.3722
ACC BEST Score: 0.7304
INFO:__main__:ACC BEST Score: 0.7304
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.49185667752442996
recall score : 0.993421052631579
precision score : 0.3268398268398268
thresh : 0.63


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 49s) Loss: 0.4551(0.4551) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3042(0.2702) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2213(0.2562) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1345(0.2486) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0999(0.0999) 


Epoch 1 - avg_train_loss: 0.2486  avg_val_loss: 0.2014  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.2486  avg_val_loss: 0.2014  time: 36s
Epoch 1 - Score: 0.6982
INFO:__main__:Epoch 1 - Score: 0.6982
Epoch 1 - Save Best Score: 0.6982 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6982 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3768(0.2014) 
f1 score : 0.46913580246913583
recall score : 1.0
precision score : 0.3064516129032258
thresh : 0.61
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 41s) Loss: 0.1408(0.1408) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3439(0.1998) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0934(0.2020) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2340(0.2021) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1258(0.1258) 


Epoch 2 - avg_train_loss: 0.2021  avg_val_loss: 0.2035  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.2021  avg_val_loss: 0.2035  time: 36s
Epoch 2 - Score: 0.7022
INFO:__main__:Epoch 2 - Score: 0.7022
Epoch 2 - Save Best Score: 0.7022 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7022 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3278(0.2035) 
f1 score : 0.46913580246913583
recall score : 1.0
precision score : 0.3064516129032258
thresh : 0.64
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.1649(0.1649) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1095(0.2002) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1803(0.1899) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1987(0.1858) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1558(0.1558) 


Epoch 3 - avg_train_loss: 0.1858  avg_val_loss: 0.2124  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1858  avg_val_loss: 0.2124  time: 36s
Epoch 3 - Score: 0.7062
INFO:__main__:Epoch 3 - Score: 0.7062
Epoch 3 - Save Best Score: 0.7062 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7062 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2425(0.2124) 
f1 score : 0.4713178294573644
recall score : 1.0
precision score : 0.30831643002028397
thresh : 0.66
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.2055(0.2055) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0945(0.1575) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2690(0.1618) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1695(0.1616) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1121(0.1121) 


Epoch 4 - avg_train_loss: 0.1616  avg_val_loss: 0.2018  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1616  avg_val_loss: 0.2018  time: 36s
Epoch 4 - Score: 0.7103
INFO:__main__:Epoch 4 - Score: 0.7103
Epoch 4 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2916(0.2018) 
f1 score : 0.47936507936507944
recall score : 0.993421052631579
precision score : 0.3158995815899582
thresh : 0.65
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.1655(0.1655) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1193(0.1490) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1297(0.1482) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1750(0.1475) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0795(0.0795) 


Epoch 5 - avg_train_loss: 0.1475  avg_val_loss: 0.2056  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1475  avg_val_loss: 0.2056  time: 36s
Epoch 5 - Score: 0.7103
INFO:__main__:Epoch 5 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3806(0.2056) 
f1 score : 0.49498327759197325
recall score : 0.9736842105263158
precision score : 0.33183856502242154
thresh : 0.65
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.0964(0.0964) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1501(0.1432) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1427(0.1376) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0931(0.1376) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0705(0.0705) 


Epoch 6 - avg_train_loss: 0.1376  avg_val_loss: 0.2093  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1376  avg_val_loss: 0.2093  time: 36s
Epoch 6 - Score: 0.7123
INFO:__main__:Epoch 6 - Score: 0.7123
Epoch 6 - Save Best Score: 0.7123 Model
INFO:__main__:Epoch 6 - Save Best Score: 0.7123 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4022(0.2093) 
f1 score : 0.4931506849315068
recall score : 0.9473684210526315
precision score : 0.3333333333333333
thresh : 0.63


Score: 0.4044
INFO:__main__:Score: 0.4044
ACC BEST Score: 0.7123
INFO:__main__:ACC BEST Score: 0.7123
Score: 0.3798
INFO:__main__:Score: 0.3798
ACC BEST Score: 0.7107
INFO:__main__:ACC BEST Score: 0.7107


f1 score : 0.4931506849315068
recall score : 0.9473684210526315
precision score : 0.3333333333333333
thresh : 0.63
f1 score : 0.490166914559577
recall score : 0.9737360472751149
precision score : 0.32751766784452296
thresh : 0.63


In [None]:
# Final cell: release this Colab runtime once every cell above has finished,
# so the GPU-backed VM is not left allocated and idle after training.
# NOTE(review): unassign() is understood to disconnect and delete the runtime,
# discarding kernel state and any files outside the mounted Drive — confirm
# that all needed outputs were written under /content/drive before this runs.
from google.colab import runtime
runtime.unassign()