In [1]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
!nvidia-smi

Fri Apr 21 05:20:21 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0    43W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:

import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup, get_polynomial_decay_schedule_with_warmup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
DIR = "/content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")
CUSTOM_MODEL_DIR = os.path.join(OUTPUT_DIR,'clrp_deberta_v3_base_epoch20')
OUTPUT_EXP_DIR = DIR + '/output/EXP020/'
if not os.path.exists(OUTPUT_EXP_DIR):
    os.makedirs(OUTPUT_EXP_DIR)

In [6]:

# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    apex=True
    print_freq=100
    num_workers=4
    model_name="microsoft/deberta-v3-base"
    # model="microsoft/deberta-v3-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    model = CUSTOM_MODEL_DIR
    scheduler='polynomial' # ['linear', 'cosine']
    batch_scheduler=True
    num_cycles=0.5
    num_warmup_steps=0
    epochs=6
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=16
    fc_dropout=0.2
    target_size=1
    max_len=256
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    n_fold=10
    trn_fold=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    train=True
    nth_awp_start_epoch=1
    gradient_checkpointing = False
    freezing = False
    num_reinit_layers = 1
    is_reinit_layer = True

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [7]:
def get_score(labels, outputs):
    thresh = 0.5
    y_pred = outputs
    y_true = labels
    f_score = f1_score(y_true, (y_pred>thresh).astype(int))
    r_score = recall_score(y_true, (y_pred>thresh).astype(int))
    p_score = precision_score(y_true, (y_pred>thresh).astype(int))
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return accuracy_score(y_true, (y_pred>thresh).astype(int))

def get_acc_score(labels, outputs):
    y_pred = outputs
    y_true = labels
    best_score = 0
    best_thresh = 0.5
    for thresh in np.arange(0.1, 0.80, 0.01):
        thresh = np.round(thresh, 2)
        score = accuracy_score(y_true, (y_pred>thresh).astype(int))
        #print("Accuracy score at threshold {0} is {1}".format(thresh, score))
        if score > best_score:
          best_score = score
          best_thresh = thresh
    return accuracy_score(y_true, (y_pred>best_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Freezes module's parameters.
    """
    
    for parameter in module.parameters():
        parameter.requires_grad = False
        
def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """
    
    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)
            
    return freezed_parameters

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930
    """
    
    embedding_types = ("word", "position", "token_type")
    for embedding_type in embedding_types:
        attr_name = f"{embedding_type}_embeddings"
        
        if hasattr(embeddings_path, attr_name): 
            bnb.optim.GlobalOptimManager.get_instance().register_module_override(
                getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
            )

In [9]:
import pandas as pd
import numpy as np

train = pd.read_csv(os.path.join(INPUT_DIR,"train_data.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test_data.csv"))
sample_sub = pd.read_csv(os.path.join(INPUT_DIR,"submission.csv"))

print(train.shape)
display(train.head(3))

print(test.shape)
display(test.head(3))

print(sample_sub.shape)
display(sample_sub.head(3))

(4974, 6)


Unnamed: 0,id,title,year,abstract,keywords,y
0,1,Hierarchical Adversarially Learned Inference,2018,We propose a novel hierarchical generative mod...,"generative, hierarchical, unsupervised, semisu...",0
1,2,Learning to Compute Word Embeddings On the Fly,2018,Words in natural language follow a Zipfian dis...,"NLU, word embeddings, representation learning",0
2,3,Graph2Seq: Scalable Learning Dynamics for Graphs,2018,Neural networks are increasingly used as a gen...,,0


(6393, 5)


Unnamed: 0,id,title,year,abstract,keywords
0,1,StyleAlign: Analysis and Applications of Align...,2022,"In this paper, we perform an in-depth study of...","StyleGAN, transfer learning, fine tuning, mode..."
1,2,Embedding a random graph via GNN: mean-field i...,2021,We develop a theory for embedding a random gra...,"Graph neural network, graph embedding, multi-r..."
2,3,BBRefinement: an universal scheme to improve p...,2021,We present a conceptually simple yet powerful ...,"object detection, deep neural networks, refine..."


(6393, 2)


Unnamed: 0,id,y
0,1,0
1,2,0
2,3,0


In [10]:
train["texts"] = train["title"] + "[SEP]" + train["abstract"]  

In [11]:
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train.y)):
    train.loc[val_ , "kfold"] = int(fold)
    
train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [12]:
# ====================================================
# tokenizer
# ====================================================
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

In [13]:
# ====================================================
# Define max_len
# ====================================================
lengths = []
tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
for text in tk0:
    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
CFG.max_len = max(lengths) + 3 # cls
LOGGER.info(f"max_len: {CFG.max_len}")

100%|██████████| 4974/4974 [00:03<00:00, 1328.35it/s]
max_len: 522
INFO:__main__:max_len: 522


In [14]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False,
                           truncation=True)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TrainDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [15]:
def reinit_layers(model):

    #for layer in model.model.encoder.layer[-CFG.num_reinit_layers:]:
    for layer in model.encoder.layer[-CFG.num_reinit_layers:]:    #Custome model内(backbone)

            for module in layer.modules():

                if isinstance(module,nn.Linear):
                    module.weight.data.normal_(mean=0.0,std=model.config.initializer_range)
                    if module.bias is not None:
                            module.bias.data.zero_()
                elif isinstance(module, nn.Embedding):
                        module.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
                        if module.padding_idx is not None:
                            module.weight.data[module.padding_idx].zero_()
                elif isinstance(module, nn.LayerNorm):
                        module.bias.data.zero_()
                        module.weight.data.fill_(1.0)
                        
    return model

In [16]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

class MaxPooling(nn.Module):
    def __init__(self):
        super(MaxPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        embeddings = last_hidden_state.clone()
        embeddings[input_mask_expanded == 0] = -1e4
        max_embeddings, _ = torch.max(embeddings, dim=1)
        return max_embeddings
    

class CustomModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel(self.config)
        if CFG.is_reinit_layer:
            self.model = reinit_layers(self.model)
            print(f'Reinitializing Last {CFG.num_reinit_layers} Layers.')
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())
        
        self.high_dropout = nn.Dropout(p=0.5)

        self.pool = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        self.sig = nn.Sigmoid()
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        feature = self.layer_norm1(feature)
        return feature, outputs

    def forward(self, inputs=None, labels=None):
        feature, outputs = self.feature(inputs)
        logits = torch.mean(
            torch.stack(
                [self.fc(self.high_dropout(feature)) for _ in range(5)],
                dim=0,
            ),
            dim=0,
        )
        # calculate loss
        loss = None
        if labels is not None:
            loss_fn = Focal_MultiLabel_Loss(gamma=2.0)
            loss = loss_fn(logits, labels)
        
        output = (logits,) + outputs[2:]
        return ((loss,) + output) if loss is not None else output

In [17]:
class Focal_MultiLabel_Loss(nn.Module):
    def __init__(self, gamma):
      super(Focal_MultiLabel_Loss, self).__init__()
      self.gamma = gamma
      self.bceloss = nn.BCEWithLogitsLoss()

    def forward(self, outputs, targets):
      bce = self.bceloss(outputs.view(-1, 1), targets.view(-1, 1))
      bce_exp = torch.exp(-bce)
      focal_loss = (1-bce_exp)**self.gamma * bce
      return focal_loss.mean()

In [18]:
def calculate_loss(inputs, labels, model, criterion, is_valid=True, device="cpu"):    
    outputs = model(inputs,labels)
    loss, logits = outputs[:2]
    return (loss, logits) if is_valid else loss

In [19]:

# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if scaler is not None:
            scaler.unscale_(optimizer)
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)

        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          #grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            loss, y_preds = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=True, device=device)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    predictions = np.concatenate(predictions)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [20]:

# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds['y'].values
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr=5e-6, decoder_lr=1e-4, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", 
                    "LayerNorm.weight"]
        group1=['layer.0.','layer.1.','layer.2.','layer.3.']
        group2=['layer.4.','layer.5.','layer.6.','layer.7.']    
        group3=['layer.8.','layer.9.','layer.10.','layer.11.']
        group_all=['layer.0.','layer.1.','layer.2.','layer.3.','layer.4.','layer.5.','layer.6.','layer.7.','layer.8.','layer.9.','layer.10.','layer.11.']
        optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': weight_decay, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': weight_decay, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': weight_decay, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': 0.0},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': 0.0, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': 0.0, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': 0.0, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.named_parameters() if "model" not in n], 'lr':decoder_lr, "momentum" : 0.99},
    ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        elif cfg.scheduler == 'polynomial':
            warmup_steps = int(len(train_folds) / CFG.batch_size * 0.1)
            scheduler = get_polynomial_decay_schedule_with_warmup(
                optimizer, warmup_steps, num_train_steps, lr_end=7e-7, power=3.0)
        return scheduler
    
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = Focal_MultiLabel_Loss(gamma=2.0)
    
    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        
        # scoring
        score = get_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        
        if best_score < score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth", 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [21]:
if __name__ == '__main__':
    
    def get_result(oof_df):
        labels = oof_df['y'].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        acc_score = get_acc_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'ACC BEST Score: {acc_score:<.4f}')
    
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dt

Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 1s (remain 7m 26s) Loss: 0.1684(0.1684) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 13s (remain 0m 23s) Loss: 0.1864(0.1704) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.1476(0.1550) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.1330(0.1482) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0760(0.0760) 


Epoch 1 - avg_train_loss: 0.1482  avg_val_loss: 0.1374  time: 37s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1482  avg_val_loss: 0.1374  time: 37s
Epoch 1 - Score: 0.6948
INFO:__main__:Epoch 1 - Score: 0.6948
Epoch 1 - Save Best Score: 0.6948 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6948 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2675(0.1374) 
f1 score : 0.16483516483516483
recall score : 0.09868421052631579
precision score : 0.5
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 55s) Loss: 0.0919(0.0919) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2124(0.1235) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1264(0.1251) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0928(0.1234) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0351(0.0351) 


Epoch 2 - avg_train_loss: 0.1234  avg_val_loss: 0.1375  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1234  avg_val_loss: 0.1375  time: 37s
Epoch 2 - Score: 0.7068
INFO:__main__:Epoch 2 - Score: 0.7068
Epoch 2 - Save Best Score: 0.7068 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7068 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3325(0.1375) 
f1 score : 0.22340425531914895
recall score : 0.13815789473684212
precision score : 0.5833333333333334
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 54s) Loss: 0.0992(0.0992) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0743(0.1078) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0977(0.1056) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0958(0.1060) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0317(0.0317) 


Epoch 3 - avg_train_loss: 0.1060  avg_val_loss: 0.1343  time: 37s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1060  avg_val_loss: 0.1343  time: 37s
Epoch 3 - Score: 0.6988
INFO:__main__:Epoch 3 - Score: 0.6988


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3201(0.1343) 
f1 score : 0.29906542056074764
recall score : 0.21052631578947367
precision score : 0.5161290322580645
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 38s) Loss: 0.0455(0.0455) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.0133(0.0837) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0755(0.0867) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1251(0.0883) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0234(0.0234) 


Epoch 4 - avg_train_loss: 0.0883  avg_val_loss: 0.1478  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0883  avg_val_loss: 0.1478  time: 36s
Epoch 4 - Score: 0.6948
INFO:__main__:Epoch 4 - Score: 0.6948


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3395(0.1478) 
f1 score : 0.3559322033898305
recall score : 0.27631578947368424
precision score : 0.5
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 39s) Loss: 0.0351(0.0351) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0476(0.0781) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0340(0.0728) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0894(0.0702) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0175(0.0175) 


Epoch 5 - avg_train_loss: 0.0702  avg_val_loss: 0.1766  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0702  avg_val_loss: 0.1766  time: 36s
Epoch 5 - Score: 0.6928
INFO:__main__:Epoch 5 - Score: 0.6928


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3996(0.1766) 
f1 score : 0.3952569169960475
recall score : 0.32894736842105265
precision score : 0.49504950495049505
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 33s) Loss: 0.1741(0.1741) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0342(0.0568) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0298(0.0597) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0682(0.0615) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0234(0.0234) 


Epoch 6 - avg_train_loss: 0.0615  avg_val_loss: 0.1670  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0615  avg_val_loss: 0.1670  time: 36s
Epoch 6 - Score: 0.6747
INFO:__main__:Epoch 6 - Score: 0.6747


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3279(0.1670) 
f1 score : 0.4172661870503597
recall score : 0.3815789473684211
precision score : 0.4603174603174603


Score: 0.7068
INFO:__main__:Score: 0.7068
ACC BEST Score: 0.7129
INFO:__main__:ACC BEST Score: 0.7129
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.22340425531914895
recall score : 0.13815789473684212
precision score : 0.5833333333333334


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 2m 0s) Loss: 0.2696(0.2696) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1362(0.1657) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2307(0.1493) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1494(0.1423) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0190(0.0190) 


Epoch 1 - avg_train_loss: 0.1423  avg_val_loss: 0.1815  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1423  avg_val_loss: 0.1815  time: 36s
Epoch 1 - Score: 0.6928
INFO:__main__:Epoch 1 - Score: 0.6928
Epoch 1 - Save Best Score: 0.6928 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6928 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6357(0.1815) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 2m 10s) Loss: 0.1214(0.1214) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0509(0.1176) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1868(0.1169) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0689(0.1170) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0420(0.0420) 


Epoch 2 - avg_train_loss: 0.1170  avg_val_loss: 0.1341  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1170  avg_val_loss: 0.1341  time: 36s
Epoch 2 - Score: 0.7048
INFO:__main__:Epoch 2 - Score: 0.7048
Epoch 2 - Save Best Score: 0.7048 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7048 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3489(0.1341) 
f1 score : 0.30985915492957744
recall score : 0.21568627450980393
precision score : 0.55
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 57s) Loss: 0.0749(0.0749) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0394(0.0899) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1666(0.0934) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0923(0.0923) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0207(0.0207) 


Epoch 3 - avg_train_loss: 0.0923  avg_val_loss: 0.1485  time: 37s
INFO:__main__:Epoch 3 - avg_train_loss: 0.0923  avg_val_loss: 0.1485  time: 37s
Epoch 3 - Score: 0.6988
INFO:__main__:Epoch 3 - Score: 0.6988


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4667(0.1485) 
f1 score : 0.2647058823529412
recall score : 0.17647058823529413
precision score : 0.5294117647058824
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 48s) Loss: 0.0500(0.0500) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1236(0.0765) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0662(0.0720) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0685(0.0701) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0164(0.0164) 


Epoch 4 - avg_train_loss: 0.0701  avg_val_loss: 0.1672  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0701  avg_val_loss: 0.1672  time: 36s
Epoch 4 - Score: 0.7209
INFO:__main__:Epoch 4 - Score: 0.7209
Epoch 4 - Save Best Score: 0.7209 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7209 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5244(0.1672) 
f1 score : 0.4279835390946502
recall score : 0.33986928104575165
precision score : 0.5777777777777777
Epoch: [5][0/279] Elapsed 0m 0s (remain 2m 0s) Loss: 0.0471(0.0471) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 22s) Loss: 0.1202(0.0547) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.0632(0.0530) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0705(0.0541) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0491(0.0491) 


Epoch 5 - avg_train_loss: 0.0541  avg_val_loss: 0.1391  time: 37s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0541  avg_val_loss: 0.1391  time: 37s
Epoch 5 - Score: 0.6968
INFO:__main__:Epoch 5 - Score: 0.6968


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2607(0.1391) 
f1 score : 0.529595015576324
recall score : 0.5555555555555556
precision score : 0.5059523809523809
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.0391(0.0391) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.0228(0.0431) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1057(0.0442) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0583(0.0432) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0312(0.0312) 


Epoch 6 - avg_train_loss: 0.0432  avg_val_loss: 0.1711  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0432  avg_val_loss: 0.1711  time: 36s
Epoch 6 - Score: 0.7088
INFO:__main__:Epoch 6 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4385(0.1711) 
f1 score : 0.49477351916376305
recall score : 0.46405228758169936
precision score : 0.5298507462686567


Score: 0.7209
INFO:__main__:Score: 0.7209
ACC BEST Score: 0.7209
INFO:__main__:ACC BEST Score: 0.7209
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.4279835390946502
recall score : 0.33986928104575165
precision score : 0.5777777777777777


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 58s) Loss: 0.1825(0.1825) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1156(0.1413) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1327(0.1399) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1246(0.1364) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0410(0.0410) 


Epoch 1 - avg_train_loss: 0.1364  avg_val_loss: 0.1433  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1364  avg_val_loss: 0.1433  time: 36s
Epoch 1 - Score: 0.6948
INFO:__main__:Epoch 1 - Score: 0.6948
Epoch 1 - Save Best Score: 0.6948 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6948 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4327(0.1433) 
f1 score : 0.012987012987012988
recall score : 0.006535947712418301
precision score : 1.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 53s) Loss: 0.1075(0.1075) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.0456(0.1164) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0858(0.1177) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1287(0.1156) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0238(0.0238) 


Epoch 2 - avg_train_loss: 0.1156  avg_val_loss: 0.1438  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1156  avg_val_loss: 0.1438  time: 36s
Epoch 2 - Score: 0.7028
INFO:__main__:Epoch 2 - Score: 0.7028
Epoch 2 - Save Best Score: 0.7028 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7028 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5015(0.1438) 
f1 score : 0.24489795918367346
recall score : 0.1568627450980392
precision score : 0.5581395348837209
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 58s) Loss: 0.1130(0.1130) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0555(0.0972) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.0665(0.0932) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0381(0.0932) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0093(0.0093) 


Epoch 3 - avg_train_loss: 0.0932  avg_val_loss: 0.1854  time: 37s
INFO:__main__:Epoch 3 - avg_train_loss: 0.0932  avg_val_loss: 0.1854  time: 37s
Epoch 3 - Score: 0.7088
INFO:__main__:Epoch 3 - Score: 0.7088
Epoch 3 - Save Best Score: 0.7088 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7088 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7309(0.1854) 
f1 score : 0.1807909604519774
recall score : 0.10457516339869281
precision score : 0.6666666666666666
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 58s) Loss: 0.0966(0.0966) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0304(0.0736) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.0408(0.0674) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0399(0.0664) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0292(0.0292) 


Epoch 4 - avg_train_loss: 0.0664  avg_val_loss: 0.1458  time: 37s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0664  avg_val_loss: 0.1458  time: 37s
Epoch 4 - Score: 0.7028
INFO:__main__:Epoch 4 - Score: 0.7028


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4845(0.1458) 
f1 score : 0.421875
recall score : 0.35294117647058826
precision score : 0.5242718446601942
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 51s) Loss: 0.0387(0.0387) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.0960(0.0540) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0250(0.0511) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0215(0.0494) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0233(0.0233) 


Epoch 5 - avg_train_loss: 0.0494  avg_val_loss: 0.1864  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0494  avg_val_loss: 0.1864  time: 36s
Epoch 5 - Score: 0.7048
INFO:__main__:Epoch 5 - Score: 0.7048


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6493(0.1864) 
f1 score : 0.41434262948207173
recall score : 0.33986928104575165
precision score : 0.5306122448979592
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 53s) Loss: 0.0121(0.0121) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.0225(0.0378) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0849(0.0401) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0128(0.0386) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0257(0.0257) 


Epoch 6 - avg_train_loss: 0.0386  avg_val_loss: 0.2087  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0386  avg_val_loss: 0.2087  time: 36s
Epoch 6 - Score: 0.6988
INFO:__main__:Epoch 6 - Score: 0.6988


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7263(0.2087) 
f1 score : 0.4140625
recall score : 0.3464052287581699
precision score : 0.5145631067961165


Score: 0.7088
INFO:__main__:Score: 0.7088
ACC BEST Score: 0.7108
INFO:__main__:ACC BEST Score: 0.7108
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.1807909604519774
recall score : 0.10457516339869281
precision score : 0.6666666666666666


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 59s) Loss: 0.6305(0.6305) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0973(0.1615) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.0622(0.1453) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.1235(0.1445) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0303(0.0303) 


Epoch 1 - avg_train_loss: 0.1445  avg_val_loss: 0.1655  time: 37s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1445  avg_val_loss: 0.1655  time: 37s
Epoch 1 - Score: 0.6928
INFO:__main__:Epoch 1 - Score: 0.6928
Epoch 1 - Save Best Score: 0.6928 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6928 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4908(0.1655) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 57s) Loss: 0.1142(0.1142) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0691(0.1230) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.1460(0.1193) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.2253(0.1185) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0129(0.0129) 


Epoch 2 - avg_train_loss: 0.1185  avg_val_loss: 0.1918  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1185  avg_val_loss: 0.1918  time: 37s
Epoch 2 - Score: 0.6928
INFO:__main__:Epoch 2 - Score: 0.6928


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6546(0.1918) 
f1 score : 0.02547770700636943
recall score : 0.013071895424836602
precision score : 0.5
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 57s) Loss: 0.0995(0.0995) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.0870(0.0997) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1047(0.0982) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0755(0.0956) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0405(0.0405) 


Epoch 3 - avg_train_loss: 0.0956  avg_val_loss: 0.1303  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.0956  avg_val_loss: 0.1303  time: 36s
Epoch 3 - Score: 0.6767
INFO:__main__:Epoch 3 - Score: 0.6767


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2783(0.1303) 
f1 score : 0.43902439024390244
recall score : 0.4117647058823529
precision score : 0.4701492537313433
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 50s) Loss: 0.0776(0.0776) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0380(0.0688) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2726(0.0713) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0794(0.0714) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0057(0.0057) 


Epoch 4 - avg_train_loss: 0.0714  avg_val_loss: 0.2286  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0714  avg_val_loss: 0.2286  time: 36s
Epoch 4 - Score: 0.7129
INFO:__main__:Epoch 4 - Score: 0.7129
Epoch 4 - Save Best Score: 0.7129 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7129 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8192(0.2286) 
f1 score : 0.25906735751295334
recall score : 0.16339869281045752
precision score : 0.625
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 58s) Loss: 0.0301(0.0301) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0295(0.0571) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.0077(0.0543) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.1533(0.0539) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0192(0.0192) 


Epoch 5 - avg_train_loss: 0.0539  avg_val_loss: 0.1781  time: 37s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0539  avg_val_loss: 0.1781  time: 37s
Epoch 5 - Score: 0.6867
INFO:__main__:Epoch 5 - Score: 0.6867


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5069(0.1781) 
f1 score : 0.42222222222222217
recall score : 0.37254901960784315
precision score : 0.48717948717948717
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 48s) Loss: 0.0374(0.0374) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.0565(0.0407) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0651(0.0452) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0433(0.0436) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0079(0.0079) 


Epoch 6 - avg_train_loss: 0.0436  avg_val_loss: 0.2583  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0436  avg_val_loss: 0.2583  time: 36s
Epoch 6 - Score: 0.7028
INFO:__main__:Epoch 6 - Score: 0.7028


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8614(0.2583) 
f1 score : 0.36206896551724144
recall score : 0.27450980392156865
precision score : 0.5316455696202531


Score: 0.7129
INFO:__main__:Score: 0.7129
ACC BEST Score: 0.7149
INFO:__main__:ACC BEST Score: 0.7149
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.25906735751295334
recall score : 0.16339869281045752
precision score : 0.625


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 56s) Loss: 0.2184(0.2184) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1469(0.1635) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0610(0.1518) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1425(0.1451) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0129(0.0129) 


Epoch 1 - avg_train_loss: 0.1451  avg_val_loss: 0.1907  time: 37s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1451  avg_val_loss: 0.1907  time: 37s
Epoch 1 - Score: 0.6942
INFO:__main__:Epoch 1 - Score: 0.6942
Epoch 1 - Save Best Score: 0.6942 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6942 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6797(0.1907) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 59s) Loss: 0.0566(0.0566) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0909(0.1333) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1250(0.1258) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1190(0.1227) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0321(0.0321) 


Epoch 2 - avg_train_loss: 0.1227  avg_val_loss: 0.1336  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1227  avg_val_loss: 0.1336  time: 37s
Epoch 2 - Score: 0.7082
INFO:__main__:Epoch 2 - Score: 0.7082
Epoch 2 - Save Best Score: 0.7082 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7082 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3434(0.1336) 
f1 score : 0.19889502762430936
recall score : 0.11842105263157894
precision score : 0.6206896551724138
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 53s) Loss: 0.1171(0.1171) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1600(0.1036) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1011(0.0988) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0766(0.0988) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0225(0.0225) 


Epoch 3 - avg_train_loss: 0.0988  avg_val_loss: 0.1433  time: 37s
INFO:__main__:Epoch 3 - avg_train_loss: 0.0988  avg_val_loss: 0.1433  time: 37s
Epoch 3 - Score: 0.7062
INFO:__main__:Epoch 3 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3584(0.1433) 
f1 score : 0.30476190476190473
recall score : 0.21052631578947367
precision score : 0.5517241379310345
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 53s) Loss: 0.0369(0.0369) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1144(0.0816) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0517(0.0803) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0664(0.0773) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0159(0.0159) 


Epoch 4 - avg_train_loss: 0.0773  avg_val_loss: 0.1775  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0773  avg_val_loss: 0.1775  time: 36s
Epoch 4 - Score: 0.7143
INFO:__main__:Epoch 4 - Score: 0.7143
Epoch 4 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4120(0.1775) 
f1 score : 0.3931623931623932
recall score : 0.3026315789473684
precision score : 0.5609756097560976
Epoch: [5][0/279] Elapsed 0m 0s (remain 2m 2s) Loss: 0.0589(0.0589) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0372(0.0600) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0400(0.0609) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0885(0.0609) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0114(0.0114) 


Epoch 5 - avg_train_loss: 0.0609  avg_val_loss: 0.2209  time: 37s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0609  avg_val_loss: 0.2209  time: 37s
Epoch 5 - Score: 0.7062
INFO:__main__:Epoch 5 - Score: 0.7062


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5378(0.2209) 
f1 score : 0.34234234234234234
recall score : 0.25
precision score : 0.5428571428571428
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 50s) Loss: 0.0800(0.0800) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0183(0.0483) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0670(0.0495) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0606(0.0512) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0699(0.0699) 


Epoch 6 - avg_train_loss: 0.0512  avg_val_loss: 0.1573  time: 37s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0512  avg_val_loss: 0.1573  time: 37s
Epoch 6 - Score: 0.6579
INFO:__main__:Epoch 6 - Score: 0.6579


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.1758(0.1573) 
f1 score : 0.47204968944099385
recall score : 0.5
precision score : 0.4470588235294118


Score: 0.7143
INFO:__main__:Score: 0.7143
ACC BEST Score: 0.7203
INFO:__main__:ACC BEST Score: 0.7203
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.3931623931623932
recall score : 0.3026315789473684
precision score : 0.5609756097560976


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 58s) Loss: 0.1379(0.1379) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1489(0.1518) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0680(0.1448) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1728(0.1405) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0258(0.0258) 


Epoch 1 - avg_train_loss: 0.1405  avg_val_loss: 0.1803  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1405  avg_val_loss: 0.1803  time: 36s
Epoch 1 - Score: 0.6942
INFO:__main__:Epoch 1 - Score: 0.6942
Epoch 1 - Save Best Score: 0.6942 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6942 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5778(0.1803) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 2m 0s) Loss: 0.1681(0.1681) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1091(0.1171) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.0364(0.1186) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0547(0.1196) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0440(0.0440) 


Epoch 2 - avg_train_loss: 0.1196  avg_val_loss: 0.1542  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1196  avg_val_loss: 0.1542  time: 37s
Epoch 2 - Score: 0.6781
INFO:__main__:Epoch 2 - Score: 0.6781


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4067(0.1542) 
f1 score : 0.1919191919191919
recall score : 0.125
precision score : 0.41304347826086957
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 51s) Loss: 0.1103(0.1103) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.0734(0.1026) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1109(0.0976) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0732(0.0968) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0212(0.0212) 


Epoch 3 - avg_train_loss: 0.0968  avg_val_loss: 0.1893  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.0968  avg_val_loss: 0.1893  time: 36s
Epoch 3 - Score: 0.6922
INFO:__main__:Epoch 3 - Score: 0.6922


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5844(0.1893) 
f1 score : 0.19047619047619044
recall score : 0.11842105263157894
precision score : 0.4864864864864865
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 53s) Loss: 0.0982(0.0982) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0366(0.0845) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0133(0.0758) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0558(0.0752) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.1267(0.1267) 


Epoch 4 - avg_train_loss: 0.0752  avg_val_loss: 0.1574  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0752  avg_val_loss: 0.1574  time: 36s
Epoch 4 - Score: 0.6378
INFO:__main__:Epoch 4 - Score: 0.6378


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.1605(0.1574) 
f1 score : 0.49720670391061444
recall score : 0.5855263157894737
precision score : 0.4320388349514563
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 55s) Loss: 0.0462(0.0462) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0490(0.0616) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0501(0.0578) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0586(0.0549) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0502(0.0502) 


Epoch 5 - avg_train_loss: 0.0549  avg_val_loss: 0.1907  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0549  avg_val_loss: 0.1907  time: 36s
Epoch 5 - Score: 0.6479
INFO:__main__:Epoch 5 - Score: 0.6479


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3620(0.1907) 
f1 score : 0.39862542955326463
recall score : 0.3815789473684211
precision score : 0.4172661870503597
Epoch: [6][0/279] Elapsed 0m 0s (remain 2m 0s) Loss: 0.0265(0.0265) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0095(0.0394) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0828(0.0454) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0318(0.0430) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0330(0.0330) 


Epoch 6 - avg_train_loss: 0.0430  avg_val_loss: 0.2315  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0430  avg_val_loss: 0.2315  time: 36s
Epoch 6 - Score: 0.6620
INFO:__main__:Epoch 6 - Score: 0.6620


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4787(0.2315) 
f1 score : 0.373134328358209
recall score : 0.32894736842105265
precision score : 0.43103448275862066


Score: 0.6942
INFO:__main__:Score: 0.6942
ACC BEST Score: 0.6942
INFO:__main__:ACC BEST Score: 0.6942
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.0
recall score : 0.0
precision score : 0.0


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 2m 2s) Loss: 0.2424(0.2424) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1183(0.1601) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0628(0.1458) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0854(0.1404) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0108(0.0108) 


Epoch 1 - avg_train_loss: 0.1404  avg_val_loss: 0.2185  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1404  avg_val_loss: 0.2185  time: 36s
Epoch 1 - Score: 0.6942
INFO:__main__:Epoch 1 - Score: 0.6942
Epoch 1 - Save Best Score: 0.6942 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6942 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7747(0.2185) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 56s) Loss: 0.1426(0.1426) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1664(0.1188) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.0759(0.1169) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0386(0.1152) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0155(0.0155) 


Epoch 2 - avg_train_loss: 0.1152  avg_val_loss: 0.1675  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1152  avg_val_loss: 0.1675  time: 37s
Epoch 2 - Score: 0.7022
INFO:__main__:Epoch 2 - Score: 0.7022
Epoch 2 - Save Best Score: 0.7022 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7022 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5758(0.1675) 
f1 score : 0.11904761904761904
recall score : 0.06578947368421052
precision score : 0.625
Epoch: [3][0/279] Elapsed 0m 0s (remain 2m 3s) Loss: 0.1808(0.1808) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0530(0.0952) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0693(0.0925) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0850(0.0924) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0335(0.0335) 


Epoch 3 - avg_train_loss: 0.0924  avg_val_loss: 0.1282  time: 37s
INFO:__main__:Epoch 3 - avg_train_loss: 0.0924  avg_val_loss: 0.1282  time: 37s
Epoch 3 - Score: 0.7062
INFO:__main__:Epoch 3 - Score: 0.7062
Epoch 3 - Save Best Score: 0.7062 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7062 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3841(0.1282) 
f1 score : 0.39669421487603307
recall score : 0.3157894736842105
precision score : 0.5333333333333333
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 59s) Loss: 0.0623(0.0623) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0185(0.0645) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0880(0.0692) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0862(0.0692) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0213(0.0213) 


Epoch 4 - avg_train_loss: 0.0692  avg_val_loss: 0.1496  time: 37s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0692  avg_val_loss: 0.1496  time: 37s
Epoch 4 - Score: 0.7002
INFO:__main__:Epoch 4 - Score: 0.7002


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5937(0.1496) 
f1 score : 0.4063745019920319
recall score : 0.3355263157894737
precision score : 0.5151515151515151
Epoch: [5][0/279] Elapsed 0m 0s (remain 2m 7s) Loss: 0.0361(0.0361) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0933(0.0470) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0290(0.0478) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0197(0.0506) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0557(0.0557) 


Epoch 5 - avg_train_loss: 0.0506  avg_val_loss: 0.1350  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0506  avg_val_loss: 0.1350  time: 36s
Epoch 5 - Score: 0.6660
INFO:__main__:Epoch 5 - Score: 0.6660


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3890(0.1350) 
f1 score : 0.5029940119760479
recall score : 0.5526315789473685
precision score : 0.46153846153846156
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 50s) Loss: 0.0471(0.0471) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.0402(0.0374) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0305(0.0391) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0270(0.0413) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0327(0.0327) 


Epoch 6 - avg_train_loss: 0.0413  avg_val_loss: 0.1587  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0413  avg_val_loss: 0.1587  time: 36s
Epoch 6 - Score: 0.6901
INFO:__main__:Epoch 6 - Score: 0.6901


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6713(0.1587) 
f1 score : 0.48666666666666664
recall score : 0.48026315789473684
precision score : 0.49324324324324326


Score: 0.7062
INFO:__main__:Score: 0.7062
ACC BEST Score: 0.7103
INFO:__main__:ACC BEST Score: 0.7103
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.39669421487603307
recall score : 0.3157894736842105
precision score : 0.5333333333333333


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 59s) Loss: 0.1242(0.1242) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1216(0.1369) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2312(0.1425) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0361(0.1427) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0246(0.0246) 


Epoch 1 - avg_train_loss: 0.1427  avg_val_loss: 0.1779  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1427  avg_val_loss: 0.1779  time: 36s
Epoch 1 - Score: 0.6982
INFO:__main__:Epoch 1 - Score: 0.6982
Epoch 1 - Save Best Score: 0.6982 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6982 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5483(0.1779) 
f1 score : 0.025974025974025976
recall score : 0.013157894736842105
precision score : 1.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 57s) Loss: 0.0786(0.0786) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1377(0.1185) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2005(0.1153) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1092(0.1160) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0332(0.0332) 


Epoch 2 - avg_train_loss: 0.1160  avg_val_loss: 0.1568  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1160  avg_val_loss: 0.1568  time: 36s
Epoch 2 - Score: 0.7103
INFO:__main__:Epoch 2 - Score: 0.7103
Epoch 2 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4149(0.1568) 
f1 score : 0.3076923076923077
recall score : 0.21052631578947367
precision score : 0.5714285714285714
Epoch: [3][0/279] Elapsed 0m 0s (remain 2m 4s) Loss: 0.0548(0.0548) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1657(0.1089) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0426(0.0995) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0817(0.0996) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0215(0.0215) 


Epoch 3 - avg_train_loss: 0.0996  avg_val_loss: 0.1606  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.0996  avg_val_loss: 0.1606  time: 36s
Epoch 3 - Score: 0.7143
INFO:__main__:Epoch 3 - Score: 0.7143
Epoch 3 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4912(0.1606) 
f1 score : 0.297029702970297
recall score : 0.19736842105263158
precision score : 0.6
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 54s) Loss: 0.1037(0.1037) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0856(0.0819) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0990(0.0812) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0382(0.0788) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0277(0.0277) 


Epoch 4 - avg_train_loss: 0.0788  avg_val_loss: 0.1454  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0788  avg_val_loss: 0.1454  time: 36s
Epoch 4 - Score: 0.7082
INFO:__main__:Epoch 4 - Score: 0.7082


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.3991(0.1454) 
f1 score : 0.44015444015444016
recall score : 0.375
precision score : 0.5327102803738317
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 52s) Loss: 0.0274(0.0274) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0563(0.0661) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0229(0.0666) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0460(0.0636) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0257(0.0257) 


Epoch 5 - avg_train_loss: 0.0636  avg_val_loss: 0.1546  time: 37s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0636  avg_val_loss: 0.1546  time: 37s
Epoch 5 - Score: 0.6901
INFO:__main__:Epoch 5 - Score: 0.6901


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4329(0.1546) 
f1 score : 0.4122137404580153
recall score : 0.35526315789473684
precision score : 0.4909090909090909
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 57s) Loss: 0.1447(0.1447) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0703(0.0520) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0205(0.0526) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0118(0.0538) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0211(0.0211) 


Epoch 6 - avg_train_loss: 0.0538  avg_val_loss: 0.1665  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0538  avg_val_loss: 0.1665  time: 36s
Epoch 6 - Score: 0.6881
INFO:__main__:Epoch 6 - Score: 0.6881


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5054(0.1665) 
f1 score : 0.3921568627450981
recall score : 0.32894736842105265
precision score : 0.4854368932038835


Score: 0.7143
INFO:__main__:Score: 0.7143
ACC BEST Score: 0.7243
INFO:__main__:ACC BEST Score: 0.7243
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.297029702970297
recall score : 0.19736842105263158
precision score : 0.6


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 57s) Loss: 0.3315(0.3315) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1705(0.1807) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1474(0.1631) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1629(0.1541) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0280(0.0280) 


Epoch 1 - avg_train_loss: 0.1541  avg_val_loss: 0.1735  time: 37s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1541  avg_val_loss: 0.1735  time: 37s
Epoch 1 - Score: 0.6942
INFO:__main__:Epoch 1 - Score: 0.6942
Epoch 1 - Save Best Score: 0.6942 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6942 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5305(0.1735) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 49s) Loss: 0.1009(0.1009) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1197(0.1238) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.1440(0.1221) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0274(0.1208) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0258(0.0258) 


Epoch 2 - avg_train_loss: 0.1208  avg_val_loss: 0.1593  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1208  avg_val_loss: 0.1593  time: 37s
Epoch 2 - Score: 0.6942
INFO:__main__:Epoch 2 - Score: 0.6942


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4503(0.1593) 
f1 score : 0.012987012987012988
recall score : 0.006578947368421052
precision score : 0.5
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 56s) Loss: 0.1312(0.1312) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.1445(0.1030) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1269(0.1049) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0765(0.1024) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.1269(0.1269) 


Epoch 3 - avg_train_loss: 0.1024  avg_val_loss: 0.1224  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.1024  avg_val_loss: 0.1224  time: 36s
Epoch 3 - Score: 0.6640
INFO:__main__:Epoch 3 - Score: 0.6640


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.0853(0.1224) 
f1 score : 0.5522788203753352
recall score : 0.6776315789473685
precision score : 0.4660633484162896
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 59s) Loss: 0.2095(0.2095) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0336(0.0902) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0791(0.0815) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0470(0.0791) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0414(0.0414) 


Epoch 4 - avg_train_loss: 0.0791  avg_val_loss: 0.1345  time: 37s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0791  avg_val_loss: 0.1345  time: 37s
Epoch 4 - Score: 0.7022
INFO:__main__:Epoch 4 - Score: 0.7022
Epoch 4 - Save Best Score: 0.7022 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7022 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2010(0.1345) 
f1 score : 0.443609022556391
recall score : 0.3881578947368421
precision score : 0.5175438596491229
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 59s) Loss: 0.0350(0.0350) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0102(0.0623) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.1088(0.0620) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0231(0.0618) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0313(0.0313) 


Epoch 5 - avg_train_loss: 0.0618  avg_val_loss: 0.1534  time: 37s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0618  avg_val_loss: 0.1534  time: 37s
Epoch 5 - Score: 0.7123
INFO:__main__:Epoch 5 - Score: 0.7123
Epoch 5 - Save Best Score: 0.7123 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7123 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2111(0.1534) 
f1 score : 0.43478260869565216
recall score : 0.3618421052631579
precision score : 0.5445544554455446
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 56s) Loss: 0.0965(0.0965) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0016(0.0473) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0124(0.0528) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0239(0.0530) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0390(0.0390) 


Epoch 6 - avg_train_loss: 0.0530  avg_val_loss: 0.1538  time: 37s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0530  avg_val_loss: 0.1538  time: 37s
Epoch 6 - Score: 0.7123
INFO:__main__:Epoch 6 - Score: 0.7123


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.1845(0.1538) 
f1 score : 0.48375451263537905
recall score : 0.4407894736842105
precision score : 0.536


Score: 0.7123
INFO:__main__:Score: 0.7123
ACC BEST Score: 0.7143
INFO:__main__:ACC BEST Score: 0.7143
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.43478260869565216
recall score : 0.3618421052631579
precision score : 0.5445544554455446


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 2m 4s) Loss: 0.4151(0.4151) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2205(0.1414) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1024(0.1358) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0652(0.1346) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0241(0.0241) 


Epoch 1 - avg_train_loss: 0.1346  avg_val_loss: 0.1634  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.1346  avg_val_loss: 0.1634  time: 36s
Epoch 1 - Score: 0.6942
INFO:__main__:Epoch 1 - Score: 0.6942
Epoch 1 - Save Best Score: 0.6942 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6942 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4824(0.1634) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 59s) Loss: 0.0558(0.0558) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1293(0.1117) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0160(0.1124) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1443(0.1107) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0599(0.0599) 


Epoch 2 - avg_train_loss: 0.1107  avg_val_loss: 0.1300  time: 37s
INFO:__main__:Epoch 2 - avg_train_loss: 0.1107  avg_val_loss: 0.1300  time: 37s
Epoch 2 - Score: 0.6901
INFO:__main__:Epoch 2 - Score: 0.6901


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.2416(0.1300) 
f1 score : 0.45390070921985815
recall score : 0.42105263157894735
precision score : 0.49230769230769234
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 58s) Loss: 0.0879(0.0879) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.0427(0.0901) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0984(0.0845) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0645(0.0810) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0645(0.0645) 


Epoch 3 - avg_train_loss: 0.0810  avg_val_loss: 0.1361  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.0810  avg_val_loss: 0.1361  time: 36s
Epoch 3 - Score: 0.6398
INFO:__main__:Epoch 3 - Score: 0.6398


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.1897(0.1361) 
f1 score : 0.48115942028985503
recall score : 0.5460526315789473
precision score : 0.43005181347150256
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 52s) Loss: 0.0521(0.0521) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0166(0.0451) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1165(0.0484) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0312(0.0477) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0348(0.0348) 


Epoch 4 - avg_train_loss: 0.0477  avg_val_loss: 0.1800  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0477  avg_val_loss: 0.1800  time: 36s
Epoch 4 - Score: 0.6640
INFO:__main__:Epoch 4 - Score: 0.6640


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.4234(0.1800) 
f1 score : 0.44884488448844884
recall score : 0.4473684210526316
precision score : 0.4503311258278146
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 57s) Loss: 0.0454(0.0454) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0307(0.0277) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0464(0.0271) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0127(0.0263) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.0150(0.0150) 


Epoch 5 - avg_train_loss: 0.0263  avg_val_loss: 0.2951  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0263  avg_val_loss: 0.2951  time: 36s
Epoch 5 - Score: 0.6620
INFO:__main__:Epoch 5 - Score: 0.6620


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8591(0.2951) 
f1 score : 0.39568345323741005
recall score : 0.3618421052631579
precision score : 0.4365079365079365
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 58s) Loss: 0.0029(0.0029) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0201(0.0215) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0070(0.0187) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0134(0.0190) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.0109(0.0109) 


Epoch 6 - avg_train_loss: 0.0190  avg_val_loss: 0.3671  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0190  avg_val_loss: 0.3671  time: 36s
Epoch 6 - Score: 0.6559
INFO:__main__:Epoch 6 - Score: 0.6559


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0918(0.3671) 
f1 score : 0.3643122676579926
recall score : 0.3223684210526316
precision score : 0.4188034188034188


Score: 0.6942
INFO:__main__:Score: 0.6942
ACC BEST Score: 0.7062
INFO:__main__:ACC BEST Score: 0.7062
Score: 0.7085
INFO:__main__:Score: 0.7085
ACC BEST Score: 0.7085
INFO:__main__:ACC BEST Score: 0.7085


f1 score : 0.0
recall score : 0.0
precision score : 0.0
f1 score : 0.287819253438114
recall score : 0.19238345370978333
precision score : 0.571150097465887


In [None]:
from google.colab import runtime
runtime.unassign()