In [1]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
!nvidia-smi

Sat Apr 22 14:52:25 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    45W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:

import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup, get_polynomial_decay_schedule_with_warmup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
DIR = "/content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")
CUSTOM_MODEL_DIR = os.path.join(OUTPUT_DIR,'clrp_deberta_v3_base_epoch20')
OUTPUT_EXP_DIR = DIR + '/output/EXP026/'
if not os.path.exists(OUTPUT_EXP_DIR):
    os.makedirs(OUTPUT_EXP_DIR)

In [6]:

# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    apex=True
    print_freq=100
    num_workers=4
    model_name="microsoft/deberta-v3-base"
    # model="microsoft/deberta-v3-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    model = CUSTOM_MODEL_DIR
    scheduler='polynomial' # ['linear', 'cosine']
    batch_scheduler=True
    num_cycles=0.5
    num_warmup_steps=0
    epochs=6
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=16
    fc_dropout=0.2
    target_size=1
    max_len=256
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    n_fold=10
    trn_fold=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    train=True
    nth_awp_start_epoch=1
    gradient_checkpointing = False
    freezing = False
    num_reinit_layers = 1
    is_reinit_layer = True

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [7]:
def get_score(labels, outputs):
    thresh = 0.5
    y_pred = outputs
    y_true = labels
    f_score = f1_score(y_true, (y_pred>thresh).astype(int))
    r_score = recall_score(y_true, (y_pred>thresh).astype(int))
    p_score = precision_score(y_true, (y_pred>thresh).astype(int))
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return accuracy_score(y_true, (y_pred>thresh).astype(int))

def get_acc_score(labels, outputs):
    y_pred = outputs
    y_true = labels
    best_score = 0
    best_thresh = 0.5
    for thresh in np.arange(0.1, 0.80, 0.01):
        thresh = np.round(thresh, 2)
        score = accuracy_score(y_true, (y_pred>thresh).astype(int))
        #print("Accuracy score at threshold {0} is {1}".format(thresh, score))
        if score > best_score:
          best_score = score
          best_thresh = thresh
    print(f"thresh : {best_thresh}")
    return accuracy_score(y_true, (y_pred>best_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Freezes module's parameters.
    """
    
    for parameter in module.parameters():
        parameter.requires_grad = False
        
def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """
    
    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)
            
    return freezed_parameters

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930
    """
    
    embedding_types = ("word", "position", "token_type")
    for embedding_type in embedding_types:
        attr_name = f"{embedding_type}_embeddings"
        
        if hasattr(embeddings_path, attr_name): 
            bnb.optim.GlobalOptimManager.get_instance().register_module_override(
                getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
            )

In [9]:
import pandas as pd
import numpy as np

train = pd.read_csv(os.path.join(INPUT_DIR,"train_data.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test_data.csv"))
sample_sub = pd.read_csv(os.path.join(INPUT_DIR,"submission.csv"))

print(train.shape)
display(train.head(3))

print(test.shape)
display(test.head(3))

print(sample_sub.shape)
display(sample_sub.head(3))

(4974, 6)


Unnamed: 0,id,title,year,abstract,keywords,y
0,1,Hierarchical Adversarially Learned Inference,2018,We propose a novel hierarchical generative mod...,"generative, hierarchical, unsupervised, semisu...",0
1,2,Learning to Compute Word Embeddings On the Fly,2018,Words in natural language follow a Zipfian dis...,"NLU, word embeddings, representation learning",0
2,3,Graph2Seq: Scalable Learning Dynamics for Graphs,2018,Neural networks are increasingly used as a gen...,,0


(6393, 5)


Unnamed: 0,id,title,year,abstract,keywords
0,1,StyleAlign: Analysis and Applications of Align...,2022,"In this paper, we perform an in-depth study of...","StyleGAN, transfer learning, fine tuning, mode..."
1,2,Embedding a random graph via GNN: mean-field i...,2021,We develop a theory for embedding a random gra...,"Graph neural network, graph embedding, multi-r..."
2,3,BBRefinement: an universal scheme to improve p...,2021,We present a conceptually simple yet powerful ...,"object detection, deep neural networks, refine..."


(6393, 2)


Unnamed: 0,id,y
0,1,0
1,2,0
2,3,0


In [10]:
train["texts"] = train["title"] + "[SEP]" + train["abstract"]  

In [11]:
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train.y)):
    train.loc[val_ , "kfold"] = int(fold)
    
train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [12]:
# ====================================================
# tokenizer
# ====================================================
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

In [13]:
# ====================================================
# Define max_len
# ====================================================
lengths = []
tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
for text in tk0:
    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
CFG.max_len = max(lengths) + 3 # cls
LOGGER.info(f"max_len: {CFG.max_len}")

100%|██████████| 4974/4974 [00:03<00:00, 1339.47it/s]
max_len: 522
INFO:__main__:max_len: 522


In [14]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False,
                           truncation=True)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TrainDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

class ValidDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [15]:
def reinit_layers(model):

    #for layer in model.model.encoder.layer[-CFG.num_reinit_layers:]:
    for layer in model.encoder.layer[-CFG.num_reinit_layers:]:    #Custome model内(backbone)

            for module in layer.modules():

                if isinstance(module,nn.Linear):
                    module.weight.data.normal_(mean=0.0,std=model.config.initializer_range)
                    if module.bias is not None:
                            module.bias.data.zero_()
                elif isinstance(module, nn.Embedding):
                        module.weight.data.normal_(mean=0.0, std=model.config.initializer_range)
                        if module.padding_idx is not None:
                            module.weight.data[module.padding_idx].zero_()
                elif isinstance(module, nn.LayerNorm):
                        module.bias.data.zero_()
                        module.weight.data.fill_(1.0)
                        
    return model

In [16]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

class MaxPooling(nn.Module):
    def __init__(self):
        super(MaxPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        embeddings = last_hidden_state.clone()
        embeddings[input_mask_expanded == 0] = -1e4
        max_embeddings, _ = torch.max(embeddings, dim=1)
        return max_embeddings
    

class CustomModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel(self.config)
        if CFG.is_reinit_layer:
            self.model = reinit_layers(self.model)
            print(f'Reinitializing Last {CFG.num_reinit_layers} Layers.')
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())
        
        self.high_dropout = nn.Dropout(p=0.5)

        self.pool = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        self.layer_norm1 = nn.LayerNorm(self.config.hidden_size)
        self._init_weights(self.layer_norm1)
        #self.sig = nn.Sigmoid()
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        feature = self.layer_norm1(feature)
        return feature, outputs

    def forward(self, inputs=None, labels=None):
        feature, outputs = self.feature(inputs)
        logits = torch.mean(
            torch.stack(
                [self.fc(self.high_dropout(feature)) for _ in range(5)],
                dim=0,
            ),
            dim=0
        )
        
        return logits

In [17]:
def calculate_loss(inputs, labels, model, criterion, is_valid=True, device="cpu"):    
    outputs = model(inputs,labels)
    loss = criterion(outputs.view(1,-1), labels.view(1,-1))
    return (loss, outputs) if is_valid else loss

In [18]:

# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            loss = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=False, device=device)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        if scaler is not None:
            scaler.unscale_(optimizer)
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)

        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  #'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          #grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            loss, y_preds = calculate_loss(inputs=inputs, labels=labels, model=model, criterion=criterion, is_valid=True, device=device)
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    predictions = np.concatenate(predictions)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [19]:

# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds['y'].values
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr=5e-6, decoder_lr=1e-4, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", 
                    "LayerNorm.weight"]
        group1=['layer.0.','layer.1.','layer.2.','layer.3.']
        group2=['layer.4.','layer.5.','layer.6.','layer.7.']    
        group3=['layer.8.','layer.9.','layer.10.','layer.11.']
        group_all=['layer.0.','layer.1.','layer.2.','layer.3.','layer.4.','layer.5.','layer.6.','layer.7.','layer.8.','layer.9.','layer.10.','layer.11.']
        optimizer_parameters = [
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': weight_decay},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': weight_decay, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': weight_decay, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': weight_decay, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and not any(nd in n for nd in group_all)],'weight_decay': 0.0},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group1)],'weight_decay': 0.0, 'lr': encoder_lr/2.6},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group2)],'weight_decay': 0.0, 'lr': encoder_lr},
        {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay) and any(nd in n for nd in group3)],'weight_decay': 0.0, 'lr': encoder_lr*2.6},
        {'params': [p for n, p in model.named_parameters() if "model" not in n], 'lr':decoder_lr, "momentum" : 0.99},
    ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        elif cfg.scheduler == 'polynomial':
            warmup_steps = int(len(train_folds) / CFG.batch_size * 0.1)
            scheduler = get_polynomial_decay_schedule_with_warmup(
                optimizer, warmup_steps, num_train_steps, lr_end=7e-7, power=3.0)
        return scheduler
    
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss()
    
    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        
        # scoring
        score_05 = get_score(valid_labels, predictions)
        score = get_acc_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        
        if best_score < score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model_name.replace('/', '-')}_fold{fold}_best.pth", 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [None]:
if __name__ == '__main__':
    
    def get_result(oof_df):
        labels = oof_df['y'].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        acc_score = get_acc_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'ACC BEST Score: {acc_score:<.4f}')
    
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "torch_dt

Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 1s (remain 7m 6s) Loss: 0.6850(0.6850) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 13s (remain 0m 23s) Loss: 0.7180(0.6593) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 24s (remain 0m 9s) Loss: 0.6306(0.6413) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 33s (remain 0m 0s) Loss: 0.6026(0.6317) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4987(0.4987) 


Epoch 1 - avg_train_loss: 0.6317  avg_val_loss: 0.6048  time: 37s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6317  avg_val_loss: 0.6048  time: 37s
Epoch 1 - Score: 0.7028
INFO:__main__:Epoch 1 - Score: 0.7028
Epoch 1 - Save Best Score: 0.7028 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7028 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8120(0.6048) 
f1 score : 0.18848167539267013
recall score : 0.11842105263157894
precision score : 0.46153846153846156
thresh : 0.47
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.5321(0.5321) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.7760(0.5832) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5995(0.5854) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5318(0.5818) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3250(0.3250) 


Epoch 2 - avg_train_loss: 0.5818  avg_val_loss: 0.5676  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5818  avg_val_loss: 0.5676  time: 36s
Epoch 2 - Score: 0.7149
INFO:__main__:Epoch 2 - Score: 0.7149
Epoch 2 - Save Best Score: 0.7149 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7149 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9681(0.5676) 
f1 score : 0.24083769633507854
recall score : 0.1513157894736842
precision score : 0.5897435897435898
thresh : 0.44
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.5170(0.5170) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.4536(0.5392) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4901(0.5350) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5388(0.5336) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3525(0.3525) 


Epoch 3 - avg_train_loss: 0.5336  avg_val_loss: 0.5677  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5336  avg_val_loss: 0.5677  time: 36s
Epoch 3 - Score: 0.7149
INFO:__main__:Epoch 3 - Score: 0.7149


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8511(0.5677) 
f1 score : 0.4258555133079847
recall score : 0.3684210526315789
precision score : 0.5045045045045045
thresh : 0.65
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 32s) Loss: 0.3708(0.3708) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2370(0.4557) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.3995(0.4565) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 31s (remain 0m 0s) Loss: 0.6192(0.4584) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2716(0.2716) 


Epoch 4 - avg_train_loss: 0.4584  avg_val_loss: 0.6018  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4584  avg_val_loss: 0.6018  time: 36s
Epoch 4 - Score: 0.7209
INFO:__main__:Epoch 4 - Score: 0.7209
Epoch 4 - Save Best Score: 0.7209 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7209 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0386(0.6018) 
f1 score : 0.41245136186770426
recall score : 0.34868421052631576
precision score : 0.5047619047619047
thresh : 0.72
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.3118(0.3118) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3397(0.4181) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2876(0.3988) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4698(0.3902) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2827(0.2827) 


Epoch 5 - avg_train_loss: 0.3902  avg_val_loss: 0.6700  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3902  avg_val_loss: 0.6700  time: 36s
Epoch 5 - Score: 0.7088
INFO:__main__:Epoch 5 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0172(0.6700) 
f1 score : 0.4270462633451957
recall score : 0.39473684210526316
precision score : 0.46511627906976744
thresh : 0.79
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 28s) Loss: 0.5261(0.5261) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3276(0.3296) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2817(0.3391) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3750(0.3449) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2078(0.2078) 


Epoch 6 - avg_train_loss: 0.3449  avg_val_loss: 0.7080  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.3449  avg_val_loss: 0.7080  time: 36s
Epoch 6 - Score: 0.7048
INFO:__main__:Epoch 6 - Score: 0.7048


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2881(0.7080) 
f1 score : 0.39534883720930236
recall score : 0.3355263157894737
precision score : 0.4811320754716981
thresh : 0.78


Score: 0.6968
INFO:__main__:Score: 0.6968
ACC BEST Score: 0.7209
INFO:__main__:ACC BEST Score: 0.7209
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.41245136186770426
recall score : 0.34868421052631576
precision score : 0.5047619047619047
thresh : 0.72


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 53s) Loss: 0.8376(0.8376) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.6266(0.6663) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.7470(0.6412) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6954(0.6282) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2387(0.2387) 


Epoch 1 - avg_train_loss: 0.6282  avg_val_loss: 0.6024  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6282  avg_val_loss: 0.6024  time: 36s
Epoch 1 - Score: 0.7209
INFO:__main__:Epoch 1 - Score: 0.7209
Epoch 1 - Save Best Score: 0.7209 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7209 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.4183(0.6024) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
thresh : 0.32
Epoch: [2][0/279] Elapsed 0m 0s (remain 2m 0s) Loss: 0.6201(0.6201) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3611(0.5867) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6943(0.5785) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4848(0.5770) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.3843(0.3843) 


Epoch 2 - avg_train_loss: 0.5770  avg_val_loss: 0.5767  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5770  avg_val_loss: 0.5767  time: 36s
Epoch 2 - Score: 0.7108
INFO:__main__:Epoch 2 - Score: 0.7108


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9624(0.5767) 
f1 score : 0.3167420814479638
recall score : 0.22875816993464052
precision score : 0.5147058823529411
thresh : 0.53
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.4846(0.4846) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.3769(0.5153) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.8346(0.5220) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5251(0.5202) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3114(0.3114) 


Epoch 3 - avg_train_loss: 0.5202  avg_val_loss: 0.5587  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5202  avg_val_loss: 0.5587  time: 36s
Epoch 3 - Score: 0.7269
INFO:__main__:Epoch 3 - Score: 0.7269
Epoch 3 - Save Best Score: 0.7269 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7269 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0444(0.5587) 
f1 score : 0.34821428571428575
recall score : 0.2549019607843137
precision score : 0.5492957746478874
thresh : 0.46
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 52s) Loss: 0.4135(0.4135) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.6335(0.4822) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4531(0.4655) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4998(0.4575) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3277(0.3277) 


Epoch 4 - avg_train_loss: 0.4575  avg_val_loss: 0.5854  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4575  avg_val_loss: 0.5854  time: 36s
Epoch 4 - Score: 0.7189
INFO:__main__:Epoch 4 - Score: 0.7189


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0473(0.5854) 
f1 score : 0.4723247232472325
recall score : 0.41830065359477125
precision score : 0.5423728813559322
thresh : 0.67
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.3352(0.3352) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.6145(0.3970) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4259(0.3847) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4609(0.3879) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4922(0.4922) 


Epoch 5 - avg_train_loss: 0.3879  avg_val_loss: 0.6355  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3879  avg_val_loss: 0.6355  time: 36s
Epoch 5 - Score: 0.7189
INFO:__main__:Epoch 5 - Score: 0.7189


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7941(0.6355) 
f1 score : 0.5459940652818991
recall score : 0.6013071895424836
precision score : 0.5
thresh : 0.64
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 36s) Loss: 0.3556(0.3556) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2576(0.3449) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.4790(0.3431) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4891(0.3403) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3954(0.3954) 


Epoch 6 - avg_train_loss: 0.3403  avg_val_loss: 0.6739  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.3403  avg_val_loss: 0.6739  time: 36s
Epoch 6 - Score: 0.7088
INFO:__main__:Epoch 6 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2165(0.6739) 
f1 score : 0.47183098591549294
recall score : 0.43790849673202614
precision score : 0.5114503816793893
thresh : 0.57


Score: 0.7068
INFO:__main__:Score: 0.7068
ACC BEST Score: 0.7269
INFO:__main__:ACC BEST Score: 0.7269
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.34821428571428575
recall score : 0.2549019607843137
precision score : 0.5492957746478874
thresh : 0.46


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 50s) Loss: 0.7084(0.7084) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5827(0.6257) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6357(0.6238) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6022(0.6184) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3881(0.3881) 


Epoch 1 - avg_train_loss: 0.6184  avg_val_loss: 0.5792  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6184  avg_val_loss: 0.5792  time: 36s
Epoch 1 - Score: 0.7068
INFO:__main__:Epoch 1 - Score: 0.7068
Epoch 1 - Save Best Score: 0.7068 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7068 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0469(0.5792) 
f1 score : 0.012987012987012988
recall score : 0.006535947712418301
precision score : 1.0
thresh : 0.46
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 40s) Loss: 0.5612(0.5612) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3976(0.5736) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5141(0.5769) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6042(0.5710) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3531(0.3531) 


Epoch 2 - avg_train_loss: 0.5710  avg_val_loss: 0.5602  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5710  avg_val_loss: 0.5602  time: 36s
Epoch 2 - Score: 0.7149
INFO:__main__:Epoch 2 - Score: 0.7149
Epoch 2 - Save Best Score: 0.7149 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7149 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0670(0.5602) 
f1 score : 0.3650793650793651
recall score : 0.3006535947712418
precision score : 0.46464646464646464
thresh : 0.64
Epoch: [3][0/279] Elapsed 0m 0s (remain 2m 2s) Loss: 0.5314(0.5314) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.4113(0.5181) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4345(0.5085) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3285(0.5078) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2458(0.2458) 


Epoch 3 - avg_train_loss: 0.5078  avg_val_loss: 0.5703  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5078  avg_val_loss: 0.5703  time: 36s
Epoch 3 - Score: 0.7068
INFO:__main__:Epoch 3 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3240(0.5703) 
f1 score : 0.2512562814070352
recall score : 0.16339869281045752
precision score : 0.5434782608695652
thresh : 0.47
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 52s) Loss: 0.5538(0.5538) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3110(0.4422) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3478(0.4160) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2936(0.4142) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3733(0.3733) 


Epoch 4 - avg_train_loss: 0.4142  avg_val_loss: 0.6018  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4142  avg_val_loss: 0.6018  time: 36s
Epoch 4 - Score: 0.7169
INFO:__main__:Epoch 4 - Score: 0.7169
Epoch 4 - Save Best Score: 0.7169 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7169 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2806(0.6018) 
f1 score : 0.43579766536964976
recall score : 0.3660130718954248
precision score : 0.5384615384615384
thresh : 0.46
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.3173(0.3173) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.4507(0.3494) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2628(0.3346) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1873(0.3264) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.4224(0.4224) 


Epoch 5 - avg_train_loss: 0.3264  avg_val_loss: 0.6913  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3264  avg_val_loss: 0.6913  time: 36s
Epoch 5 - Score: 0.7088
INFO:__main__:Epoch 5 - Score: 0.7088


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.4428(0.6913) 
f1 score : 0.45985401459854014
recall score : 0.4117647058823529
precision score : 0.5206611570247934
thresh : 0.44
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.1888(0.1888) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2325(0.2624) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 8s) Loss: 0.4552(0.2697) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1346(0.2640) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.4773(0.4773) 


Epoch 6 - avg_train_loss: 0.2640  avg_val_loss: 0.7646  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.2640  avg_val_loss: 0.7646  time: 36s
Epoch 6 - Score: 0.7008
INFO:__main__:Epoch 6 - Score: 0.7008


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.6326(0.7646) 
f1 score : 0.4555160142348754
recall score : 0.41830065359477125
precision score : 0.5
thresh : 0.73


Score: 0.7088
INFO:__main__:Score: 0.7088
ACC BEST Score: 0.7169
INFO:__main__:ACC BEST Score: 0.7169
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.43579766536964976
recall score : 0.3660130718954248
precision score : 0.5384615384615384
thresh : 0.46


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 54s) Loss: 1.2442(1.2442) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.5546(0.6571) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4608(0.6299) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6143(0.6264) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2900(0.2900) 


Epoch 1 - avg_train_loss: 0.6264  avg_val_loss: 0.5926  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6264  avg_val_loss: 0.5926  time: 36s
Epoch 1 - Score: 0.7068
INFO:__main__:Epoch 1 - Score: 0.7068
Epoch 1 - Save Best Score: 0.7068 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7068 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2268(0.5926) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
thresh : 0.35
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 57s) Loss: 0.5599(0.5599) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.4277(0.5775) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6150(0.5708) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6501(0.5705) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3775(0.3775) 


Epoch 2 - avg_train_loss: 0.5705  avg_val_loss: 0.5844  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5705  avg_val_loss: 0.5844  time: 36s
Epoch 2 - Score: 0.7088
INFO:__main__:Epoch 2 - Score: 0.7088
Epoch 2 - Save Best Score: 0.7088 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7088 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9640(0.5844) 
f1 score : 0.25615763546798026
recall score : 0.16993464052287582
precision score : 0.52
thresh : 0.54
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 58s) Loss: 0.5248(0.5248) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4959(0.5170) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5298(0.5133) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4614(0.5052) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.2874(0.2874) 


Epoch 3 - avg_train_loss: 0.5052  avg_val_loss: 0.5766  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5052  avg_val_loss: 0.5766  time: 36s
Epoch 3 - Score: 0.7108
INFO:__main__:Epoch 3 - Score: 0.7108
Epoch 3 - Save Best Score: 0.7108 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7108 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0746(0.5766) 
f1 score : 0.3833333333333333
recall score : 0.3006535947712418
precision score : 0.5287356321839081
thresh : 0.62
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 55s) Loss: 0.4822(0.4822) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2986(0.4096) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.8996(0.4124) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4687(0.4091) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1460(0.1460) 


Epoch 4 - avg_train_loss: 0.4091  avg_val_loss: 0.6875  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4091  avg_val_loss: 0.6875  time: 36s
Epoch 4 - Score: 0.7108
INFO:__main__:Epoch 4 - Score: 0.7108


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.7826(0.6875) 
f1 score : 0.2801932367149758
recall score : 0.1895424836601307
precision score : 0.5370370370370371
thresh : 0.39
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 48s) Loss: 0.2677(0.2677) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.2643(0.3251) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1036(0.3150) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5877(0.3116) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.3166(0.3166) 


Epoch 5 - avg_train_loss: 0.3116  avg_val_loss: 0.7106  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3116  avg_val_loss: 0.7106  time: 36s
Epoch 5 - Score: 0.7048
INFO:__main__:Epoch 5 - Score: 0.7048


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.4057(0.7106) 
f1 score : 0.4620689655172414
recall score : 0.43790849673202614
precision score : 0.48905109489051096
thresh : 0.73
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.2754(0.2754) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2527(0.2427) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3716(0.2528) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3323(0.2476) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.1699(0.1699) 


Epoch 6 - avg_train_loss: 0.2476  avg_val_loss: 0.8567  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.2476  avg_val_loss: 0.8567  time: 36s
Epoch 6 - Score: 0.7068
INFO:__main__:Epoch 6 - Score: 0.7068


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 2.1779(0.8567) 
f1 score : 0.3333333333333333
recall score : 0.26143790849673204
precision score : 0.45977011494252873
thresh : 0.77


Score: 0.7028
INFO:__main__:Score: 0.7028
ACC BEST Score: 0.7108
INFO:__main__:ACC BEST Score: 0.7108
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.3833333333333333
recall score : 0.3006535947712418
precision score : 0.5287356321839081
thresh : 0.62


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 51s) Loss: 0.7645(0.7645) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.6802(0.6509) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4748(0.6412) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6408(0.6308) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2193(0.2193) 


Epoch 1 - avg_train_loss: 0.6308  avg_val_loss: 0.5978  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6308  avg_val_loss: 0.5978  time: 36s
Epoch 1 - Score: 0.7042
INFO:__main__:Epoch 1 - Score: 0.7042
Epoch 1 - Save Best Score: 0.7042 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7042 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3914(0.5978) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
thresh : 0.35
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 51s) Loss: 0.4361(0.4361) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5165(0.6043) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6018(0.5901) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5852(0.5850) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3078(0.3078) 


Epoch 2 - avg_train_loss: 0.5850  avg_val_loss: 0.5674  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5850  avg_val_loss: 0.5674  time: 36s
Epoch 2 - Score: 0.7143
INFO:__main__:Epoch 2 - Score: 0.7143
Epoch 2 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9915(0.5674) 
f1 score : 0.1899441340782123
recall score : 0.1118421052631579
precision score : 0.6296296296296297
thresh : 0.46
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 48s) Loss: 0.5650(0.5650) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.7180(0.5444) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5157(0.5299) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4714(0.5252) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2934(0.2934) 


Epoch 3 - avg_train_loss: 0.5252  avg_val_loss: 0.5775  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5252  avg_val_loss: 0.5775  time: 36s
Epoch 3 - Score: 0.7163
INFO:__main__:Epoch 3 - Score: 0.7163
Epoch 3 - Save Best Score: 0.7163 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7163 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9150(0.5775) 
f1 score : 0.39183673469387753
recall score : 0.3157894736842105
precision score : 0.5161290322580645
thresh : 0.71
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 52s) Loss: 0.3016(0.3016) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.6059(0.4761) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4046(0.4718) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4538(0.4602) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2398(0.2398) 


Epoch 4 - avg_train_loss: 0.4602  avg_val_loss: 0.6237  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4602  avg_val_loss: 0.6237  time: 36s
Epoch 4 - Score: 0.7223
INFO:__main__:Epoch 4 - Score: 0.7223
Epoch 4 - Save Best Score: 0.7223 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7223 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0690(0.6237) 
f1 score : 0.41322314049586784
recall score : 0.32894736842105265
precision score : 0.5555555555555556
thresh : 0.56
Epoch: [5][0/279] Elapsed 0m 0s (remain 2m 1s) Loss: 0.4282(0.4282) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.2909(0.3887) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3090(0.3883) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4719(0.3872) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2313(0.2313) 


Epoch 5 - avg_train_loss: 0.3872  avg_val_loss: 0.6818  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3872  avg_val_loss: 0.6818  time: 36s
Epoch 5 - Score: 0.7223
INFO:__main__:Epoch 5 - Score: 0.7223


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.1909(0.6818) 
f1 score : 0.41767068273092367
recall score : 0.34210526315789475
precision score : 0.5360824742268041
thresh : 0.62
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 42s) Loss: 0.4685(0.4685) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1858(0.3316) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4567(0.3330) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3565(0.3366) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.4074(0.4074) 


Epoch 6 - avg_train_loss: 0.3366  avg_val_loss: 0.7264  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.3366  avg_val_loss: 0.7264  time: 36s
Epoch 6 - Score: 0.7143
INFO:__main__:Epoch 6 - Score: 0.7143


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8892(0.7264) 
f1 score : 0.45333333333333337
recall score : 0.4473684210526316
precision score : 0.4594594594594595
thresh : 0.69


Score: 0.7143
INFO:__main__:Score: 0.7143
ACC BEST Score: 0.7223
INFO:__main__:ACC BEST Score: 0.7223
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.41322314049586784
recall score : 0.32894736842105265
precision score : 0.5555555555555556
thresh : 0.56


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.6305(0.6305) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.6764(0.6387) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4733(0.6298) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.7154(0.6228) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.2839(0.2839) 


Epoch 1 - avg_train_loss: 0.6228  avg_val_loss: 0.6066  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6228  avg_val_loss: 0.6066  time: 36s
Epoch 1 - Score: 0.6942
INFO:__main__:Epoch 1 - Score: 0.6942
Epoch 1 - Save Best Score: 0.6942 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6942 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3095(0.6066) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
thresh : 0.38
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.7081(0.7081) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.5739(0.5726) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3821(0.5753) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3826(0.5765) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3962(0.3962) 


Epoch 2 - avg_train_loss: 0.5765  avg_val_loss: 0.5985  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5765  avg_val_loss: 0.5985  time: 36s
Epoch 2 - Score: 0.7002
INFO:__main__:Epoch 2 - Score: 0.7002
Epoch 2 - Save Best Score: 0.7002 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7002 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0250(0.5985) 
f1 score : 0.25339366515837103
recall score : 0.18421052631578946
precision score : 0.4057971014492754
thresh : 0.56
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 50s) Loss: 0.5485(0.5485) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4112(0.5234) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5173(0.5157) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4428(0.5142) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2951(0.2951) 


Epoch 3 - avg_train_loss: 0.5142  avg_val_loss: 0.6078  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5142  avg_val_loss: 0.6078  time: 36s
Epoch 3 - Score: 0.7022
INFO:__main__:Epoch 3 - Score: 0.7022
Epoch 3 - Save Best Score: 0.7022 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7022 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.1896(0.6078) 
f1 score : 0.2318840579710145
recall score : 0.15789473684210525
precision score : 0.43636363636363634
thresh : 0.55
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 54s) Loss: 0.5037(0.5037) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3815(0.4613) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1930(0.4385) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2885(0.4289) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4821(0.4821) 


Epoch 4 - avg_train_loss: 0.4289  avg_val_loss: 0.6859  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4289  avg_val_loss: 0.6859  time: 36s
Epoch 4 - Score: 0.6881
INFO:__main__:Epoch 4 - Score: 0.6881


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9373(0.6859) 
f1 score : 0.42105263157894735
recall score : 0.42105263157894735
precision score : 0.42105263157894735
thresh : 0.67
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 48s) Loss: 0.2547(0.2547) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3755(0.3539) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3326(0.3459) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3237(0.3348) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5101(0.5101) 


Epoch 5 - avg_train_loss: 0.3348  avg_val_loss: 0.7692  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3348  avg_val_loss: 0.7692  time: 36s
Epoch 5 - Score: 0.6881
INFO:__main__:Epoch 5 - Score: 0.6881


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.9368(0.7692) 
f1 score : 0.4536741214057508
recall score : 0.46710526315789475
precision score : 0.4409937888198758
thresh : 0.79
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 48s) Loss: 0.1528(0.1528) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1308(0.2567) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4336(0.2753) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2477(0.2672) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4366(0.4366) 


Epoch 6 - avg_train_loss: 0.2672  avg_val_loss: 0.8683  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.2672  avg_val_loss: 0.8683  time: 36s
Epoch 6 - Score: 0.6861
INFO:__main__:Epoch 6 - Score: 0.6861


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.1781(0.8683) 
f1 score : 0.43243243243243246
recall score : 0.42105263157894735
precision score : 0.4444444444444444
thresh : 0.67


Score: 0.6801
INFO:__main__:Score: 0.6801
ACC BEST Score: 0.7022
INFO:__main__:ACC BEST Score: 0.7022
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.2318840579710145
recall score : 0.15789473684210525
precision score : 0.43636363636363634
thresh : 0.55


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 55s) Loss: 0.7997(0.7997) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.5802(0.6556) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4213(0.6311) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5191(0.6222) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.2191(0.2191) 


Epoch 1 - avg_train_loss: 0.6222  avg_val_loss: 0.6119  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6222  avg_val_loss: 0.6119  time: 36s
Epoch 1 - Score: 0.7062
INFO:__main__:Epoch 1 - Score: 0.7062
Epoch 1 - Save Best Score: 0.7062 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7062 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.4779(0.6119) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
thresh : 0.28
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 52s) Loss: 0.6473(0.6473) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.6890(0.5765) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4922(0.5761) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3779(0.5727) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.2619(0.2619) 


Epoch 2 - avg_train_loss: 0.5727  avg_val_loss: 0.5723  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5727  avg_val_loss: 0.5723  time: 36s
Epoch 2 - Score: 0.7103
INFO:__main__:Epoch 2 - Score: 0.7103
Epoch 2 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.2311(0.5723) 
f1 score : 0.11834319526627217
recall score : 0.06578947368421052
precision score : 0.5882352941176471
thresh : 0.36
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 55s) Loss: 0.6989(0.6989) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.4114(0.5162) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4474(0.5124) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5279(0.5140) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.3328(0.3328) 


Epoch 3 - avg_train_loss: 0.5140  avg_val_loss: 0.5622  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5140  avg_val_loss: 0.5622  time: 36s
Epoch 3 - Score: 0.7143
INFO:__main__:Epoch 3 - Score: 0.7143
Epoch 3 - Save Best Score: 0.7143 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7143 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0700(0.5622) 
f1 score : 0.41322314049586784
recall score : 0.32894736842105265
precision score : 0.5555555555555556
thresh : 0.5
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.4266(0.4266) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.2930(0.4260) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4972(0.4390) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4890(0.4382) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.2992(0.2992) 


Epoch 4 - avg_train_loss: 0.4382  avg_val_loss: 0.5896  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4382  avg_val_loss: 0.5896  time: 36s
Epoch 4 - Score: 0.7243
INFO:__main__:Epoch 4 - Score: 0.7243
Epoch 4 - Save Best Score: 0.7243 Model
INFO:__main__:Epoch 4 - Save Best Score: 0.7243 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.4227(0.5896) 
f1 score : 0.4513618677042802
recall score : 0.3815789473684211
precision score : 0.5523809523809524
thresh : 0.51
Epoch: [5][0/279] Elapsed 0m 0s (remain 2m 2s) Loss: 0.3116(0.3116) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.5420(0.3573) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2553(0.3525) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2004(0.3601) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.4119(0.4119) 


Epoch 5 - avg_train_loss: 0.3601  avg_val_loss: 0.6299  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3601  avg_val_loss: 0.6299  time: 36s
Epoch 5 - Score: 0.7284
INFO:__main__:Epoch 5 - Score: 0.7284
Epoch 5 - Save Best Score: 0.7284 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7284 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3531(0.6299) 
f1 score : 0.5299684542586751
recall score : 0.5526315789473685
precision score : 0.509090909090909
thresh : 0.7
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.3202(0.3202) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.3166(0.2961) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.3053(0.2986) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2723(0.3030) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.4096(0.4096) 


Epoch 6 - avg_train_loss: 0.3030  avg_val_loss: 0.6727  time: 37s
INFO:__main__:Epoch 6 - avg_train_loss: 0.3030  avg_val_loss: 0.6727  time: 37s
Epoch 6 - Score: 0.7223
INFO:__main__:Epoch 6 - Score: 0.7223


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.6999(0.6727) 
f1 score : 0.49836065573770494
recall score : 0.5
precision score : 0.49673202614379086
thresh : 0.69


Score: 0.7002
INFO:__main__:Score: 0.7002
ACC BEST Score: 0.7284
INFO:__main__:ACC BEST Score: 0.7284
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.5299684542586751
recall score : 0.5526315789473685
precision score : 0.509090909090909
thresh : 0.7


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 51s) Loss: 0.6041(0.6041) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.6212(0.6153) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.9318(0.6244) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3374(0.6262) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2652(0.2652) 


Epoch 1 - avg_train_loss: 0.6262  avg_val_loss: 0.6055  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6262  avg_val_loss: 0.6055  time: 36s
Epoch 1 - Score: 0.7103
INFO:__main__:Epoch 1 - Score: 0.7103
Epoch 1 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3234(0.6055) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
thresh : 0.39
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.4794(0.4794) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.6278(0.5816) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.7376(0.5734) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5634(0.5749) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3597(0.3597) 


Epoch 2 - avg_train_loss: 0.5749  avg_val_loss: 0.5932  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5749  avg_val_loss: 0.5932  time: 36s
Epoch 2 - Score: 0.7103
INFO:__main__:Epoch 2 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0235(0.5932) 
f1 score : 0.3257918552036199
recall score : 0.23684210526315788
precision score : 0.5217391304347826
thresh : 0.53
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 49s) Loss: 0.4162(0.4162) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.7250(0.5509) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4478(0.5243) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.5070(0.5269) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3318(0.3318) 


Epoch 3 - avg_train_loss: 0.5269  avg_val_loss: 0.5756  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5269  avg_val_loss: 0.5756  time: 36s
Epoch 3 - Score: 0.7264
INFO:__main__:Epoch 3 - Score: 0.7264
Epoch 3 - Save Best Score: 0.7264 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7264 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0480(0.5756) 
f1 score : 0.3733333333333334
recall score : 0.27631578947368424
precision score : 0.5753424657534246
thresh : 0.53
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.5360(0.5360) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4652(0.4634) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5542(0.4544) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3149(0.4477) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3202(0.3202) 


Epoch 4 - avg_train_loss: 0.4477  avg_val_loss: 0.5986  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4477  avg_val_loss: 0.5986  time: 36s
Epoch 4 - Score: 0.7264
INFO:__main__:Epoch 4 - Score: 0.7264


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.1710(0.5986) 
f1 score : 0.43122676579925656
recall score : 0.3815789473684211
precision score : 0.49572649572649574
thresh : 0.7
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 49s) Loss: 0.2719(0.2719) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.3772(0.3925) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2173(0.3881) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3404(0.3769) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4032(0.4032) 


Epoch 5 - avg_train_loss: 0.3769  avg_val_loss: 0.6523  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3769  avg_val_loss: 0.6523  time: 36s
Epoch 5 - Score: 0.7203
INFO:__main__:Epoch 5 - Score: 0.7203


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.1359(0.6523) 
f1 score : 0.45360824742268036
recall score : 0.4342105263157895
precision score : 0.4748201438848921
thresh : 0.78
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.6213(0.6213) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.4787(0.3312) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1555(0.3245) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1925(0.3243) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3882(0.3882) 


Epoch 6 - avg_train_loss: 0.3243  avg_val_loss: 0.6862  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.3243  avg_val_loss: 0.6862  time: 36s
Epoch 6 - Score: 0.7183
INFO:__main__:Epoch 6 - Score: 0.7183


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3189(0.6862) 
f1 score : 0.45517241379310347
recall score : 0.4342105263157895
precision score : 0.4782608695652174
thresh : 0.78


Score: 0.7163
INFO:__main__:Score: 0.7163
ACC BEST Score: 0.7264
INFO:__main__:ACC BEST Score: 0.7264
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.3733333333333334
recall score : 0.27631578947368424
precision score : 0.5753424657534246
thresh : 0.53


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 47s) Loss: 0.9183(0.9183) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.6875(0.6769) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6457(0.6545) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6804(0.6379) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.3264(0.3264) 


Epoch 1 - avg_train_loss: 0.6379  avg_val_loss: 0.5841  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6379  avg_val_loss: 0.5841  time: 36s
Epoch 1 - Score: 0.7223
INFO:__main__:Epoch 1 - Score: 0.7223
Epoch 1 - Save Best Score: 0.7223 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7223 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.1465(0.5841) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
thresh : 0.33
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.5407(0.5407) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.5959(0.5894) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.6186(0.5867) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3129(0.5811) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.3086(0.3086) 


Epoch 2 - avg_train_loss: 0.5811  avg_val_loss: 0.5602  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5811  avg_val_loss: 0.5602  time: 36s
Epoch 2 - Score: 0.7103
INFO:__main__:Epoch 2 - Score: 0.7103


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.0383(0.5602) 
f1 score : 0.06329113924050632
recall score : 0.03289473684210526
precision score : 0.8333333333333334
thresh : 0.33
Epoch: [3][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.6051(0.6051) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.6562(0.5265) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5812(0.5318) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4773(0.5252) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5344(0.5344) 


Epoch 3 - avg_train_loss: 0.5252  avg_val_loss: 0.5746  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.5252  avg_val_loss: 0.5746  time: 36s
Epoch 3 - Score: 0.7163
INFO:__main__:Epoch 3 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.5490(0.5746) 
f1 score : 0.5410334346504561
recall score : 0.5855263157894737
precision score : 0.5028248587570622
thresh : 0.65
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.7542(0.7542) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.2571(0.4671) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4478(0.4488) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3452(0.4425) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.4349(0.4349) 


Epoch 4 - avg_train_loss: 0.4425  avg_val_loss: 0.5891  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.4425  avg_val_loss: 0.5891  time: 36s
Epoch 4 - Score: 0.7183
INFO:__main__:Epoch 4 - Score: 0.7183


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7163(0.5891) 
f1 score : 0.503448275862069
recall score : 0.48026315789473684
precision score : 0.5289855072463768
thresh : 0.63
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 44s) Loss: 0.3032(0.3032) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.1893(0.3727) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5122(0.3659) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.2514(0.3664) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 7s) Loss: 0.5097(0.5097) 


Epoch 5 - avg_train_loss: 0.3664  avg_val_loss: 0.6404  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.3664  avg_val_loss: 0.6404  time: 36s
Epoch 5 - Score: 0.7243
INFO:__main__:Epoch 5 - Score: 0.7243
Epoch 5 - Save Best Score: 0.7243 Model
INFO:__main__:Epoch 5 - Save Best Score: 0.7243 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.6359(0.6404) 
f1 score : 0.5279503105590063
recall score : 0.5592105263157895
precision score : 0.5
thresh : 0.7
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 45s) Loss: 0.5288(0.5288) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.0407(0.3056) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.1279(0.3240) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1507(0.3211) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4577(0.4577) 


Epoch 6 - avg_train_loss: 0.3211  avg_val_loss: 0.6797  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.3211  avg_val_loss: 0.6797  time: 36s
Epoch 6 - Score: 0.7163
INFO:__main__:Epoch 6 - Score: 0.7163


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8252(0.6797) 
f1 score : 0.4657534246575342
recall score : 0.4473684210526316
precision score : 0.4857142857142857
thresh : 0.71


Score: 0.6942
INFO:__main__:Score: 0.6942
ACC BEST Score: 0.7243
INFO:__main__:ACC BEST Score: 0.7243
DebertaV2Config {
  "_name_or_path": "/content/drive/MyDrive/Competitions/probspace/\u7814\u7a76\u8ad6\u6587\u306e\u56fd\u969b\u5b66\u4f1a\u63a1\u629e\u4e88\u6e2c/output/clrp_deberta_v3_base_epoch20",
  "architectures": [
    "DebertaV2ForMaskedLM"
  ],
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input"

f1 score : 0.5279503105590063
recall score : 0.5592105263157895
precision score : 0.5
thresh : 0.7


Some weights of the model checkpoint at /content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測/output/clrp_deberta_v3_base_epoch20 were not used when initializing DebertaV2Model: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Reinitializing Last 1 Layers.
Epoch: [1][0/279] Elapsed 0m 0s (remain 1m 59s) Loss: 1.0178(1.0178) LR: 0.00000074  
Epoch: [1][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.7529(0.6195) LR: 0.00001752  
Epoch: [1][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5565(0.6138) LR: 0.00001452  
Epoch: [1][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4485(0.6103) LR: 0.00001244  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4341(0.4341) 


Epoch 1 - avg_train_loss: 0.6103  avg_val_loss: 0.5976  time: 36s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6103  avg_val_loss: 0.5976  time: 36s
Epoch 1 - Score: 0.7022
INFO:__main__:Epoch 1 - Score: 0.7022
Epoch 1 - Save Best Score: 0.7022 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7022 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8468(0.5976) 
f1 score : 0.3167420814479638
recall score : 0.23026315789473684
precision score : 0.5072463768115942
thresh : 0.57
Epoch: [2][0/279] Elapsed 0m 0s (remain 1m 54s) Loss: 0.5031(0.5031) LR: 0.00001242  
Epoch: [2][100/279] Elapsed 0m 12s (remain 0m 21s) Loss: 0.7086(0.5703) LR: 0.00001008  
Epoch: [2][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2331(0.5642) LR: 0.00000807  
Epoch: [2][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.6604(0.5606) LR: 0.00000673  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4357(0.4357) 


Epoch 2 - avg_train_loss: 0.5606  avg_val_loss: 0.5914  time: 36s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5606  avg_val_loss: 0.5914  time: 36s
Epoch 2 - Score: 0.7103
INFO:__main__:Epoch 2 - Score: 0.7103
Epoch 2 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7103 Model


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.8355(0.5914) 
f1 score : 0.4444444444444444
recall score : 0.40789473684210525
precision score : 0.4881889763779528
thresh : 0.56
Epoch: [3][0/279] Elapsed 0m 0s (remain 2m 5s) Loss: 0.5014(0.5014) LR: 0.00000671  
Epoch: [3][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.3796(0.4941) LR: 0.00000524  
Epoch: [3][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.5622(0.4764) LR: 0.00000403  
Epoch: [3][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.4005(0.4658) LR: 0.00000325  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.4393(0.4393) 


Epoch 3 - avg_train_loss: 0.4658  avg_val_loss: 0.6287  time: 36s
INFO:__main__:Epoch 3 - avg_train_loss: 0.4658  avg_val_loss: 0.6287  time: 36s
Epoch 3 - Score: 0.7082
INFO:__main__:Epoch 3 - Score: 0.7082


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 0.7729(0.6287) 
f1 score : 0.48214285714285715
recall score : 0.5328947368421053
precision score : 0.44021739130434784
thresh : 0.73
Epoch: [4][0/279] Elapsed 0m 0s (remain 1m 46s) Loss: 0.3369(0.3369) LR: 0.00000324  
Epoch: [4][100/279] Elapsed 0m 11s (remain 0m 20s) Loss: 0.1448(0.3028) LR: 0.00000244  
Epoch: [4][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.4296(0.3097) LR: 0.00000182  
Epoch: [4][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.3093(0.3075) LR: 0.00000146  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.3919(0.3919) 


Epoch 4 - avg_train_loss: 0.3075  avg_val_loss: 0.7832  time: 36s
INFO:__main__:Epoch 4 - avg_train_loss: 0.3075  avg_val_loss: 0.7832  time: 36s
Epoch 4 - Score: 0.6821
INFO:__main__:Epoch 4 - Score: 0.6821


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 1.3732(0.7832) 
f1 score : 0.40397350993377484
recall score : 0.40131578947368424
precision score : 0.4066666666666667
thresh : 0.79
Epoch: [5][0/279] Elapsed 0m 0s (remain 1m 54s) Loss: 0.3380(0.3380) LR: 0.00000146  
Epoch: [5][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.3325(0.1905) LR: 0.00000112  
Epoch: [5][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.2909(0.1795) LR: 0.00000090  
Epoch: [5][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.0874(0.1760) LR: 0.00000080  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2573(0.2573) 


Epoch 5 - avg_train_loss: 0.1760  avg_val_loss: 1.0599  time: 36s
INFO:__main__:Epoch 5 - avg_train_loss: 0.1760  avg_val_loss: 1.0599  time: 36s
Epoch 5 - Score: 0.6841
INFO:__main__:Epoch 5 - Score: 0.6841


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 2.4146(1.0599) 
f1 score : 0.35606060606060613
recall score : 0.3092105263157895
precision score : 0.41964285714285715
thresh : 0.75
Epoch: [6][0/279] Elapsed 0m 0s (remain 1m 53s) Loss: 0.0629(0.0629) LR: 0.00000080  
Epoch: [6][100/279] Elapsed 0m 11s (remain 0m 21s) Loss: 0.2481(0.1156) LR: 0.00000073  
Epoch: [6][200/279] Elapsed 0m 23s (remain 0m 9s) Loss: 0.0640(0.1056) LR: 0.00000070  
Epoch: [6][278/279] Elapsed 0m 32s (remain 0m 0s) Loss: 0.1271(0.1061) LR: 0.00000070  
EVAL: [0/16] Elapsed 0m 0s (remain 0m 6s) Loss: 0.2494(0.2494) 


Epoch 6 - avg_train_loss: 0.1061  avg_val_loss: 1.2392  time: 36s
INFO:__main__:Epoch 6 - avg_train_loss: 0.1061  avg_val_loss: 1.2392  time: 36s
Epoch 6 - Score: 0.6861
INFO:__main__:Epoch 6 - Score: 0.6861


EVAL: [15/16] Elapsed 0m 3s (remain 0m 0s) Loss: 2.9413(1.2392) 
f1 score : 0.35877862595419846
recall score : 0.3092105263157895
precision score : 0.42727272727272725
thresh : 0.79


Score: 0.6881
INFO:__main__:Score: 0.6881
ACC BEST Score: 0.7103
INFO:__main__:ACC BEST Score: 0.7103
Score: 0.7008
INFO:__main__:Score: 0.7008
ACC BEST Score: 0.7073
INFO:__main__:ACC BEST Score: 0.7073


f1 score : 0.4444444444444444
recall score : 0.40789473684210525
precision score : 0.4881889763779528
thresh : 0.56
f1 score : 0.4210116731517509
recall score : 0.3552199606040709
precision score : 0.5167144221585482
thresh : 0.56


In [None]:
from google.colab import runtime
runtime.unassign()