In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
!pip install transformers
!pip install datasets
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m85.4 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m105.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.1 tokenizers-0.13.2 transformers-4.26.1
Looking in indexes: https://pypi.org/simple, https://u

In [3]:
!nvidia-smi

Sat Feb 18 06:09:52 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    52W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:

import os
import gc
import math
import time
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')
from tqdm import tqdm
import re
import html

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import Adam, SGD, AdamW, RAdam
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset

from sklearn.model_selection import StratifiedKFold,StratifiedGroupKFold,GroupKFold
from sklearn.metrics import log_loss,f1_score, recall_score, accuracy_score, precision_score

from transformers import AutoModel, AutoConfig, AutoTokenizer, AdamW, DataCollatorWithPadding
from transformers import get_linear_schedule_with_warmup, get_cosine_schedule_with_warmup

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:

# ====================================================
# CFG
# ====================================================
class CFG:
    debug=False
    apex=True
    print_freq=100
    num_workers=4
    model="microsoft/deberta-v3-base"
    # model='microsoft/deberta-base'
    # model='roberta-base'
    # model='roberta-large'
    # model='roberta-large-mnli'
    # model='xlnet-large-cased'
    # model='albert-xxlarge-v2'
    # model="microsoft/deberta-large"
    # model="microsoft/deberta-v3-large"
    # model='microsoft/deberta-v2-xlarge'
    # model='funnel-transformer/large'
    # model='funnel-transformer/medium'
    # model='albert-base-v2'
    # model='albert-large-v2'
    # model='google/electra-large-discriminator'
    # model='google/electra-base-discriminator'
    # model="facebook/bart-large-mnli"
    # model="facebook/bart-large"
    # model="facebook/bart-base"
    scheduler='cosine' # ['linear', 'cosine']
    batch_scheduler=True
    num_cycles=0.5
    num_warmup_steps=0
    epochs=6
    encoder_lr=2e-5
    decoder_lr=2e-5
    min_lr=1e-6
    eps=1e-6
    betas=(0.9, 0.999)
    batch_size=16
    fc_dropout=0.2
    target_size=1
    max_len=256
    weight_decay=0.01
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    n_fold=5
    trn_fold=[0, 1, 2, 3, 4]
    train=True
    nth_awp_start_epoch=1
    gradient_checkpointing = True
    freezing = True
    clean_content = True

if CFG.debug:
    CFG.epochs = 2
    CFG.trn_fold = [0, 1]

In [6]:
DIR = "/content/drive/MyDrive/Competitions/probspace/研究論文の国際学会採択予測"
INPUT_DIR = os.path.join(DIR,"input")
OUTPUT_DIR = os.path.join(DIR,"output")
OUTPUT_EXP_DIR = DIR + '/output/EXP010/'
if not os.path.exists(OUTPUT_EXP_DIR):
    os.makedirs(OUTPUT_EXP_DIR)

In [7]:
def get_score(labels, outputs):
    thresh = 0.5
    y_pred = outputs
    y_true = labels
    f_score = f1_score(y_true, (y_pred>thresh).astype(int))
    r_score = recall_score(y_true, (y_pred>thresh).astype(int))
    p_score = precision_score(y_true, (y_pred>thresh).astype(int))
    print(f"f1 score : {f_score}")
    print(f"recall score : {r_score}")
    print(f"precision score : {p_score}")
    return accuracy_score(y_true, (y_pred>thresh).astype(int))

def get_acc_score(labels, outputs):
    y_pred = outputs
    y_true = labels
    best_score = 0
    best_thresh = 0.5
    for thresh in np.arange(0.1, 0.80, 0.01):
        thresh = np.round(thresh, 2)
        score = accuracy_score(y_true, (y_pred>thresh).astype(int))
        #print("Accuracy score at threshold {0} is {1}".format(thresh, score))
        if score > best_score:
          best_score = score
          best_thresh = thresh
    return accuracy_score(y_true, (y_pred>best_thresh).astype(int))


def get_logger(filename=OUTPUT_EXP_DIR+'train'):
    from logging import getLogger, INFO, FileHandler, Formatter, StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=f"{filename}.log")
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = get_logger()

def seed_everything(seed=CFG.seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(seed=CFG.seed)

In [8]:
def freeze(module):
    """
    Freezes module's parameters.
    """
    
    for parameter in module.parameters():
        parameter.requires_grad = False
        
def get_freezed_parameters(module):
    """
    Returns names of freezed parameters of the given module.
    """
    
    freezed_parameters = []
    for name, parameter in module.named_parameters():
        if not parameter.requires_grad:
            freezed_parameters.append(name)
            
    return freezed_parameters

def set_embedding_parameters_bits(embeddings_path, optim_bits=32):
    """
    https://github.com/huggingface/transformers/issues/14819#issuecomment-1003427930
    """
    
    embedding_types = ("word", "position", "token_type")
    for embedding_type in embedding_types:
        attr_name = f"{embedding_type}_embeddings"
        
        if hasattr(embeddings_path, attr_name): 
            bnb.optim.GlobalOptimManager.get_instance().register_module_override(
                getattr(embeddings_path, attr_name), 'weight', {'optim_bits': optim_bits}
            )

In [9]:
import pandas as pd
import numpy as np

train = pd.read_csv(os.path.join(INPUT_DIR,"train_data.csv"))
test = pd.read_csv(os.path.join(INPUT_DIR,"test_data.csv"))
sample_sub = pd.read_csv(os.path.join(INPUT_DIR,"submission.csv"))

print(train.shape)
display(train.head(3))

print(test.shape)
display(test.head(3))

print(sample_sub.shape)
display(sample_sub.head(3))

(4974, 6)


Unnamed: 0,id,title,year,abstract,keywords,y
0,1,Hierarchical Adversarially Learned Inference,2018,We propose a novel hierarchical generative mod...,"generative, hierarchical, unsupervised, semisu...",0
1,2,Learning to Compute Word Embeddings On the Fly,2018,Words in natural language follow a Zipfian dis...,"NLU, word embeddings, representation learning",0
2,3,Graph2Seq: Scalable Learning Dynamics for Graphs,2018,Neural networks are increasingly used as a gen...,,0


(6393, 5)


Unnamed: 0,id,title,year,abstract,keywords
0,1,StyleAlign: Analysis and Applications of Align...,2022,"In this paper, we perform an in-depth study of...","StyleGAN, transfer learning, fine tuning, mode..."
1,2,Embedding a random graph via GNN: mean-field i...,2021,We develop a theory for embedding a random gra...,"Graph neural network, graph embedding, multi-r..."
2,3,BBRefinement: an universal scheme to improve p...,2021,We present a conceptually simple yet powerful ...,"object detection, deep neural networks, refine..."


(6393, 2)


Unnamed: 0,id,y
0,1,0
1,2,0
2,3,0


In [10]:
train["texts"] = train["abstract"]  

In [11]:
skf = StratifiedKFold(n_splits=CFG.n_fold,shuffle=True,random_state=CFG.seed)
for fold, ( _, val_) in enumerate(skf.split(train, train.y)):
    train.loc[val_ , "kfold"] = int(fold)
    
train["kfold"] = train["kfold"].astype(int)

if CFG.debug:
    display(train.groupby('kfold').size())
    train = train.sample(n=500, random_state=0).reset_index(drop=True)
    display(train.groupby('kfold').size())

In [12]:
# ====================================================
# tokenizer
# ====================================================
tokenizer = AutoTokenizer.from_pretrained(CFG.model)
tokenizer.save_pretrained(OUTPUT_EXP_DIR+'tokenizer/')
CFG.tokenizer = tokenizer

Downloading (…)okenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

Downloading (…)"spm.model";:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [13]:
# ====================================================
# Define max_len
# ====================================================
lengths = []
tk0 = tqdm(train['texts'].fillna("").values, total=len(train))
for text in tk0:
    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
CFG.max_len = max(lengths) + 2 # cls
LOGGER.info(f"max_len: {CFG.max_len}")

100%|██████████| 4974/4974 [00:03<00:00, 1421.96it/s]
max_len: 511
INFO:__main__:max_len: 511


In [14]:
# ====================================================
# Dataset
# ====================================================
def prepare_input(cfg, text):
    inputs = cfg.tokenizer(text,
                           add_special_tokens=True,
                           max_length=cfg.max_len,
                           padding="max_length",
                           return_offsets_mapping=False,
                           truncation=True)
    for k, v in inputs.items():
        inputs[k] = torch.tensor(v, dtype=torch.long)
    return inputs


class TrainDataset(Dataset):
    def __init__(self, cfg, df):
        self.cfg = cfg
        self.inputs = df['texts'].values
        self.labels = df['y'].values

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, item):
        inputs = prepare_input(self.cfg, self.inputs[item])
        label = torch.tensor(self.labels[item], dtype=torch.float)
        return inputs, label

def collate(inputs):
    mask_len = int(inputs["attention_mask"].sum(axis=1).max())
    for k, v in inputs.items():
        inputs[k] = inputs[k][:,:mask_len]
    return inputs

#collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [15]:
# ====================================================
# Model
# ====================================================
class MeanPooling(nn.Module):
    def __init__(self):
        super(MeanPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1)
        sum_mask = input_mask_expanded.sum(1)
        sum_mask = torch.clamp(sum_mask, min=1e-9)
        mean_embeddings = sum_embeddings / sum_mask
        return mean_embeddings

class MaxPooling(nn.Module):
    def __init__(self):
        super(MaxPooling, self).__init__()
        
    def forward(self, last_hidden_state, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        embeddings = last_hidden_state.clone()
        embeddings[input_mask_expanded == 0] = -1e4
        max_embeddings, _ = torch.max(embeddings, dim=1)
        return max_embeddings
    

class CustomModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            LOGGER.info(self.config)
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel(self.config)
        if self.cfg.gradient_checkpointing:
            self.model.gradient_checkpointing_enable()

        # Freezing
        if cfg.freezing:
            # freezing embeddings and first 2 layers of encoder
            freeze((self.model).embeddings)
            freeze((self.model).encoder.layer[:2])
            cfg.after_freezed_parameters = filter(lambda parameter: parameter.requires_grad, (self.model).parameters())

        self.pool = MeanPooling()
        self.fc = nn.Linear(self.config.hidden_size, cfg.target_size)
        self._init_weights(self.fc)
        
    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        
    def feature(self, inputs):
        outputs = self.model(**inputs)
        last_hidden_states = outputs[0]
        feature = self.pool(last_hidden_states, inputs['attention_mask'])
        return feature

    def forward(self, inputs):
        feature = self.feature(inputs)
        output = self.fc(feature)
        return output

In [16]:

# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def asMinutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


def timeSince(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%s (remain %s)' % (asMinutes(s), asMinutes(rs))


def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    model.train()
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.apex)
    losses = AverageMeter()
    start = end = time.time()
    global_step = 0
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.cuda.amp.autocast(enabled=CFG.apex):
            y_preds = model(inputs)
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            global_step += 1
            if CFG.batch_scheduler:
                scheduler.step()
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.8f}  '
                  .format(epoch+1, step, len(train_loader), 
                          remain=timeSince(start, float(step+1)/len(train_loader)),
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    losses = AverageMeter()
    model.eval()
    preds = []
    start = end = time.time()
    for step, (inputs, labels) in enumerate(valid_loader):
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        with torch.no_grad():
            y_preds = model(inputs)
        loss = criterion(y_preds.view(-1, 1), labels.view(-1, 1))
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        losses.update(loss.item(), batch_size)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
        end = time.time()
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Elapsed {remain:s} '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          remain=timeSince(start, float(step+1)/len(valid_loader))))
    predictions = np.concatenate(preds)
    predictions = np.concatenate(predictions)
    return losses.avg, predictions


def inference_fn(test_loader, model, device):
    preds = []
    model.eval()
    model.to(device)
    tk0 = tqdm(test_loader, total=len(test_loader))
    for inputs in tk0:
        inputs = collate(inputs)
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        with torch.no_grad():
            y_preds = model(inputs)
        preds.append(y_preds.sigmoid().to('cpu').numpy())
    predictions = np.concatenate(preds)
    return predictions

In [17]:

# ====================================================
# train loop
# ====================================================
def train_loop(folds, fold):
    
    LOGGER.info(f"========== fold: {fold} training ==========")

    # ====================================================
    # loader
    # ====================================================
    train_folds = folds[folds['kfold'] != fold].reset_index(drop=True)
    valid_folds = folds[folds['kfold'] == fold].reset_index(drop=True)
    valid_labels = valid_folds['y'].values
    
    train_dataset = TrainDataset(CFG, train_folds)
    valid_dataset = TrainDataset(CFG, valid_folds)


    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size,
                              shuffle=True,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG.batch_size*2,
                              shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)

    # ====================================================
    # model & optimizer
    # ====================================================
    model = CustomModel(CFG, config_path=None, pretrained=True)
    torch.save(model.config, OUTPUT_EXP_DIR+'config.pth')
    model.to(device)
    
    def get_optimizer_params(model, encoder_lr, decoder_lr, weight_decay=0.0):
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {'params': [p for n, p in model.model.named_parameters() if not any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': weight_decay},
            {'params': [p for n, p in model.model.named_parameters() if any(nd in n for nd in no_decay)],
             'lr': encoder_lr, 'weight_decay': 0.0},
            {'params': [p for n, p in model.named_parameters() if "model" not in n],
             'lr': decoder_lr, 'weight_decay': 0.0}
        ]
        return optimizer_parameters

    optimizer_parameters = get_optimizer_params(model,
                                                encoder_lr=CFG.encoder_lr, 
                                                decoder_lr=CFG.decoder_lr,
                                                weight_decay=CFG.weight_decay)
    optimizer = AdamW(optimizer_parameters, lr=CFG.encoder_lr, eps=CFG.eps, betas=CFG.betas)
    
    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(cfg, optimizer, num_train_steps):
        if cfg.scheduler == 'linear':
            scheduler = get_linear_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps
            )
        elif cfg.scheduler == 'cosine':
            scheduler = get_cosine_schedule_with_warmup(
                optimizer, num_warmup_steps=cfg.num_warmup_steps, num_training_steps=num_train_steps, num_cycles=cfg.num_cycles
            )
        return scheduler
    
    num_train_steps = int(len(train_folds) / CFG.batch_size * CFG.epochs)
    scheduler = get_scheduler(CFG, optimizer, num_train_steps)

    # ====================================================
    # loop
    # ====================================================
    criterion = nn.BCEWithLogitsLoss(reduction="mean")
    
    best_score = -1.

    for epoch in range(CFG.epochs):

        start_time = time.time()

        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        avg_val_loss, predictions = valid_fn(valid_loader, model, criterion, device)
        
        # scoring
        score = get_score(valid_labels, predictions)

        elapsed = time.time() - start_time

        LOGGER.info(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')

        
        if best_score < score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),
                        'predictions': predictions},
                        OUTPUT_EXP_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth")

    predictions = torch.load(OUTPUT_EXP_DIR+f"{CFG.model.replace('/', '-')}_fold{fold}_best.pth", 
                             map_location=torch.device('cpu'))['predictions']
    valid_folds['pred'] = predictions

    torch.cuda.empty_cache()
    gc.collect()
    
    return valid_folds

In [18]:
if __name__ == '__main__':
    
    def get_result(oof_df):
        labels = oof_df['y'].values
        preds = oof_df['pred'].values
        score = get_score(labels, preds)
        acc_score = get_acc_score(labels, preds)
        LOGGER.info(f'Score: {score:<.4f}')
        LOGGER.info(f'ACC BEST Score: {acc_score:<.4f}')
    
    if CFG.train:
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
            #break
        oof_df = oof_df.reset_index(drop=True)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        oof_df.to_pickle(OUTPUT_EXP_DIR+'oof_df.pkl')

DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__:DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "at

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of the model checkpoint at microsoft/deberta-v3-base were not used when initializing DebertaV2Model: ['lm_predictions.lm_head.bias', 'mask_predictions.classifier.bias', 'mask_predictions.LayerNorm.weight', 'mask_predictions.dense.bias', 'mask_predictions.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'mask_predictions.classifier.weight', 'mask_predictions.dense.weight']
- This IS expected if you are initializing DebertaV2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaV2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Epoch: [1][0/248] Elapsed 0m 3s (remain 15m 49s) Loss: 1.0549(1.0549) Grad: nan  LR: 0.00002000  
Epoch: [1][100/248] Elapsed 0m 17s (remain 0m 24s) Loss: 0.6600(0.6450) Grad: 1.4553  LR: 0.00001977  
Epoch: [1][200/248] Elapsed 0m 30s (remain 0m 7s) Loss: 0.5625(0.6307) Grad: 5.0205  LR: 0.00001912  
Epoch: [1][247/248] Elapsed 0m 36s (remain 0m 0s) Loss: 0.6086(0.6249) Grad: 2.6001  LR: 0.00001867  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.4940(0.4940) 


Epoch 1 - avg_train_loss: 0.6249  avg_val_loss: 0.6189  time: 43s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6249  avg_val_loss: 0.6189  time: 43s
Epoch 1 - Score: 0.6945
INFO:__main__:Epoch 1 - Score: 0.6945
Epoch 1 - Save Best Score: 0.6945 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6945 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.8263(0.6189) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/248] Elapsed 0m 0s (remain 1m 22s) Loss: 0.5715(0.5715) Grad: nan  LR: 0.00001866  
Epoch: [2][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.6149(0.5898) Grad: 3.2504  LR: 0.00001742  
Epoch: [2][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.5797(0.5930) Grad: 2.2511  LR: 0.00001585  
Epoch: [2][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.6060(0.5894) Grad: 2.1094  LR: 0.00001502  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.3968(0.3968) 


Epoch 2 - avg_train_loss: 0.5894  avg_val_loss: 0.5772  time: 40s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5894  avg_val_loss: 0.5772  time: 40s
Epoch 2 - Score: 0.7025
INFO:__main__:Epoch 2 - Score: 0.7025
Epoch 2 - Save Best Score: 0.7025 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7025 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.7373(0.5772) 
f1 score : 0.23316062176165803
recall score : 0.14802631578947367
precision score : 0.5487804878048781
Epoch: [3][0/248] Elapsed 0m 0s (remain 1m 26s) Loss: 0.4509(0.4509) Grad: nan  LR: 0.00001501  
Epoch: [3][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.4506(0.4905) Grad: 2.4679  LR: 0.00001309  
Epoch: [3][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.6534(0.4791) Grad: 5.4755  LR: 0.00001103  
Epoch: [3][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.3900(0.4732) Grad: 3.7884  LR: 0.00001004  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.2190(0.2190) 


Epoch 3 - avg_train_loss: 0.4732  avg_val_loss: 0.6278  time: 40s
INFO:__main__:Epoch 3 - avg_train_loss: 0.4732  avg_val_loss: 0.6278  time: 40s
Epoch 3 - Score: 0.7085
INFO:__main__:Epoch 3 - Score: 0.7085
Epoch 3 - Save Best Score: 0.7085 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7085 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.8119(0.6278) 
f1 score : 0.3028846153846154
recall score : 0.20723684210526316
precision score : 0.5625
Epoch: [4][0/248] Elapsed 0m 0s (remain 1m 24s) Loss: 0.2572(0.2572) Grad: nan  LR: 0.00001002  
Epoch: [4][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.0691(0.1999) Grad: 6.0367  LR: 0.00000793  
Epoch: [4][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0844(0.1877) Grad: 4.7989  LR: 0.00000593  
Epoch: [4][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0539(0.1856) Grad: 3.0729  LR: 0.00000505  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.4996(0.4996) 


Epoch 4 - avg_train_loss: 0.1856  avg_val_loss: 0.8538  time: 40s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1856  avg_val_loss: 0.8538  time: 40s
Epoch 4 - Score: 0.6693
INFO:__main__:Epoch 4 - Score: 0.6693


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.9805(0.8538) 
f1 score : 0.41144901610017887
recall score : 0.3782894736842105
precision score : 0.45098039215686275
Epoch: [5][0/248] Elapsed 0m 0s (remain 1m 19s) Loss: 0.0421(0.0421) Grad: nan  LR: 0.00000503  
Epoch: [5][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.0638(0.0446) Grad: 5.0085  LR: 0.00000333  
Epoch: [5][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0108(0.0380) Grad: 0.7836  LR: 0.00000192  
Epoch: [5][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0139(0.0365) Grad: 0.9457  LR: 0.00000138  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.6777(0.6777) 


Epoch 5 - avg_train_loss: 0.0365  avg_val_loss: 1.0163  time: 40s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0365  avg_val_loss: 1.0163  time: 40s
Epoch 5 - Score: 0.6673
INFO:__main__:Epoch 5 - Score: 0.6673


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.8408(1.0163) 
f1 score : 0.4283246977547495
recall score : 0.40789473684210525
precision score : 0.4509090909090909
Epoch: [6][0/248] Elapsed 0m 0s (remain 1m 17s) Loss: 0.0152(0.0152) Grad: nan  LR: 0.00000136  
Epoch: [6][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.0120(0.0149) Grad: 1.0296  LR: 0.00000050  
Epoch: [6][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0126(0.0151) Grad: 1.0002  LR: 0.00000006  
Epoch: [6][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0157(0.0144) Grad: 0.8080  LR: 0.00000000  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.5443(0.5443) 


Epoch 6 - avg_train_loss: 0.0144  avg_val_loss: 1.0538  time: 40s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0144  avg_val_loss: 1.0538  time: 40s
Epoch 6 - Score: 0.6784
INFO:__main__:Epoch 6 - Score: 0.6784


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 1.2138(1.0538) 
f1 score : 0.4029850746268656
recall score : 0.35526315789473684
precision score : 0.46551724137931033


Score: 0.7085
INFO:__main__:Score: 0.7085
ACC BEST Score: 0.7226
INFO:__main__:ACC BEST Score: 0.7226


f1 score : 0.3028846153846154
recall score : 0.20723684210526316
precision score : 0.5625


DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__:DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "at

Epoch: [1][0/248] Elapsed 0m 0s (remain 1m 21s) Loss: 0.6380(0.6380) Grad: nan  LR: 0.00002000  
Epoch: [1][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.7069(0.6273) Grad: 3.9402  LR: 0.00001977  
Epoch: [1][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.4347(0.6148) Grad: 4.2498  LR: 0.00001912  
Epoch: [1][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.6155(0.6146) Grad: 5.8943  LR: 0.00001867  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.4803(0.4803) 


Epoch 1 - avg_train_loss: 0.6146  avg_val_loss: 0.6084  time: 40s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6146  avg_val_loss: 0.6084  time: 40s
Epoch 1 - Score: 0.7015
INFO:__main__:Epoch 1 - Score: 0.7015
Epoch 1 - Save Best Score: 0.7015 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.7015 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.8073(0.6084) 
f1 score : 0.09174311926605504
recall score : 0.04918032786885246
precision score : 0.6818181818181818
Epoch: [2][0/248] Elapsed 0m 0s (remain 1m 26s) Loss: 0.4605(0.4605) Grad: nan  LR: 0.00001866  
Epoch: [2][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.5418(0.5628) Grad: 3.7506  LR: 0.00001742  
Epoch: [2][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.6274(0.5378) Grad: 8.1694  LR: 0.00001585  
Epoch: [2][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.5922(0.5429) Grad: 6.2974  LR: 0.00001502  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.5388(0.5388) 


Epoch 2 - avg_train_loss: 0.5429  avg_val_loss: 0.6112  time: 40s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5429  avg_val_loss: 0.6112  time: 40s
Epoch 2 - Score: 0.6623
INFO:__main__:Epoch 2 - Score: 0.6623


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.7052(0.6112) 
f1 score : 0.5227272727272728
recall score : 0.6032786885245902
precision score : 0.46115288220551376
Epoch: [3][0/248] Elapsed 0m 0s (remain 1m 21s) Loss: 0.4310(0.4310) Grad: nan  LR: 0.00001501  
Epoch: [3][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.1449(0.3285) Grad: 4.9274  LR: 0.00001309  
Epoch: [3][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.2876(0.2918) Grad: 7.1764  LR: 0.00001103  
Epoch: [3][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.2364(0.2869) Grad: 7.1895  LR: 0.00001004  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.4405(0.4405) 


Epoch 3 - avg_train_loss: 0.2869  avg_val_loss: 0.6772  time: 40s
INFO:__main__:Epoch 3 - avg_train_loss: 0.2869  avg_val_loss: 0.6772  time: 40s
Epoch 3 - Score: 0.6874
INFO:__main__:Epoch 3 - Score: 0.6874


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 1.5267(0.6772) 
f1 score : 0.4755480607082631
recall score : 0.46229508196721314
precision score : 0.4895833333333333
Epoch: [4][0/248] Elapsed 0m 0s (remain 1m 23s) Loss: 0.0627(0.0627) Grad: nan  LR: 0.00001002  
Epoch: [4][100/248] Elapsed 0m 14s (remain 0m 20s) Loss: 0.0210(0.0484) Grad: 1.6842  LR: 0.00000793  
Epoch: [4][200/248] Elapsed 0m 26s (remain 0m 6s) Loss: 0.0062(0.0399) Grad: 0.3277  LR: 0.00000593  
Epoch: [4][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0103(0.0368) Grad: 1.0617  LR: 0.00000505  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.4371(0.4371) 


Epoch 4 - avg_train_loss: 0.0368  avg_val_loss: 0.9152  time: 40s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0368  avg_val_loss: 0.9152  time: 40s
Epoch 4 - Score: 0.6794
INFO:__main__:Epoch 4 - Score: 0.6794


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 3.1589(0.9152) 
f1 score : 0.4272890484739677
recall score : 0.3901639344262295
precision score : 0.4722222222222222
Epoch: [5][0/248] Elapsed 0m 0s (remain 1m 20s) Loss: 0.0144(0.0144) Grad: nan  LR: 0.00000503  
Epoch: [5][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.0053(0.0067) Grad: 0.4327  LR: 0.00000333  
Epoch: [5][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0037(0.0058) Grad: 0.2391  LR: 0.00000192  
Epoch: [5][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0052(0.0055) Grad: 0.4247  LR: 0.00000138  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.3743(0.3743) 


Epoch 5 - avg_train_loss: 0.0055  avg_val_loss: 1.0359  time: 40s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0055  avg_val_loss: 1.0359  time: 40s
Epoch 5 - Score: 0.6915
INFO:__main__:Epoch 5 - Score: 0.6915


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 3.9864(1.0359) 
f1 score : 0.3968565815324165
recall score : 0.33114754098360655
precision score : 0.4950980392156863
Epoch: [6][0/248] Elapsed 0m 0s (remain 1m 25s) Loss: 0.0042(0.0042) Grad: nan  LR: 0.00000136  
Epoch: [6][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.0043(0.0039) Grad: 0.2151  LR: 0.00000050  
Epoch: [6][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0045(0.0039) Grad: 0.2073  LR: 0.00000006  
Epoch: [6][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0047(0.0040) Grad: 0.2575  LR: 0.00000000  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.3513(0.3513) 


Epoch 6 - avg_train_loss: 0.0040  avg_val_loss: 1.0570  time: 40s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0040  avg_val_loss: 1.0570  time: 40s
Epoch 6 - Score: 0.6925
INFO:__main__:Epoch 6 - Score: 0.6925


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 4.1570(1.0570) 
f1 score : 0.38799999999999996
recall score : 0.3180327868852459
precision score : 0.49743589743589745


Score: 0.7015
INFO:__main__:Score: 0.7015
ACC BEST Score: 0.7015
INFO:__main__:ACC BEST Score: 0.7015


f1 score : 0.09174311926605504
recall score : 0.04918032786885246
precision score : 0.6818181818181818


DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__:DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "at

Epoch: [1][0/248] Elapsed 0m 0s (remain 1m 27s) Loss: 0.5722(0.5722) Grad: nan  LR: 0.00002000  
Epoch: [1][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.5220(0.6193) Grad: 4.0309  LR: 0.00001977  
Epoch: [1][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.7419(0.6095) Grad: 6.9531  LR: 0.00001912  
Epoch: [1][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.5659(0.6116) Grad: 1.4710  LR: 0.00001867  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.3993(0.3993) 


Epoch 1 - avg_train_loss: 0.6116  avg_val_loss: 0.5899  time: 40s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6116  avg_val_loss: 0.5899  time: 40s
Epoch 1 - Score: 0.6945
INFO:__main__:Epoch 1 - Score: 0.6945
Epoch 1 - Save Best Score: 0.6945 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6945 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.9106(0.5899) 
f1 score : 0.01935483870967742
recall score : 0.009836065573770493
precision score : 0.6
Epoch: [2][0/248] Elapsed 0m 0s (remain 1m 25s) Loss: 0.4985(0.4985) Grad: nan  LR: 0.00001866  
Epoch: [2][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.5568(0.5225) Grad: 6.6891  LR: 0.00001742  
Epoch: [2][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.6200(0.5367) Grad: 3.2174  LR: 0.00001585  
Epoch: [2][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.4905(0.5344) Grad: 3.4257  LR: 0.00001502  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.4040(0.4040) 


Epoch 2 - avg_train_loss: 0.5344  avg_val_loss: 0.5805  time: 40s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5344  avg_val_loss: 0.5805  time: 40s
Epoch 2 - Score: 0.7015
INFO:__main__:Epoch 2 - Score: 0.7015
Epoch 2 - Save Best Score: 0.7015 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7015 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.9321(0.5805) 
f1 score : 0.30444964871194374
recall score : 0.21311475409836064
precision score : 0.5327868852459017
Epoch: [3][0/248] Elapsed 0m 0s (remain 1m 37s) Loss: 0.3777(0.3777) Grad: nan  LR: 0.00001501  
Epoch: [3][100/248] Elapsed 0m 14s (remain 0m 20s) Loss: 0.2295(0.3491) Grad: 4.1250  LR: 0.00001309  
Epoch: [3][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.2627(0.3194) Grad: 9.5094  LR: 0.00001103  
Epoch: [3][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.1389(0.3115) Grad: 6.8115  LR: 0.00001004  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.5201(0.5201) 


Epoch 3 - avg_train_loss: 0.3115  avg_val_loss: 0.7323  time: 40s
INFO:__main__:Epoch 3 - avg_train_loss: 0.3115  avg_val_loss: 0.7323  time: 40s
Epoch 3 - Score: 0.6683
INFO:__main__:Epoch 3 - Score: 0.6683


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 1.2007(0.7323) 
f1 score : 0.3653846153846154
recall score : 0.3114754098360656
precision score : 0.4418604651162791
Epoch: [4][0/248] Elapsed 0m 0s (remain 1m 27s) Loss: 0.2808(0.2808) Grad: nan  LR: 0.00001002  
Epoch: [4][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.0170(0.0529) Grad: 0.9908  LR: 0.00000793  
Epoch: [4][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0433(0.0450) Grad: 4.9639  LR: 0.00000593  
Epoch: [4][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.1458(0.0436) Grad: 11.6303  LR: 0.00000505  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.8403(0.8403) 


Epoch 4 - avg_train_loss: 0.0436  avg_val_loss: 1.0674  time: 40s
INFO:__main__:Epoch 4 - avg_train_loss: 0.0436  avg_val_loss: 1.0674  time: 40s
Epoch 4 - Score: 0.6563
INFO:__main__:Epoch 4 - Score: 0.6563


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 1.7445(1.0674) 
f1 score : 0.3914590747330961
recall score : 0.36065573770491804
precision score : 0.4280155642023346
Epoch: [5][0/248] Elapsed 0m 0s (remain 1m 21s) Loss: 0.0055(0.0055) Grad: nan  LR: 0.00000503  
Epoch: [5][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.0056(0.0152) Grad: 0.3003  LR: 0.00000333  
Epoch: [5][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0043(0.0108) Grad: 0.2958  LR: 0.00000192  
Epoch: [5][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0032(0.0098) Grad: 0.1764  LR: 0.00000138  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.7996(0.7996) 


Epoch 5 - avg_train_loss: 0.0098  avg_val_loss: 1.2363  time: 40s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0098  avg_val_loss: 1.2363  time: 40s
Epoch 5 - Score: 0.6573
INFO:__main__:Epoch 5 - Score: 0.6573


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 2.4947(1.2363) 
f1 score : 0.3479923518164436
recall score : 0.2983606557377049
precision score : 0.41743119266055045
Epoch: [6][0/248] Elapsed 0m 0s (remain 1m 20s) Loss: 0.0057(0.0057) Grad: nan  LR: 0.00000136  
Epoch: [6][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.0041(0.0049) Grad: 0.3094  LR: 0.00000050  
Epoch: [6][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0039(0.0047) Grad: 0.1838  LR: 0.00000006  
Epoch: [6][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0028(0.0046) Grad: 0.1292  LR: 0.00000000  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.8134(0.8134) 


Epoch 6 - avg_train_loss: 0.0046  avg_val_loss: 1.2507  time: 40s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0046  avg_val_loss: 1.2507  time: 40s
Epoch 6 - Score: 0.6573
INFO:__main__:Epoch 6 - Score: 0.6573


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 2.5448(1.2507) 
f1 score : 0.3479923518164436
recall score : 0.2983606557377049
precision score : 0.41743119266055045


Score: 0.7015
INFO:__main__:Score: 0.7015
ACC BEST Score: 0.7126
INFO:__main__:ACC BEST Score: 0.7126


f1 score : 0.30444964871194374
recall score : 0.21311475409836064
precision score : 0.5327868852459017


DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__:DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "at

Epoch: [1][0/248] Elapsed 0m 0s (remain 1m 35s) Loss: 0.7850(0.7850) Grad: nan  LR: 0.00002000  
Epoch: [1][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.8064(0.6393) Grad: 7.4409  LR: 0.00001977  
Epoch: [1][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.6153(0.6263) Grad: 0.6978  LR: 0.00001912  
Epoch: [1][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.4184(0.6210) Grad: 5.2043  LR: 0.00001867  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.3350(0.3350) 


Epoch 1 - avg_train_loss: 0.6210  avg_val_loss: 0.5961  time: 40s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6210  avg_val_loss: 0.5961  time: 40s
Epoch 1 - Score: 0.6935
INFO:__main__:Epoch 1 - Score: 0.6935
Epoch 1 - Save Best Score: 0.6935 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6935 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 1.0731(0.5961) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/248] Elapsed 0m 0s (remain 1m 32s) Loss: 0.5156(0.5156) Grad: nan  LR: 0.00001866  
Epoch: [2][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.7055(0.5707) Grad: 5.9034  LR: 0.00001742  
Epoch: [2][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.5021(0.5710) Grad: 1.1686  LR: 0.00001585  
Epoch: [2][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.6168(0.5729) Grad: 1.9129  LR: 0.00001502  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.3026(0.3026) 


Epoch 2 - avg_train_loss: 0.5729  avg_val_loss: 0.5804  time: 40s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5729  avg_val_loss: 0.5804  time: 40s
Epoch 2 - Score: 0.7095
INFO:__main__:Epoch 2 - Score: 0.7095
Epoch 2 - Save Best Score: 0.7095 Model
INFO:__main__:Epoch 2 - Save Best Score: 0.7095 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.9054(0.5804) 
f1 score : 0.17191977077363896
recall score : 0.09836065573770492
precision score : 0.6818181818181818
Epoch: [3][0/248] Elapsed 0m 0s (remain 1m 28s) Loss: 0.6243(0.6243) Grad: nan  LR: 0.00001501  
Epoch: [3][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.6071(0.4822) Grad: 9.1730  LR: 0.00001309  
Epoch: [3][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.3509(0.4789) Grad: 4.0346  LR: 0.00001103  
Epoch: [3][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.3966(0.4694) Grad: 3.2807  LR: 0.00001004  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.2199(0.2199) 


Epoch 3 - avg_train_loss: 0.4694  avg_val_loss: 0.6077  time: 40s
INFO:__main__:Epoch 3 - avg_train_loss: 0.4694  avg_val_loss: 0.6077  time: 40s
Epoch 3 - Score: 0.7035
INFO:__main__:Epoch 3 - Score: 0.7035


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.6968(0.6077) 
f1 score : 0.2891566265060241
recall score : 0.19672131147540983
precision score : 0.5454545454545454
Epoch: [4][0/248] Elapsed 0m 0s (remain 1m 23s) Loss: 0.3204(0.3204) Grad: nan  LR: 0.00001002  
Epoch: [4][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.2394(0.1793) Grad: 11.1698  LR: 0.00000793  
Epoch: [4][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.1187(0.1613) Grad: 6.6220  LR: 0.00000593  
Epoch: [4][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.1323(0.1616) Grad: 6.3697  LR: 0.00000505  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.3465(0.3465) 


Epoch 4 - avg_train_loss: 0.1616  avg_val_loss: 0.8141  time: 40s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1616  avg_val_loss: 0.8141  time: 40s
Epoch 4 - Score: 0.6945
INFO:__main__:Epoch 4 - Score: 0.6945


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.8728(0.8141) 
f1 score : 0.4198473282442748
recall score : 0.36065573770491804
precision score : 0.502283105022831
Epoch: [5][0/248] Elapsed 0m 0s (remain 1m 26s) Loss: 0.0152(0.0152) Grad: nan  LR: 0.00000503  
Epoch: [5][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.0152(0.0318) Grad: 1.1102  LR: 0.00000333  
Epoch: [5][200/248] Elapsed 0m 26s (remain 0m 6s) Loss: 0.0172(0.0265) Grad: 0.9491  LR: 0.00000192  
Epoch: [5][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0152(0.0249) Grad: 1.3744  LR: 0.00000138  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.5165(0.5165) 


Epoch 5 - avg_train_loss: 0.0249  avg_val_loss: 0.9886  time: 39s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0249  avg_val_loss: 0.9886  time: 39s
Epoch 5 - Score: 0.6854
INFO:__main__:Epoch 5 - Score: 0.6854


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 1.3860(0.9886) 
f1 score : 0.4214417744916821
recall score : 0.3737704918032787
precision score : 0.4830508474576271
Epoch: [6][0/248] Elapsed 0m 0s (remain 1m 20s) Loss: 0.0130(0.0130) Grad: nan  LR: 0.00000136  
Epoch: [6][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.0110(0.0107) Grad: 0.7375  LR: 0.00000050  
Epoch: [6][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0103(0.0099) Grad: 0.7494  LR: 0.00000006  
Epoch: [6][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0136(0.0097) Grad: 1.4955  LR: 0.00000000  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 12s) Loss: 0.4958(0.4958) 


Epoch 6 - avg_train_loss: 0.0097  avg_val_loss: 1.0094  time: 40s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0097  avg_val_loss: 1.0094  time: 40s
Epoch 6 - Score: 0.6864
INFO:__main__:Epoch 6 - Score: 0.6864


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 1.4709(1.0094) 
f1 score : 0.41132075471698115
recall score : 0.35737704918032787
precision score : 0.48444444444444446


Score: 0.7095
INFO:__main__:Score: 0.7095
ACC BEST Score: 0.7156
INFO:__main__:ACC BEST Score: 0.7156


f1 score : 0.17191977077363896
recall score : 0.09836065573770492
precision score : 0.6818181818181818


DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout": 0.0,
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_hidden_states": true,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "vocab_size": 128100
}

INFO:__main__:DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-base",
  "attention_dropout": 0.0,
  "at

Epoch: [1][0/248] Elapsed 0m 0s (remain 1m 28s) Loss: 0.9433(0.9433) Grad: nan  LR: 0.00002000  
Epoch: [1][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.4868(0.6465) Grad: 3.4218  LR: 0.00001977  
Epoch: [1][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.6179(0.6240) Grad: 2.6337  LR: 0.00001912  
Epoch: [1][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.5395(0.6212) Grad: 2.6681  LR: 0.00001867  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.1133(0.1133) 


Epoch 1 - avg_train_loss: 0.6212  avg_val_loss: 0.7085  time: 40s
INFO:__main__:Epoch 1 - avg_train_loss: 0.6212  avg_val_loss: 0.7085  time: 40s
Epoch 1 - Score: 0.6942
INFO:__main__:Epoch 1 - Score: 0.6942
Epoch 1 - Save Best Score: 0.6942 Model
INFO:__main__:Epoch 1 - Save Best Score: 0.6942 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 1.7008(0.7085) 
f1 score : 0.0
recall score : 0.0
precision score : 0.0
Epoch: [2][0/248] Elapsed 0m 0s (remain 1m 30s) Loss: 0.8371(0.8371) Grad: nan  LR: 0.00001866  
Epoch: [2][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.4142(0.5804) Grad: 7.5036  LR: 0.00001742  
Epoch: [2][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.5973(0.5757) Grad: 2.9599  LR: 0.00001585  
Epoch: [2][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.6301(0.5737) Grad: 1.8222  LR: 0.00001502  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.4158(0.4158) 


Epoch 2 - avg_train_loss: 0.5737  avg_val_loss: 0.5748  time: 40s
INFO:__main__:Epoch 2 - avg_train_loss: 0.5737  avg_val_loss: 0.5748  time: 40s
Epoch 2 - Score: 0.6891
INFO:__main__:Epoch 2 - Score: 0.6891


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.7889(0.5748) 
f1 score : 0.46815834767641995
recall score : 0.4473684210526316
precision score : 0.49097472924187724
Epoch: [3][0/248] Elapsed 0m 0s (remain 1m 23s) Loss: 0.4629(0.4629) Grad: nan  LR: 0.00001501  
Epoch: [3][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.2423(0.4838) Grad: 2.6699  LR: 0.00001309  
Epoch: [3][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.4257(0.4619) Grad: 11.0288  LR: 0.00001103  
Epoch: [3][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.3892(0.4598) Grad: 3.2949  LR: 0.00001004  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.3303(0.3303) 


Epoch 3 - avg_train_loss: 0.4598  avg_val_loss: 0.5735  time: 40s
INFO:__main__:Epoch 3 - avg_train_loss: 0.4598  avg_val_loss: 0.5735  time: 40s
Epoch 3 - Score: 0.7103
INFO:__main__:Epoch 3 - Score: 0.7103
Epoch 3 - Save Best Score: 0.7103 Model
INFO:__main__:Epoch 3 - Save Best Score: 0.7103 Model


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 0.9634(0.5735) 
f1 score : 0.35714285714285715
recall score : 0.2631578947368421
precision score : 0.5555555555555556
Epoch: [4][0/248] Elapsed 0m 0s (remain 1m 23s) Loss: 0.3803(0.3803) Grad: nan  LR: 0.00001002  
Epoch: [4][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.1624(0.2160) Grad: 7.4024  LR: 0.00000793  
Epoch: [4][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.2060(0.1986) Grad: 11.2471  LR: 0.00000593  
Epoch: [4][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0420(0.1929) Grad: 2.0565  LR: 0.00000505  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.2851(0.2851) 


Epoch 4 - avg_train_loss: 0.1929  avg_val_loss: 0.8629  time: 40s
INFO:__main__:Epoch 4 - avg_train_loss: 0.1929  avg_val_loss: 0.8629  time: 40s
Epoch 4 - Score: 0.6891
INFO:__main__:Epoch 4 - Score: 0.6891


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 2.2466(0.8629) 
f1 score : 0.3297180043383948
recall score : 0.25
precision score : 0.4840764331210191
Epoch: [5][0/248] Elapsed 0m 0s (remain 1m 23s) Loss: 0.2684(0.2684) Grad: nan  LR: 0.00000503  
Epoch: [5][100/248] Elapsed 0m 13s (remain 0m 20s) Loss: 0.0187(0.0539) Grad: 1.3654  LR: 0.00000333  
Epoch: [5][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0144(0.0554) Grad: 0.8857  LR: 0.00000192  
Epoch: [5][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0118(0.0535) Grad: 0.6292  LR: 0.00000138  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.3691(0.3691) 


Epoch 5 - avg_train_loss: 0.0535  avg_val_loss: 1.0412  time: 40s
INFO:__main__:Epoch 5 - avg_train_loss: 0.0535  avg_val_loss: 1.0412  time: 40s
Epoch 5 - Score: 0.6791
INFO:__main__:Epoch 5 - Score: 0.6791


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 2.4963(1.0412) 
f1 score : 0.3255813953488372
recall score : 0.2532894736842105
precision score : 0.4556213017751479
Epoch: [6][0/248] Elapsed 0m 0s (remain 1m 23s) Loss: 0.0448(0.0448) Grad: nan  LR: 0.00000136  
Epoch: [6][100/248] Elapsed 0m 13s (remain 0m 19s) Loss: 0.0070(0.0200) Grad: 0.3114  LR: 0.00000050  
Epoch: [6][200/248] Elapsed 0m 27s (remain 0m 6s) Loss: 0.0073(0.0271) Grad: 0.2464  LR: 0.00000006  
Epoch: [6][247/248] Elapsed 0m 33s (remain 0m 0s) Loss: 0.0144(0.0307) Grad: 1.1832  LR: 0.00000000  
EVAL: [0/32] Elapsed 0m 0s (remain 0m 11s) Loss: 0.5496(0.5496) 


Epoch 6 - avg_train_loss: 0.0307  avg_val_loss: 1.0574  time: 40s
INFO:__main__:Epoch 6 - avg_train_loss: 0.0307  avg_val_loss: 1.0574  time: 40s
Epoch 6 - Score: 0.6700
INFO:__main__:Epoch 6 - Score: 0.6700


EVAL: [31/32] Elapsed 0m 6s (remain 0m 0s) Loss: 2.2283(1.0574) 
f1 score : 0.4014598540145985
recall score : 0.3618421052631579
precision score : 0.45081967213114754


Score: 0.7103
INFO:__main__:Score: 0.7103
ACC BEST Score: 0.7173
INFO:__main__:ACC BEST Score: 0.7173
Score: 0.7063
INFO:__main__:Score: 0.7063
ACC BEST Score: 0.7069
INFO:__main__:ACC BEST Score: 0.7069


f1 score : 0.35714285714285715
recall score : 0.2631578947368421
precision score : 0.5555555555555556
f1 score : 0.25724453482460596
recall score : 0.16611950098489822
precision score : 0.5698198198198198


In [19]:
from google.colab import runtime
runtime.unassign()