<a href="https://colab.research.google.com/github/goya5858/commonlitreadabilityprize/blob/main/working/DeBERTa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install libs

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
compe_name = 'commonlitreadabilityprize'
%cd /content/drive/MyDrive/kaggle/works/$compe_name/working/

!pip install -q -q -q -U albumentations
!pip install -q -q -q -U torch
!pip install -q -q -q timm
!pip install -q -q -q pytorch_lightning
!pip install -q -q -q -U transformers
!pip install -q -q -q -U sentencepiece

import os
import re
import gc
import sys
import time
import copy
import random
import warnings
from tqdm import tqdm_notebook as tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
import PIL.Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations
from albumentations.pytorch.transforms import ToTensorV2

import torch.optim as optim
from torch.optim import lr_scheduler
from torch.optim.lr_scheduler import CosineAnnealingLR

import timm

import nltk
from wordcloud import WordCloud
nltk.download('stopwords')
from nltk.corpus import stopwords
stop=stopwords.words('english')
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob,Word
from collections import Counter
import string
from torch.nn.utils.rnn import pad_sequence

from torch.nn import MSELoss

from torch.cuda import amp

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import model_selection

import transformers
from transformers import get_linear_schedule_with_warmup, AdamW

from transformers import *

def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    #tf.random.set_seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

/content/drive/MyDrive/kaggle/works/commonlitreadabilityprize/working
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Dataset

In [None]:
DEVICE = 'cuda:0'

#get_tokenizer = RobertaTokenizer
#get_model     = BartForSequenceClassification
#get_tokenizer = AutoTokenizer #RobertaTokenizer より汎用性ある気がする　使い勝手的な面で
#get_model     = AutoModel
#get_model     = RobertaForSequenceClassification
#get_model     = RobertaModel
get_tokenizer = DebertaTokenizer
get_model     = DebertaModel

VOCAB_PATH = 'microsoft/deberta-base'
MODEL_PATH = 'microsoft/deberta-base'
#VOCAB_PATH = 'bert-base-uncased'
#MODEL_PATH = 'bert-base-uncased'

In [None]:
ROOT = f"../input/{compe_name}/"
MODEL_ROOT = "../input/models/DeBERTa_model/"

df = pd.read_csv(ROOT+'train.csv')
df.head()

Unnamed: 0,id,url_legal,license,excerpt,target,standard_error
0,c12129c31,,,When the young people returned to the ballroom...,-0.340259,0.464009
1,85aa80a4c,,,"All through dinner time, Mrs. Fayre was somewh...",-0.315372,0.480805
2,b69ac6792,,,"As Roger had predicted, the snow departed as q...",-0.580118,0.476676
3,dd1000b26,,,And outside before the palace a great garden w...,-1.054013,0.450007
4,37c1b32fb,,,Once upon a time there were Three Bears who li...,0.247197,0.510845


In [None]:
test_df = pd.read_csv(ROOT+'test.csv')
test_df.head()

Unnamed: 0,id,url_legal,license,excerpt
0,c0f722661,,,My hope lay in Jack's promise that he would ke...
1,f0953f0a5,,,Dotty continued to go to Mrs. Gray's every nig...
2,0df072751,,,It was a bright and cheerful scene that greete...
3,04caf4e0c,https://en.wikipedia.org/wiki/Cell_division,CC BY-SA 3.0,Cell division is the process by which a parent...
4,0e63f8bea,https://en.wikipedia.org/wiki/Debugging,CC BY-SA 3.0,Debugging is the process of finding and resolv...


In [None]:
def prep_text(text_df):
    text_df = text_df.str.replace("\n","",regex=False)
    return text_df.str.replace("\'s",r"s",regex=True).values

df['excerpt']      = prep_text(df['excerpt'])
test_df['excerpt'] = prep_text(test_df['excerpt'])

MAX_SEQUENCE_LENGTH = df['excerpt'].apply(lambda x: len(x.split())).max()

tokenizer = get_tokenizer.from_pretrained(VOCAB_PATH,
                                          model_max_length=MAX_SEQUENCE_LENGTH
                                          )
df['token']          = df['excerpt'].apply(tokenizer)
test_df['token']   = test_df['excerpt'].apply(tokenizer)

Token indices sequence length is longer than the specified maximum sequence length for this model (222 > 205). Running this sequence through the model will result in indexing errors


In [None]:
class CLPDataset(Dataset):
    def __init__(self, df):
        super().__init__()
        self.token  = df.token
        self.labels = df.target

    def __len__(self):
        return self.labels.shape[0]
  
    def __getitem__(self, idx):
        if len(self.token.iloc[0]) == 2:
            return (
                    torch.tensor(self.token.iloc[idx].input_ids), \
                    #torch.tensor(self.token.iloc[idx].token_type_ids), \
                    torch.tensor(self.token.iloc[idx].attention_mask)
                  ), \
                  torch.tensor(self.labels.iloc[idx])
        if len(self.token.iloc[idx]) == 3:
            return (
                    torch.tensor(self.token.iloc[idx].input_ids), \
                    torch.tensor(self.token.iloc[idx].token_type_ids), \
                    torch.tensor(self.token.iloc[idx].attention_mask)
                    ), \
                  torch.tensor(self.labels.iloc[idx])

In [None]:
def collate_fn(batch):
    inputs, labels = zip(*batch)
    try:
        ids, types, masks = zip(*inputs)
        ids   = pad_sequence(ids, batch_first=True).to(DEVICE)
        types = pad_sequence(types, batch_first=True).to(DEVICE)
        masks = pad_sequence(masks, batch_first=True).to(DEVICE)
        labels= torch.tensor(labels, dtype=torch.float).to(DEVICE)
        return {
                    "input_ids"      : ids, \
                    "token_type_ids" : types, \
                    "attention_mask" : masks
                  }, \
                  labels
    except ValueError:
        ids, masks = zip(*inputs)
        ids   = pad_sequence(ids, batch_first=True).to(DEVICE)
        #types = pad_sequence(types, batch_first=True).to(DEVICE)
        masks = pad_sequence(masks, batch_first=True).to(DEVICE)
        labels= torch.tensor(labels, dtype=torch.float).to(DEVICE)
        return {
                    "input_ids"      : ids, \
                    #"token_type_ids" : types, \
                    "attention_mask" : masks
                  }, \
                  labels

In [None]:
# trainデータを、targetの値をビニングした値を元に層化fold
def create_folds(data, num_splits):
    # we create a new column called kfold and fill it with -1
    folds = pd.DataFrame( np.ones( (data.shape[0],1) )*-1, columns=['kfold'] )
    num_bins = int(np.floor(1 + np.log2(len(data))))

    bins = pd.cut(
          data["target"], bins=num_bins, labels=False
          )

    kf = model_selection.StratifiedKFold(n_splits=num_splits, shuffle=True)
    for f, (t_, v_) in enumerate(kf.split(X=data, y=bins)):
        folds.iloc[v_] = int(f)

    return folds

In [None]:
def get_dataloaders(df, folds, n_fold, BATCH_SIZE):
    #folds = create_folds(df, num_splits=NUM_FOLDS)
    train_df = df[(folds['kfold']!=n_fold)]
    valid_df = df[(folds['kfold']==n_fold)]

    train_dataset = CLPDataset(df=train_df)
    valid_dataset = CLPDataset(df=valid_df)

    train_loader = DataLoader(
        dataset = train_dataset,
        batch_size = BATCH_SIZE,
        shuffle = True,
        collate_fn=collate_fn,
        #num_workers = -1
        )
    valid_loader = DataLoader(
        dataset = valid_dataset,
        batch_size = BATCH_SIZE,
        shuffle = False,
        collate_fn=collate_fn,
        #num_workers = -1
        )
    return train_loader, valid_loader

# model

In [None]:
folds_sample = create_folds(df, num_splits=5)
_, sample_loader = get_dataloaders(df, folds=folds_sample, n_fold=0, BATCH_SIZE=1)
sample_data, _ = iter(sample_loader).next()
sample_data

{'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
        device='cuda:0'),
 'input_ids': tensor([[    1, 11475,  2115,    10,    86,    89,    58,  2873,  6033,    54,
           3033,   561,    11,    10,   790,     9,    49,   308,    11,    10,
           5627,     4,   509,     9,   106,    21,    10,  4046,     6,  7090,
              6, 36687,  9784,   131,     8,    65,    21,    1

In [None]:
class CLPmodel(nn.Module):
    def __init__(self, check_size=False):
        super().__init__()
        self.model  = get_model.from_pretrained(MODEL_PATH).to(DEVICE)
        OUTPUT_SIZE = self.model(**sample_data)[0].shape[-1]
        self.drop1 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(in_features=OUTPUT_SIZE, out_features=1)
        #self.act1 = nn.PReLU()
        #self.drop2 = nn.Dropout(0.5)
        #self.fc2 = nn.Linear(in_features=OUTPUT_SIZE, out_features=1)
        
        if check_size:
            print('base_model`s output_size :', OUTPUT_SIZE)
            print(DEVICE)
  
    def forward(self,inputs):
        out = self.model(**inputs)
        last_hiddens = out[0]
        out = self.drop1(last_hiddens[:,0,:].squeeze(1))
        out = self.fc1(out)
        #out = self.act1(out)
        #out = self.drop2(out)
        #out = out = self.fc2(out)
        return out

In [None]:
model = CLPmodel(check_size=True)
del model, _
gc.collect()

Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


base_model`s output_size : 768
cuda:0


3082

# Training

In [None]:
def train_fn(model, dataloader, loss_fn, optim, scheduler):
    total_loss = 0
    scaler = amp.GradScaler()
    for inputs, labels in tqdm(dataloader):
        optim.zero_grad()
        model = model.to(DEVICE)
        pred = model(inputs)
        loss = loss_fn(pred, labels)
        #loss.backward()
        #optim.step()
        scaler.scale(loss).backward()
        scaler.step(optim)
        scheduler.step()
        scaler.update()
        total_loss += loss.cpu().detach().numpy()
    total_loss /= len(dataloader)
    return np.sqrt(total_loss)

def valid_fn(model, dataloader, loss_fn):
    valid_pred = np.ones(1)
    valid_label = np.zeros(1)
    for inputs, labels in tqdm(dataloader):
        with torch.no_grad():
            model = model.to(DEVICE) 
            pred = model(inputs)
            valid_pred = np.concatenate( [ valid_pred,
                                                          pred.cpu().detach().numpy().reshape(-1,) ] , axis=0)
            valid_label = np.concatenate( [ valid_label,
                                                          labels.cpu().detach().numpy().reshape(-1,) ] , axis=0)
    loss = loss_fn(  torch.tensor( valid_pred[1:] ) , 
                            torch.tensor( valid_label[1:] )  )
    return np.sqrt( loss.cpu().detach().numpy() ), valid_pred[1:]

In [None]:
def loss_fn(pred, labels):
  return MSELoss()(pred.view(-1), labels.view(-1))

def train_fold(folds, n_fold, seed):
    # １fold分の学習を行う
    # モデルを作成し、学習させる　validが一番いいモデルを保存する
    best_score = np.inf
    best_pred = 0
    best_model = []

    train_loader, valid_loader = get_dataloaders(df, folds, n_fold, BATCH_SIZE)
    model = CLPmodel().to(DEVICE)
    optimizer = optim.AdamW(params=model.parameters(), lr=5e-5)
    lr_scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=len(train_loader))

    for epoch in range(EPOCHS):
        train_loss = train_fn(model, train_loader, loss_fn, optimizer, lr_scheduler)
        valid_loss, valid_pred = valid_fn(model, valid_loader, loss_fn)

        print(f"seed : {seed}, fold : {n_fold}, epoch : {epoch}, train_loss : {train_loss}")
        print(f"seed : {seed}, fold : {n_fold}, epoch : {epoch}, valid_loss : {valid_loss}")
        print('='*30)

    if valid_loss < best_score:
        best_socre = valid_loss
        del best_model, best_pred
        gc.collect()
        best_model = copy.deepcopy(model)
        best_pred  = valid_pred

    model_path = MODEL_ROOT+f"deberta-base-seed-{seed}-fold-{n_fold}.pth"
    torch.save( model.to('cpu').state_dict(), model_path)
    del best_model, model
    gc.collect()
    return best_pred

In [None]:
def train_seed(seed):
    seed_everything(seed)
    folds = create_folds(df, num_splits=NUM_FOLDS)
    oof   = np.zeros(df['target'].shape)

    for n_fold in range(NUM_FOLDS):
        best_pred = train_fold(folds, n_fold, seed)
        oof[(folds['kfold']==n_fold)] = best_pred
        print('-='*20)

    print('%'*50)
    oof_score = loss_fn(pred   = torch.tensor(oof).to(DEVICE),
                                  labels = torch.tensor(df['target'].values).to(DEVICE) )
    print(f'oof_score_{seed} :', np.sqrt( oof_score.cpu().detach().numpy()))
    return np.sqrt( oof_score.cpu().detach().numpy() )

In [19]:
EPOCHS = 10
DEVICE = 'cuda:0'
SEEDs = [0, 7, 42, 88, 100]

BATCH_SIZE = 16
NUM_FOLDS = 5

MAX_WORDS = df["excerpt"].apply(lambda x: len(x.split())).max()

oof_scores  = []
for seed in SEEDs:
  print(f'--------------- SEED {seed} is set ---------------')
  oof_score_for_seed = train_seed(seed)
  oof_scores.append(oof_score_for_seed)

print('&%&%'*30)
print("all_oof_score_avg : ", np.mean(oof_scores) )
print("all_oof_scores : ", oof_scores)

--------------- SEED 0 is set ---------------


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 0, train_loss : 0.9755938350729623
seed : 0, fold : 0, epoch : 0, valid_loss : 0.6998608791735211


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 1, train_loss : 0.6891656430054485
seed : 0, fold : 0, epoch : 1, valid_loss : 0.7107813667350179


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 2, train_loss : 0.5461833435491452
seed : 0, fold : 0, epoch : 2, valid_loss : 0.5474136473622001


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 3, train_loss : 0.4446542123679008
seed : 0, fold : 0, epoch : 3, valid_loss : 0.5763092124234516


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 4, train_loss : 0.3790949896574331
seed : 0, fold : 0, epoch : 4, valid_loss : 0.5225596855037018


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 5, train_loss : 0.32377649662824504
seed : 0, fold : 0, epoch : 5, valid_loss : 0.5622386024626856


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 6, train_loss : 0.2860021197818227
seed : 0, fold : 0, epoch : 6, valid_loss : 0.508997471241475


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 7, train_loss : 0.24797264395372867
seed : 0, fold : 0, epoch : 7, valid_loss : 0.5338329118951541


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 8, train_loss : 0.2226730693624854
seed : 0, fold : 0, epoch : 8, valid_loss : 0.5091067159825846


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 9, train_loss : 0.21427265121582906
seed : 0, fold : 0, epoch : 9, valid_loss : 0.5174382483730647
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 0, train_loss : 1.0268682778345362
seed : 0, fold : 1, epoch : 0, valid_loss : 0.755521524600152


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 1, train_loss : 0.7200376250421766
seed : 0, fold : 1, epoch : 1, valid_loss : 0.6409646618570899


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 2, train_loss : 0.5347682127567508
seed : 0, fold : 1, epoch : 2, valid_loss : 0.5651965742236791


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 3, train_loss : 0.4564911405154122
seed : 0, fold : 1, epoch : 3, valid_loss : 0.6321887481954895


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 4, train_loss : 0.4079436374545109
seed : 0, fold : 1, epoch : 4, valid_loss : 0.5684732231134859


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 5, train_loss : 0.3810117917007562
seed : 0, fold : 1, epoch : 5, valid_loss : 0.617098234854229


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 6, train_loss : 0.32487575874076746
seed : 0, fold : 1, epoch : 6, valid_loss : 0.5465247465339124


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 7, train_loss : 0.28510759951271347
seed : 0, fold : 1, epoch : 7, valid_loss : 0.5457737953477316


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 8, train_loss : 0.2865570050344524
seed : 0, fold : 1, epoch : 8, valid_loss : 0.5439169805717045


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 9, train_loss : 0.24878142984363769
seed : 0, fold : 1, epoch : 9, valid_loss : 0.5406431243687039
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 0, train_loss : 0.8175582049874848
seed : 0, fold : 2, epoch : 0, valid_loss : 0.659108805079498


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 1, train_loss : 0.6626213952250556
seed : 0, fold : 2, epoch : 1, valid_loss : 0.6558488139047939


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 2, train_loss : 0.5279070709928692
seed : 0, fold : 2, epoch : 2, valid_loss : 0.5589030342151347


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 3, train_loss : 0.4463957089730878
seed : 0, fold : 2, epoch : 3, valid_loss : 0.587164059079087


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 4, train_loss : 0.3703293786617568
seed : 0, fold : 2, epoch : 4, valid_loss : 0.5199882205598592


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 5, train_loss : 0.32868443036916184
seed : 0, fold : 2, epoch : 5, valid_loss : 0.5527602776367658


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 6, train_loss : 0.2831389487053948
seed : 0, fold : 2, epoch : 6, valid_loss : 0.5161103956753831


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 7, train_loss : 0.2427849375048702
seed : 0, fold : 2, epoch : 7, valid_loss : 0.5300877609560759


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 8, train_loss : 0.23375821831229898
seed : 0, fold : 2, epoch : 8, valid_loss : 0.5132787957061875


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 9, train_loss : 0.20099375014718399
seed : 0, fold : 2, epoch : 9, valid_loss : 0.5170746661643136
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 0, train_loss : 1.2988598508715992
seed : 0, fold : 3, epoch : 0, valid_loss : 1.0374314127618365


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 1, train_loss : 0.9327767098357147
seed : 0, fold : 3, epoch : 1, valid_loss : 0.72107495323198


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 2, train_loss : 0.6386695899483804
seed : 0, fold : 3, epoch : 2, valid_loss : 0.6506614742715885


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 3, train_loss : 0.5344670453446605
seed : 0, fold : 3, epoch : 3, valid_loss : 0.6811474151858657


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 4, train_loss : 0.46293669741282856
seed : 0, fold : 3, epoch : 4, valid_loss : 0.5965656715567433


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 5, train_loss : 0.3993165817986455
seed : 0, fold : 3, epoch : 5, valid_loss : 0.5732790321630543


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 6, train_loss : 0.4119109474068772
seed : 0, fold : 3, epoch : 6, valid_loss : 0.5770893597323368


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 7, train_loss : 0.3519247711134501
seed : 0, fold : 3, epoch : 7, valid_loss : 0.6166917300290446


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 8, train_loss : 0.3271864327096016
seed : 0, fold : 3, epoch : 8, valid_loss : 0.5729385378270918


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 9, train_loss : 0.2857096634194257
seed : 0, fold : 3, epoch : 9, valid_loss : 0.5583927751983515
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 0, train_loss : 0.9434208706253686
seed : 0, fold : 4, epoch : 0, valid_loss : 0.6975006016662679


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 1, train_loss : 0.701493563453075
seed : 0, fold : 4, epoch : 1, valid_loss : 0.6726255587246203


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 2, train_loss : 0.6192994087220858
seed : 0, fold : 4, epoch : 2, valid_loss : 0.6761447209055277


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 3, train_loss : 0.4886859075060653
seed : 0, fold : 4, epoch : 3, valid_loss : 0.5973374885541983


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 4, train_loss : 0.4008455515056693
seed : 0, fold : 4, epoch : 4, valid_loss : 0.5521244558868029


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 5, train_loss : 0.355639492196195
seed : 0, fold : 4, epoch : 5, valid_loss : 0.6106868847776724


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 6, train_loss : 0.30242223193910583
seed : 0, fold : 4, epoch : 6, valid_loss : 0.5431871765806131


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 7, train_loss : 0.25872250871258456
seed : 0, fold : 4, epoch : 7, valid_loss : 0.5882464578810341


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 8, train_loss : 0.2406431674509706
seed : 0, fold : 4, epoch : 8, valid_loss : 0.5336064361706027


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 9, train_loss : 0.20935060996634766
seed : 0, fold : 4, epoch : 9, valid_loss : 0.5355346143344308
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_0 : 0.5340412987388031
--------------- SEED 7 is set ---------------


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 0, train_loss : 1.14333568755462
seed : 7, fold : 0, epoch : 0, valid_loss : 0.818438044499962


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 1, train_loss : 0.7716509001541706
seed : 7, fold : 0, epoch : 1, valid_loss : 0.771446172750674


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 2, train_loss : 0.6482188489559675
seed : 7, fold : 0, epoch : 2, valid_loss : 0.6455144507994659


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 3, train_loss : 0.5558834206670706
seed : 7, fold : 0, epoch : 3, valid_loss : 0.7962576949953998


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 4, train_loss : 0.5008017457496656
seed : 7, fold : 0, epoch : 4, valid_loss : 0.6012024849360581


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 5, train_loss : 0.4431395459979445
seed : 7, fold : 0, epoch : 5, valid_loss : 0.6760496345015331


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 6, train_loss : 0.42319849003878784
seed : 7, fold : 0, epoch : 6, valid_loss : 0.6092692735245444


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 7, train_loss : 0.36428162862383784
seed : 7, fold : 0, epoch : 7, valid_loss : 0.6106701855535009


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 8, train_loss : 0.31003338257662505
seed : 7, fold : 0, epoch : 8, valid_loss : 0.5984386819498478


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 9, train_loss : 0.28316954975998426
seed : 7, fold : 0, epoch : 9, valid_loss : 0.6077170956324304
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 0, train_loss : 1.1030052565342021
seed : 7, fold : 1, epoch : 0, valid_loss : 0.9110740473738627


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 1, train_loss : 0.879947647024765
seed : 7, fold : 1, epoch : 1, valid_loss : 1.1454135394407567


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 2, train_loss : 0.7124412780150207
seed : 7, fold : 1, epoch : 2, valid_loss : 0.662600011563986


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 3, train_loss : 0.6246573719591297
seed : 7, fold : 1, epoch : 3, valid_loss : 0.6849297083815252


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 4, train_loss : 0.5501999624068674
seed : 7, fold : 1, epoch : 4, valid_loss : 0.6144088393689267


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 5, train_loss : 0.49264991651998297
seed : 7, fold : 1, epoch : 5, valid_loss : 0.6252307556119211


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 6, train_loss : 0.47377974428841707
seed : 7, fold : 1, epoch : 6, valid_loss : 0.5744553384266492


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 7, train_loss : 0.4091074783431184
seed : 7, fold : 1, epoch : 7, valid_loss : 0.5975814799784915


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 8, train_loss : 0.3815762763484017
seed : 7, fold : 1, epoch : 8, valid_loss : 0.583379050467539


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 9, train_loss : 0.36908942721161764
seed : 7, fold : 1, epoch : 9, valid_loss : 0.5762467364445342
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 0, train_loss : 0.7636810227434306
seed : 7, fold : 2, epoch : 0, valid_loss : 0.570080037921621


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 1, train_loss : 0.5532631910034395
seed : 7, fold : 2, epoch : 1, valid_loss : 0.6224015243187019


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 2, train_loss : 0.46277198981700085
seed : 7, fold : 2, epoch : 2, valid_loss : 0.5110584777762339


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 3, train_loss : 0.40907735454414634
seed : 7, fold : 2, epoch : 3, valid_loss : 0.5228407658252329


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 4, train_loss : 0.34253364105210227
seed : 7, fold : 2, epoch : 4, valid_loss : 0.5002729146832522


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 5, train_loss : 0.29380977406465103
seed : 7, fold : 2, epoch : 5, valid_loss : 0.5154976948474606


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 6, train_loss : 0.2695022301372628
seed : 7, fold : 2, epoch : 6, valid_loss : 0.5017007244456676


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 7, train_loss : 0.24026604040424615
seed : 7, fold : 2, epoch : 7, valid_loss : 0.5053584475017664


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 8, train_loss : 0.23346946810795283
seed : 7, fold : 2, epoch : 8, valid_loss : 0.49652893247448887


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 9, train_loss : 0.20386239843864576
seed : 7, fold : 2, epoch : 9, valid_loss : 0.5347962508188973
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 0, train_loss : 0.9173283677888613
seed : 7, fold : 3, epoch : 0, valid_loss : 0.7230480839140215


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 1, train_loss : 0.6686861578031029
seed : 7, fold : 3, epoch : 1, valid_loss : 0.6687017072151957


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 2, train_loss : 0.548504841266474
seed : 7, fold : 3, epoch : 2, valid_loss : 0.5886621993981503


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 3, train_loss : 0.4218027393011851
seed : 7, fold : 3, epoch : 3, valid_loss : 0.612366041386538


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 4, train_loss : 0.3865266533595748
seed : 7, fold : 3, epoch : 4, valid_loss : 0.5416368863560187


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 5, train_loss : 0.3155881937321201
seed : 7, fold : 3, epoch : 5, valid_loss : 0.5921023595852695


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 6, train_loss : 0.2848695313230988
seed : 7, fold : 3, epoch : 6, valid_loss : 0.5502193103685802


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 7, train_loss : 0.25824137126616903
seed : 7, fold : 3, epoch : 7, valid_loss : 0.556748164502445


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 8, train_loss : 0.24492135039608676
seed : 7, fold : 3, epoch : 8, valid_loss : 0.5385754825354128


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 9, train_loss : 0.22224773770697778
seed : 7, fold : 3, epoch : 9, valid_loss : 0.6044075313329408
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 0, train_loss : 0.9407415727166643
seed : 7, fold : 4, epoch : 0, valid_loss : 0.6977181838665816


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 1, train_loss : 0.7210462949969055
seed : 7, fold : 4, epoch : 1, valid_loss : 0.7011173343232086


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 2, train_loss : 0.5997578890745006
seed : 7, fold : 4, epoch : 2, valid_loss : 0.5725718254426432


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 3, train_loss : 0.501840622986896
seed : 7, fold : 4, epoch : 3, valid_loss : 0.5904584139656305


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 4, train_loss : 0.45570537632966684
seed : 7, fold : 4, epoch : 4, valid_loss : 0.5521263557990775


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 5, train_loss : 0.3615978109068326
seed : 7, fold : 4, epoch : 5, valid_loss : 0.5539115213053291


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 6, train_loss : 0.3172871176134027
seed : 7, fold : 4, epoch : 6, valid_loss : 0.5269116140980741


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 7, train_loss : 0.2778006126101792
seed : 7, fold : 4, epoch : 7, valid_loss : 0.5518900885664331


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 8, train_loss : 0.25814861276746476
seed : 7, fold : 4, epoch : 8, valid_loss : 0.5260604440033746


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 9, train_loss : 0.22997879008201277
seed : 7, fold : 4, epoch : 9, valid_loss : 0.5424475384545043
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_7 : 0.5739350482075524
--------------- SEED 42 is set ---------------


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 0, train_loss : 0.8111831628712286
seed : 42, fold : 0, epoch : 0, valid_loss : 0.6027043234634025


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 1, train_loss : 0.568347416903302
seed : 42, fold : 0, epoch : 1, valid_loss : 0.6330490912661145


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 2, train_loss : 0.48711531718727474
seed : 42, fold : 0, epoch : 2, valid_loss : 0.5367334556789324


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 3, train_loss : 0.38876081370168
seed : 42, fold : 0, epoch : 3, valid_loss : 0.5818242222416595


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 4, train_loss : 0.35027113692076495
seed : 42, fold : 0, epoch : 4, valid_loss : 0.5147434123309658


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 5, train_loss : 0.296866115880925
seed : 42, fold : 0, epoch : 5, valid_loss : 0.5999646861956829


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 6, train_loss : 0.2686644636757035
seed : 42, fold : 0, epoch : 6, valid_loss : 0.5114497967949874


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 7, train_loss : 0.23349603880168843
seed : 42, fold : 0, epoch : 7, valid_loss : 0.5131171235377087


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 8, train_loss : 0.22780774293294084
seed : 42, fold : 0, epoch : 8, valid_loss : 0.5186578578032998


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 9, train_loss : 0.20242198433049513
seed : 42, fold : 0, epoch : 9, valid_loss : 0.5193640739894675
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 0, train_loss : 1.0092504787033618
seed : 42, fold : 1, epoch : 0, valid_loss : 0.8124731426051958


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 1, train_loss : 0.7362716505074421
seed : 42, fold : 1, epoch : 1, valid_loss : 0.639842750130703


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 2, train_loss : 0.5712518681056732
seed : 42, fold : 1, epoch : 2, valid_loss : 0.5677304475182894


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 3, train_loss : 0.4430362841110943
seed : 42, fold : 1, epoch : 3, valid_loss : 0.6363758523327868


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 4, train_loss : 0.3805248817809908
seed : 42, fold : 1, epoch : 4, valid_loss : 0.5455833606870162


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 5, train_loss : 0.32072625762623685
seed : 42, fold : 1, epoch : 5, valid_loss : 0.5704936619163227


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 6, train_loss : 0.2726199790172212
seed : 42, fold : 1, epoch : 6, valid_loss : 0.5418655435849974


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 7, train_loss : 0.22693846800479484
seed : 42, fold : 1, epoch : 7, valid_loss : 0.5619262918786782


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 8, train_loss : 0.20697374437631813
seed : 42, fold : 1, epoch : 8, valid_loss : 0.5374937127427982


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 9, train_loss : 0.1789256452048499
seed : 42, fold : 1, epoch : 9, valid_loss : 0.5396055837379243
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 0, train_loss : 1.1825304787427418
seed : 42, fold : 2, epoch : 0, valid_loss : 0.9754650314003808


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 1, train_loss : 0.8640805288836203
seed : 42, fold : 2, epoch : 1, valid_loss : 0.6820018356202686


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 2, train_loss : 0.6008896692507357
seed : 42, fold : 2, epoch : 2, valid_loss : 0.6056469438024585


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 3, train_loss : 0.5361248581821099
seed : 42, fold : 2, epoch : 3, valid_loss : 0.6014101950363917


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 4, train_loss : 0.45107680061482924
seed : 42, fold : 2, epoch : 4, valid_loss : 0.5499941239320815


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 5, train_loss : 0.4165463786596257
seed : 42, fold : 2, epoch : 5, valid_loss : 0.6644837428339381


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 6, train_loss : 0.38635173590674887
seed : 42, fold : 2, epoch : 6, valid_loss : 0.5601492752724908


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 7, train_loss : 0.32218475947209413
seed : 42, fold : 2, epoch : 7, valid_loss : 0.5552552340781364


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 8, train_loss : 0.28896640052156536
seed : 42, fold : 2, epoch : 8, valid_loss : 0.5343359625812233


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 9, train_loss : 0.254501150197365
seed : 42, fold : 2, epoch : 9, valid_loss : 0.5873725817296653
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 0, train_loss : 1.0751774448445814
seed : 42, fold : 3, epoch : 0, valid_loss : 0.9270803604161799


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 1, train_loss : 0.8632184146702805
seed : 42, fold : 3, epoch : 1, valid_loss : 0.6976087982655197


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 2, train_loss : 0.6822409098611145
seed : 42, fold : 3, epoch : 2, valid_loss : 0.6559077796780618


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 3, train_loss : 0.5915951968334785
seed : 42, fold : 3, epoch : 3, valid_loss : 0.6553413838380612


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 4, train_loss : 0.5212436162415679
seed : 42, fold : 3, epoch : 4, valid_loss : 0.5932643036484058


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 5, train_loss : 0.48414996914285374
seed : 42, fold : 3, epoch : 5, valid_loss : 0.6772462522132708


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 6, train_loss : 0.4588317416968631
seed : 42, fold : 3, epoch : 6, valid_loss : 0.585726946110078


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 7, train_loss : 0.4136453445555614
seed : 42, fold : 3, epoch : 7, valid_loss : 0.6897704615150192


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 8, train_loss : 0.38089401746079593
seed : 42, fold : 3, epoch : 8, valid_loss : 0.5536396312039386


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 9, train_loss : 0.33788393187996124
seed : 42, fold : 3, epoch : 9, valid_loss : 0.5292663222347885
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 0, train_loss : 1.1002193810046659
seed : 42, fold : 4, epoch : 0, valid_loss : 0.9552542495625422


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 1, train_loss : 0.8832911721488014
seed : 42, fold : 4, epoch : 1, valid_loss : 0.7939374631684345


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 2, train_loss : 0.760768609556353
seed : 42, fold : 4, epoch : 2, valid_loss : 0.665808128226907


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 3, train_loss : 0.665636841147399
seed : 42, fold : 4, epoch : 3, valid_loss : 0.7616410183053867


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 4, train_loss : 0.639776103140723
seed : 42, fold : 4, epoch : 4, valid_loss : 0.6548665495774274


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 5, train_loss : 0.5640147202288807
seed : 42, fold : 4, epoch : 5, valid_loss : 0.6298020078766832


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 6, train_loss : 0.4959673928438608
seed : 42, fold : 4, epoch : 6, valid_loss : 0.6152292916535278


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 7, train_loss : 0.45960826670815813
seed : 42, fold : 4, epoch : 7, valid_loss : 0.618859526264766


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 8, train_loss : 0.3872808849385372
seed : 42, fold : 4, epoch : 8, valid_loss : 0.6034992275754708


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 9, train_loss : 0.3589553142452414
seed : 42, fold : 4, epoch : 9, valid_loss : 0.6344684944908091
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_42 : 0.5636387544598439
--------------- SEED 88 is set ---------------


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 0, train_loss : 1.0131081741514432
seed : 88, fold : 0, epoch : 0, valid_loss : 0.7740117304397098


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 1, train_loss : 0.7624567963275973
seed : 88, fold : 0, epoch : 1, valid_loss : 0.7235617127902847


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 2, train_loss : 0.5848381952210151
seed : 88, fold : 0, epoch : 2, valid_loss : 0.601510311799938


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 3, train_loss : 0.5021355802710916
seed : 88, fold : 0, epoch : 3, valid_loss : 0.6094126002837741


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 4, train_loss : 0.42609264606955677
seed : 88, fold : 0, epoch : 4, valid_loss : 0.5304862707905659


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 5, train_loss : 0.359154031157411
seed : 88, fold : 0, epoch : 5, valid_loss : 0.5308785694718596


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 6, train_loss : 0.32114273905529234
seed : 88, fold : 0, epoch : 6, valid_loss : 0.52201430368963


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 7, train_loss : 0.2792261132503308
seed : 88, fold : 0, epoch : 7, valid_loss : 0.5283425516358274


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 8, train_loss : 0.2508383896767868
seed : 88, fold : 0, epoch : 8, valid_loss : 0.5127592236409131


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 9, train_loss : 0.23139103791899723
seed : 88, fold : 0, epoch : 9, valid_loss : 0.536048094832291
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 0, train_loss : 0.9876972228652077
seed : 88, fold : 1, epoch : 0, valid_loss : 0.7888876781736487


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 1, train_loss : 0.7630786310526012
seed : 88, fold : 1, epoch : 1, valid_loss : 0.8498846025967469


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 2, train_loss : 0.6148579806147695
seed : 88, fold : 1, epoch : 2, valid_loss : 0.6330616153619296


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 3, train_loss : 0.5162136399101497
seed : 88, fold : 1, epoch : 3, valid_loss : 0.6736006567415729


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 4, train_loss : 0.44867858963329654
seed : 88, fold : 1, epoch : 4, valid_loss : 0.581102486682712


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 5, train_loss : 0.39993545091313587
seed : 88, fold : 1, epoch : 5, valid_loss : 0.6012228793662051


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 6, train_loss : 0.33912666416168846
seed : 88, fold : 1, epoch : 6, valid_loss : 0.5608314686837069


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 7, train_loss : 0.3023624237526801
seed : 88, fold : 1, epoch : 7, valid_loss : 0.549145771110593


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 8, train_loss : 0.281243878214548
seed : 88, fold : 1, epoch : 8, valid_loss : 0.5383649380481172


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 9, train_loss : 0.24488756834655165
seed : 88, fold : 1, epoch : 9, valid_loss : 0.5282526559229104
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 0, train_loss : 1.1221520257559339
seed : 88, fold : 2, epoch : 0, valid_loss : 1.006076545655397


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 1, train_loss : 0.8837784283032134
seed : 88, fold : 2, epoch : 1, valid_loss : 0.6680159924451857


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 2, train_loss : 0.5913877452952123
seed : 88, fold : 2, epoch : 2, valid_loss : 0.5894491858381494


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 3, train_loss : 0.49301308664355037
seed : 88, fold : 2, epoch : 3, valid_loss : 0.5876151874653466


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 4, train_loss : 0.43762866795729677
seed : 88, fold : 2, epoch : 4, valid_loss : 0.5457290080712092


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 5, train_loss : 0.3632753472958589
seed : 88, fold : 2, epoch : 5, valid_loss : 0.634567804718319


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 6, train_loss : 0.3345039939494598
seed : 88, fold : 2, epoch : 6, valid_loss : 0.5391927994610468


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 7, train_loss : 0.2933440267654336
seed : 88, fold : 2, epoch : 7, valid_loss : 0.5311888036707447


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 8, train_loss : 0.27876427101184176
seed : 88, fold : 2, epoch : 8, valid_loss : 0.531134596614242


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 9, train_loss : 0.2375460342457841
seed : 88, fold : 2, epoch : 9, valid_loss : 0.5287615329115314
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 0, train_loss : 1.035290304055264
seed : 88, fold : 3, epoch : 0, valid_loss : 0.8104168779101659


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 1, train_loss : 0.7524320731172683
seed : 88, fold : 3, epoch : 1, valid_loss : 0.7705345288175285


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 2, train_loss : 0.6154008807623162
seed : 88, fold : 3, epoch : 2, valid_loss : 0.6633940303572068


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 3, train_loss : 0.5590627985946812
seed : 88, fold : 3, epoch : 3, valid_loss : 0.6694708952320919


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 4, train_loss : 0.5190478298847221
seed : 88, fold : 3, epoch : 4, valid_loss : 0.6358268636117188


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 5, train_loss : 0.4509908690608603
seed : 88, fold : 3, epoch : 5, valid_loss : 0.6464685896046579


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 6, train_loss : 0.41180082896124987
seed : 88, fold : 3, epoch : 6, valid_loss : 0.595425447405092


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 7, train_loss : 0.3739524042705197
seed : 88, fold : 3, epoch : 7, valid_loss : 0.5926342660116937


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 8, train_loss : 0.34675404224803064
seed : 88, fold : 3, epoch : 8, valid_loss : 0.564427135965789


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 9, train_loss : 0.317049773895964
seed : 88, fold : 3, epoch : 9, valid_loss : 0.5910316881571734
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 0, train_loss : 0.983931551893629
seed : 88, fold : 4, epoch : 0, valid_loss : 0.7268004698838038


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 1, train_loss : 0.7303939653054639
seed : 88, fold : 4, epoch : 1, valid_loss : 0.9281141622116397


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 2, train_loss : 0.5759346174562834
seed : 88, fold : 4, epoch : 2, valid_loss : 0.608018568214477


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 3, train_loss : 0.5141335378899725
seed : 88, fold : 4, epoch : 3, valid_loss : 0.637577548091851


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 4, train_loss : 0.45584679520186994
seed : 88, fold : 4, epoch : 4, valid_loss : 0.5560170765668168


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 5, train_loss : 0.3895353661665688
seed : 88, fold : 4, epoch : 5, valid_loss : 0.6010573023478989


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 6, train_loss : 0.36481646976868326
seed : 88, fold : 4, epoch : 6, valid_loss : 0.5418948620974899


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 7, train_loss : 0.2991632325944768
seed : 88, fold : 4, epoch : 7, valid_loss : 0.5611493430163603


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 8, train_loss : 0.24437695099574525
seed : 88, fold : 4, epoch : 8, valid_loss : 0.5186329348240766


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 9, train_loss : 0.20389612412335936
seed : 88, fold : 4, epoch : 9, valid_loss : 0.5291353998107494
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_88 : 0.5431972967755268
--------------- SEED 100 is set ---------------


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 0, train_loss : 0.9901132860616888
seed : 100, fold : 0, epoch : 0, valid_loss : 0.7219426490958127


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 1, train_loss : 0.6688843781814272
seed : 100, fold : 0, epoch : 1, valid_loss : 0.7718977185592176


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 2, train_loss : 0.5609013667919337
seed : 100, fold : 0, epoch : 2, valid_loss : 0.6059636164224033


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 3, train_loss : 0.4585687220216149
seed : 100, fold : 0, epoch : 3, valid_loss : 0.7010524766429335


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 4, train_loss : 0.3606361608658896
seed : 100, fold : 0, epoch : 4, valid_loss : 0.5427333096381105


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 5, train_loss : 0.2722938850561089
seed : 100, fold : 0, epoch : 5, valid_loss : 0.5592133644348657


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 6, train_loss : 0.23830042787252254
seed : 100, fold : 0, epoch : 6, valid_loss : 0.5468553739396259


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 7, train_loss : 0.1895140362440863
seed : 100, fold : 0, epoch : 7, valid_loss : 0.5373662034134726


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 8, train_loss : 0.17138081316518036
seed : 100, fold : 0, epoch : 8, valid_loss : 0.5367868947194709


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 9, train_loss : 0.16110609254707173
seed : 100, fold : 0, epoch : 9, valid_loss : 0.5467529094745504
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 0, train_loss : 0.8498239510873299
seed : 100, fold : 1, epoch : 0, valid_loss : 0.6601638632286643


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 1, train_loss : 0.6670125177416814
seed : 100, fold : 1, epoch : 1, valid_loss : 0.7184947161346376


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 2, train_loss : 0.5005796104768744
seed : 100, fold : 1, epoch : 2, valid_loss : 0.5542413072870215


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 3, train_loss : 0.4179730972484749
seed : 100, fold : 1, epoch : 3, valid_loss : 0.5549844687782575


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 4, train_loss : 0.37747300742422796
seed : 100, fold : 1, epoch : 4, valid_loss : 0.5549702711768273


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 5, train_loss : 0.3040013701448061
seed : 100, fold : 1, epoch : 5, valid_loss : 0.5596410958603996


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 6, train_loss : 0.27228896442647227
seed : 100, fold : 1, epoch : 6, valid_loss : 0.5274866781666532


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 7, train_loss : 0.24489114570271275
seed : 100, fold : 1, epoch : 7, valid_loss : 0.5296890742495194


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 8, train_loss : 0.22467474120610012
seed : 100, fold : 1, epoch : 8, valid_loss : 0.5281235621402984


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 9, train_loss : 0.19752195328277708
seed : 100, fold : 1, epoch : 9, valid_loss : 0.5277657770072641
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 0, train_loss : 0.781674720200496
seed : 100, fold : 2, epoch : 0, valid_loss : 0.6767575643085194


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 1, train_loss : 0.6013102986616669
seed : 100, fold : 2, epoch : 1, valid_loss : 0.6587792172358077


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 2, train_loss : 0.5331562994193879
seed : 100, fold : 2, epoch : 2, valid_loss : 0.5517944954603514


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 3, train_loss : 0.42857516582082883
seed : 100, fold : 2, epoch : 3, valid_loss : 0.5434256066830431


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 4, train_loss : 0.35007356644761484
seed : 100, fold : 2, epoch : 4, valid_loss : 0.5239328597168289


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 5, train_loss : 0.3110267459547863
seed : 100, fold : 2, epoch : 5, valid_loss : 0.5245810415920028


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 6, train_loss : 0.2857969666479299
seed : 100, fold : 2, epoch : 6, valid_loss : 0.5136527955945633


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 7, train_loss : 0.24972597390099735
seed : 100, fold : 2, epoch : 7, valid_loss : 0.5255478904923252


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 8, train_loss : 0.23077813091766863
seed : 100, fold : 2, epoch : 8, valid_loss : 0.4950065612233911


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 9, train_loss : 0.1987537432019925
seed : 100, fold : 2, epoch : 9, valid_loss : 0.5038149330369251
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 0, train_loss : 0.9346977965602924
seed : 100, fold : 3, epoch : 0, valid_loss : 0.6620104976257677


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 1, train_loss : 0.6975767749439361
seed : 100, fold : 3, epoch : 1, valid_loss : 0.7772995749755094


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 2, train_loss : 0.5795018070299546
seed : 100, fold : 3, epoch : 2, valid_loss : 0.5796554638364796


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 3, train_loss : 0.46859594089053985
seed : 100, fold : 3, epoch : 3, valid_loss : 0.5881490248714569


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 4, train_loss : 0.386021325130994
seed : 100, fold : 3, epoch : 4, valid_loss : 0.557818608808978


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 5, train_loss : 0.32475790739989424
seed : 100, fold : 3, epoch : 5, valid_loss : 0.5812355328926928


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 6, train_loss : 0.2765020895002227
seed : 100, fold : 3, epoch : 6, valid_loss : 0.5524519968969046


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 7, train_loss : 0.24432934304102571
seed : 100, fold : 3, epoch : 7, valid_loss : 0.5462036494323917


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 8, train_loss : 0.2223020050513647
seed : 100, fold : 3, epoch : 8, valid_loss : 0.5536849925863814


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 9, train_loss : 0.1954372895209148
seed : 100, fold : 3, epoch : 9, valid_loss : 0.544110594928961
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at microsoft/deberta-base were not used when initializing DebertaModel: ['lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'config', 'lm_predictions.lm_head.dense.bias', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 0, train_loss : 0.8393955803573947
seed : 100, fold : 4, epoch : 0, valid_loss : 0.6342645689297308


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 1, train_loss : 0.604299724918088
seed : 100, fold : 4, epoch : 1, valid_loss : 0.6740145658925777


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 2, train_loss : 0.5338648658996419
seed : 100, fold : 4, epoch : 2, valid_loss : 0.550390019078145


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 3, train_loss : 0.415023681715924
seed : 100, fold : 4, epoch : 3, valid_loss : 0.5727273831242694


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 4, train_loss : 0.36965467153933385
seed : 100, fold : 4, epoch : 4, valid_loss : 0.5271478845964137


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 5, train_loss : 0.3128347444597154
seed : 100, fold : 4, epoch : 5, valid_loss : 0.5769401995364506


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 6, train_loss : 0.2716135109164865
seed : 100, fold : 4, epoch : 6, valid_loss : 0.5273456726854079


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 7, train_loss : 0.24448001710171863
seed : 100, fold : 4, epoch : 7, valid_loss : 0.5181446121300533


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 8, train_loss : 0.22241112107893504
seed : 100, fold : 4, epoch : 8, valid_loss : 0.5040211785239106


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 9, train_loss : 0.19517171493637506
seed : 100, fold : 4, epoch : 9, valid_loss : 0.5244021492012886
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_100 : 0.5295976895412128
&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%
all_oof_score_avg :  0.5488820175445878
all_oof_scores :  [0.5340412987388031, 0.5739350482075524, 0.5636387544598439, 0.5431972967755268, 0.5295976895412128]


In [None]:
gc.collect()

14771