<a href="https://colab.research.google.com/github/goya5858/commonlitreadabilityprize/blob/main/working/BERT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install libs

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
compe_name = 'commonlitreadabilityprize'
%cd /content/drive/MyDrive/kaggle/works/$compe_name/working/

!pip install -q -q -q -U albumentations
!pip install -q -q -q -U torch
!pip install -q -q -q timm
!pip install -q -q -q pytorch_lightning
!pip install -q -q -q -U transformers
!pip install -q -q -q -U sentencepiece

import os
import re
import gc
import sys
import time
import copy
import random
import warnings
from tqdm import tqdm_notebook as tqdm

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
import PIL.Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import albumentations
from albumentations.pytorch.transforms import ToTensorV2

import torch.optim as optim
from torch.optim import lr_scheduler
from torch.optim.lr_scheduler import CosineAnnealingLR

import timm

import nltk
from wordcloud import WordCloud
nltk.download('stopwords')
from nltk.corpus import stopwords
stop=stopwords.words('english')
from nltk.stem import WordNetLemmatizer
from textblob import TextBlob,Word
from collections import Counter
import string
from torch.nn.utils.rnn import pad_sequence

from torch.nn import MSELoss

from torch.cuda import amp

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import model_selection

import transformers
from transformers import get_linear_schedule_with_warmup, AdamW

from transformers import *

def seed_everything(seed=1234):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    #tf.random.set_seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

/content/drive/MyDrive/kaggle/works/commonlitreadabilityprize/working
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Dataset

In [3]:
DEVICE = 'cuda:0'

#get_tokenizer = RobertaTokenizer
#get_model     = BartForSequenceClassification
get_tokenizer = AutoTokenizer #RobertaTokenizer より汎用性ある気がする　使い勝手的な面で
get_model     = AutoModel

#VOCAB_PATH = 'roberta-base'
#MODEL_PATH = 'facebook/bart-large-mnli'
VOCAB_PATH = 'bert-base-uncased'
MODEL_PATH = 'bert-base-uncased'

In [4]:
ROOT = f"../input/{compe_name}/"
MODEL_ROOT = "../input/models/"

df = pd.read_csv(ROOT+'train.csv')
df.head()

Unnamed: 0,id,url_legal,license,excerpt,target,standard_error
0,c12129c31,,,When the young people returned to the ballroom...,-0.340259,0.464009
1,85aa80a4c,,,"All through dinner time, Mrs. Fayre was somewh...",-0.315372,0.480805
2,b69ac6792,,,"As Roger had predicted, the snow departed as q...",-0.580118,0.476676
3,dd1000b26,,,And outside before the palace a great garden w...,-1.054013,0.450007
4,37c1b32fb,,,Once upon a time there were Three Bears who li...,0.247197,0.510845


In [5]:
test_df = pd.read_csv(ROOT+'test.csv')
test_df.head()

Unnamed: 0,id,url_legal,license,excerpt
0,c0f722661,,,My hope lay in Jack's promise that he would ke...
1,f0953f0a5,,,Dotty continued to go to Mrs. Gray's every nig...
2,0df072751,,,It was a bright and cheerful scene that greete...
3,04caf4e0c,https://en.wikipedia.org/wiki/Cell_division,CC BY-SA 3.0,Cell division is the process by which a parent...
4,0e63f8bea,https://en.wikipedia.org/wiki/Debugging,CC BY-SA 3.0,Debugging is the process of finding and resolv...


In [6]:
def prep_text(text_df):
  text_df = text_df.str.replace("\n","",regex=False)
  return text_df.str.replace("\'s",r"s",regex=True).values

df['excerpt']      = prep_text(df['excerpt'])
test_df['excerpt'] = prep_text(test_df['excerpt'])

MAX_SEQUENCE_LENGTH = df['excerpt'].apply(lambda x: len(x.split())).max()

tokenizer = get_tokenizer.from_pretrained(VOCAB_PATH,
                                          model_max_length=MAX_SEQUENCE_LENGTH
                                          )
df['token']        = df['excerpt'].apply(tokenizer)
test_df['token']   = test_df['excerpt'].apply(tokenizer)

Token indices sequence length is longer than the specified maximum sequence length for this model (220 > 205). Running this sequence through the model will result in indexing errors


In [7]:
class CLPDataset(Dataset):
  def __init__(self, df):
    super().__init__()
    self.token  = df.token
    self.labels = df.target

  def __len__(self):
    return self.labels.shape[0]
  
  def __getitem__(self, idx):
    if len(self.token.iloc[0]) == 2:
      return (
              torch.tensor(self.token.iloc[idx].input_ids), \
              #torch.tensor(self.token.iloc[idx].token_type_ids), \
              torch.tensor(self.token.iloc[idx].attention_mask)
             ), \
              torch.tensor(self.labels.iloc[idx])
    if len(self.token.iloc[idx]) == 3:
      return (
              torch.tensor(self.token.iloc[idx].input_ids), \
              torch.tensor(self.token.iloc[idx].token_type_ids), \
              torch.tensor(self.token.iloc[idx].attention_mask)
             ), \
              torch.tensor(self.labels.iloc[idx])

In [8]:
def collate_fn(batch):
  inputs, labels = zip(*batch)
  try:
    ids, types, masks = zip(*inputs)
    ids   = pad_sequence(ids, batch_first=True).to(DEVICE)
    types = pad_sequence(types, batch_first=True).to(DEVICE)
    masks = pad_sequence(masks, batch_first=True).to(DEVICE)
    labels= torch.tensor(labels, dtype=torch.float).to(DEVICE)
    return {
        "input_ids"      : ids, \
        "token_type_ids" : types, \
        "attention_mask" : masks
        }, \
        labels
  except ValueError:
    ids, masks = zip(*inputs)
    ids   = pad_sequence(ids, batch_first=True).to(DEVICE)
    #types = pad_sequence(types, batch_first=True).to(DEVICE)
    masks = pad_sequence(masks, batch_first=True).to(DEVICE)
    labels= torch.tensor(labels, dtype=torch.float).to(DEVICE)
    return {
        "input_ids"      : ids, \
        #"token_type_ids" : types, \
        "attention_mask" : masks
        }, \
        labels

In [9]:
# trainデータを、targetの値をビニングした値を元に層化fold
def create_folds(data, num_splits):
    # we create a new column called kfold and fill it with -1
    folds = pd.DataFrame( np.ones( (data.shape[0],1) )*-1, columns=['kfold'] )
    num_bins = int(np.floor(1 + np.log2(len(data))))

    bins = pd.cut(
          data["target"], bins=num_bins, labels=False
          )

    kf = model_selection.StratifiedKFold(n_splits=num_splits, shuffle=True)
    for f, (t_, v_) in enumerate(kf.split(X=data, y=bins)):
        folds.iloc[v_] = int(f)

    return folds

In [10]:
def get_dataloaders(df, folds, n_fold, BATCH_SIZE):
  #folds = create_folds(df, num_splits=NUM_FOLDS)

  train_df = df[(folds['kfold']!=n_fold)]
  valid_df = df[(folds['kfold']==n_fold)]

  train_dataset = CLPDataset(df=train_df)
  valid_dataset = CLPDataset(df=valid_df)

  train_loader = DataLoader(
      dataset = train_dataset,
      batch_size = BATCH_SIZE,
      shuffle = True,
      collate_fn=collate_fn,
      #num_workers = -1
    )
  valid_loader = DataLoader(
      dataset = valid_dataset,
      batch_size = BATCH_SIZE,
      shuffle = False,
      collate_fn=collate_fn,
      #num_workers = -1
    )
  return train_loader, valid_loader

# model

In [11]:
folds_sample = create_folds(df, num_splits=5)
_, sample_loader = get_dataloaders(df, folds=folds_sample, n_fold=0, BATCH_SIZE=1)
sample_data, _ = iter(sample_loader).next()
sample_data

{'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0'),
 'input_ids': tensor([[  101, 11085,  1998,  8812,  2179, 20851,  2051,  2000,  2202,  2019,
          12612,  1997,  1996, 11593,  2482,  1012,  2009,  2001,  1037,  4121,
           3698,  1010,  1998,  4661,  2108,  7130,  2039, 20783,  2135,  2001,
           2036,

In [12]:
class CLPmodel(nn.Module):
  def __init__(self, check_size=False):
    super().__init__()
    self.model  = get_model.from_pretrained(MODEL_PATH).to(DEVICE)
    OUTPUT_SIZE = self.model(**sample_data)[0].shape[-1]
    self.drop = nn.Dropout(0.5)
    self.fc = nn.Linear(in_features=OUTPUT_SIZE, out_features=1)
    if check_size:
      print('base_model`s output_size :', OUTPUT_SIZE)
      print(DEVICE)
  
  def forward(self,inputs):
    out = self.model(**inputs)
    last_hiddens = out[0]
    out = self.drop(last_hiddens[:,0,:].squeeze(1))
    return self.fc(out)

In [13]:
model = CLPmodel(check_size=True)
del model, _
gc.collect()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


base_model`s output_size : 768
cuda:0


218

# Training

In [14]:
def train_fn(model, dataloader, loss_fn, optim, scheduler):
    total_loss = 0
    scaler = amp.GradScaler()
    for inputs, labels in tqdm(dataloader):
        optim.zero_grad()
        model = model.to(DEVICE)
        pred = model(inputs)
        loss = loss_fn(pred, labels)
        #loss.backward()
        #optim.step()
        scaler.scale(loss).backward()
        scaler.step(optim)
        scheduler.step()
        scaler.update()
        total_loss += loss.cpu().detach().numpy()
    total_loss /= len(dataloader)
    return np.sqrt(total_loss)

def valid_fn(model, dataloader, loss_fn):
    valid_pred = np.ones(1)
    valid_label = np.zeros(1)
    for inputs, labels in tqdm(dataloader):
        with torch.no_grad():
            model = model.to(DEVICE) 
            pred = model(inputs)
            valid_pred = np.concatenate( [ valid_pred,
                                                          pred.cpu().detach().numpy().reshape(-1,) ] , axis=0)
            valid_label = np.concatenate( [ valid_label,
                                                          labels.cpu().detach().numpy().reshape(-1,) ] , axis=0)
    loss = loss_fn(  torch.tensor( valid_pred[1:] ) , 
                            torch.tensor( valid_label[1:] )  )
    return np.sqrt( loss.cpu().detach().numpy() ), valid_pred[1:]

In [15]:
def loss_fn(pred, labels):
  return MSELoss()(pred.view(-1), labels.view(-1))

def train_fold(folds, n_fold, seed):
    # １fold分の学習を行う
    # モデルを作成し、学習させる　validが一番いいモデルを保存する
    best_score = np.inf
    best_pred = 0
    best_model = []

    train_loader, valid_loader = get_dataloaders(df, folds, n_fold, BATCH_SIZE)
    model = CLPmodel().to(DEVICE)
    optimizer = optim.AdamW(params=model.parameters(), lr=5e-5)
    lr_scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=len(train_loader))

    for epoch in range(EPOCHS):
        train_loss = train_fn(model, train_loader, loss_fn, optimizer, lr_scheduler)
        valid_loss, valid_pred = valid_fn(model, valid_loader, loss_fn)

        print(f"seed : {seed}, fold : {n_fold}, epoch : {epoch}, train_loss : {train_loss}")
        print(f"seed : {seed}, fold : {n_fold}, epoch : {epoch}, valid_loss : {valid_loss}")
        print('='*30)

    if valid_loss < best_score:
        best_socre = valid_loss
        del best_model, best_pred
        gc.collect()
        best_model = copy.deepcopy(model)
        best_pred  = valid_pred

    model_path = MODEL_ROOT+f"{MODEL_PATH}-seed-{seed}-fold-{n_fold}.pth"
    torch.save( model.to('cpu').state_dict(), model_path)
    del best_model, model
    gc.collect()
    return best_pred

In [16]:
def train_seed(seed):
    seed_everything(seed)
    folds = create_folds(df, num_splits=NUM_FOLDS)
    oof   = np.zeros(df['target'].shape)

    for n_fold in range(NUM_FOLDS):
        best_pred = train_fold(folds, n_fold, seed)
        oof[(folds['kfold']==n_fold)] = best_pred
        print('-='*20)

    print('%'*50)
    oof_score = loss_fn(pred   = torch.tensor(oof).to(DEVICE),
                                  labels = torch.tensor(df['target'].values).to(DEVICE) )
    print(f'oof_score_{seed} :', np.sqrt( oof_score.cpu().detach().numpy()))
    return np.sqrt( oof_score.cpu().detach().numpy() )

In [17]:
EPOCHS = 10
DEVICE = 'cuda:0'
SEEDs = [0, 7 ,42, 88, 100]

BATCH_SIZE = 16
NUM_FOLDS = 5

MAX_WORDS = df["excerpt"].apply(lambda x: len(x.split())).max()

oof_scores  = []
for seed in SEEDs:
  print(f'--------------- SEED {seed} is set ---------------')
  oof_score_for_seed = train_seed(seed)
  oof_scores.append(oof_score_for_seed)

print('&%&%'*30)
print("all_oof_score_avg : ", np.mean(oof_scores) )
print("all_oof_scores : ", oof_scores)

--------------- SEED 0 is set ---------------


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 0, train_loss : 0.6863900967811427
seed : 0, fold : 0, epoch : 0, valid_loss : 0.6066374273220396


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 1, train_loss : 0.5350908413670856
seed : 0, fold : 0, epoch : 1, valid_loss : 0.6422386506324017


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 2, train_loss : 0.42315903761889917
seed : 0, fold : 0, epoch : 2, valid_loss : 0.5496512194573024


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 3, train_loss : 0.3549781855188225
seed : 0, fold : 0, epoch : 3, valid_loss : 0.5554777239968062


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 4, train_loss : 0.31600860185253465
seed : 0, fold : 0, epoch : 4, valid_loss : 0.526890217800589


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 5, train_loss : 0.25922901569992723
seed : 0, fold : 0, epoch : 5, valid_loss : 0.559058490193717


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 6, train_loss : 0.24006189478866882
seed : 0, fold : 0, epoch : 6, valid_loss : 0.5297421751048322


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 7, train_loss : 0.2130449528406651
seed : 0, fold : 0, epoch : 7, valid_loss : 0.572175077925705


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 8, train_loss : 0.2084830892809082
seed : 0, fold : 0, epoch : 8, valid_loss : 0.5275997753894685


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 0, epoch : 9, train_loss : 0.18729317823138586
seed : 0, fold : 0, epoch : 9, valid_loss : 0.5378196200711077
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 0, train_loss : 0.73399019276414
seed : 0, fold : 1, epoch : 0, valid_loss : 0.5882901997605859


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 1, train_loss : 0.5515014015558879
seed : 0, fold : 1, epoch : 1, valid_loss : 0.6949729572216491


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 2, train_loss : 0.43704035274434394
seed : 0, fold : 1, epoch : 2, valid_loss : 0.5367414847992636


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 3, train_loss : 0.35555413747133047
seed : 0, fold : 1, epoch : 3, valid_loss : 0.5734667371093867


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 4, train_loss : 0.314302803688563
seed : 0, fold : 1, epoch : 4, valid_loss : 0.5236687475072602


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 5, train_loss : 0.2679984045044141
seed : 0, fold : 1, epoch : 5, valid_loss : 0.5457072648339314


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 6, train_loss : 0.24581417181170234
seed : 0, fold : 1, epoch : 6, valid_loss : 0.5303900299400497


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 7, train_loss : 0.228211365138559
seed : 0, fold : 1, epoch : 7, valid_loss : 0.5278821731123428


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 8, train_loss : 0.22257861227645884
seed : 0, fold : 1, epoch : 8, valid_loss : 0.5244920064372844


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 1, epoch : 9, train_loss : 0.20044266207898553
seed : 0, fold : 1, epoch : 9, valid_loss : 0.5868830785755131
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 0, train_loss : 0.6831518939610693
seed : 0, fold : 2, epoch : 0, valid_loss : 0.5940226473226966


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 1, train_loss : 0.5330748730253957
seed : 0, fold : 2, epoch : 1, valid_loss : 0.6687682598670417


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 2, train_loss : 0.4200628399202751
seed : 0, fold : 2, epoch : 2, valid_loss : 0.5361438235844737


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 3, train_loss : 0.3401639030958157
seed : 0, fold : 2, epoch : 3, valid_loss : 0.5342968024280623


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 4, train_loss : 0.2926572038643817
seed : 0, fold : 2, epoch : 4, valid_loss : 0.5168514255766146


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 5, train_loss : 0.2630379868206314
seed : 0, fold : 2, epoch : 5, valid_loss : 0.5315759852367081


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 6, train_loss : 0.2471027461019598
seed : 0, fold : 2, epoch : 6, valid_loss : 0.5073613019487935


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 7, train_loss : 0.21765292799464145
seed : 0, fold : 2, epoch : 7, valid_loss : 0.515804025430246


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 8, train_loss : 0.2079542430859228
seed : 0, fold : 2, epoch : 8, valid_loss : 0.5176941388236558


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 2, epoch : 9, train_loss : 0.1908328779576542
seed : 0, fold : 2, epoch : 9, valid_loss : 0.5297396888860219
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 0, train_loss : 0.7057749368079512
seed : 0, fold : 3, epoch : 0, valid_loss : 0.6058762724117024


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 1, train_loss : 0.5809908056207265
seed : 0, fold : 3, epoch : 1, valid_loss : 0.6851585028200569


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 2, train_loss : 0.4849856343619809
seed : 0, fold : 3, epoch : 2, valid_loss : 0.5500162397044297


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 3, train_loss : 0.39521910255827836
seed : 0, fold : 3, epoch : 3, valid_loss : 0.5996595821852538


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 4, train_loss : 0.35149856073253877
seed : 0, fold : 3, epoch : 4, valid_loss : 0.5256743473740473


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 5, train_loss : 0.30747741640508164
seed : 0, fold : 3, epoch : 5, valid_loss : 0.5441700406042667


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 6, train_loss : 0.27960567382353724
seed : 0, fold : 3, epoch : 6, valid_loss : 0.5356470856756314


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 7, train_loss : 0.26393288579333934
seed : 0, fold : 3, epoch : 7, valid_loss : 0.5620173653108511


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 8, train_loss : 0.2491771213236318
seed : 0, fold : 3, epoch : 8, valid_loss : 0.5333765357358254


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 3, epoch : 9, train_loss : 0.22921353800096078
seed : 0, fold : 3, epoch : 9, valid_loss : 0.5530064912451667
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 0, train_loss : 0.7212314633329453
seed : 0, fold : 4, epoch : 0, valid_loss : 0.6237371311199827


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 1, train_loss : 0.5664875993859888
seed : 0, fold : 4, epoch : 1, valid_loss : 0.5908541185646912


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 2, train_loss : 0.4744365089116901
seed : 0, fold : 4, epoch : 2, valid_loss : 0.5655180987737941


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 3, train_loss : 0.4077783161182664
seed : 0, fold : 4, epoch : 3, valid_loss : 0.6201277068113727


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 4, train_loss : 0.35396109920133856
seed : 0, fold : 4, epoch : 4, valid_loss : 0.5533874600083574


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 5, train_loss : 0.31357430346173143
seed : 0, fold : 4, epoch : 5, valid_loss : 0.5502893835627498


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 6, train_loss : 0.2714187980812192
seed : 0, fold : 4, epoch : 6, valid_loss : 0.5550189113377353


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 7, train_loss : 0.2485124774316782
seed : 0, fold : 4, epoch : 7, valid_loss : 0.5499649485620285


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 8, train_loss : 0.23093203257198877
seed : 0, fold : 4, epoch : 8, valid_loss : 0.5465893696453702


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 0, fold : 4, epoch : 9, train_loss : 0.21053421511741133
seed : 0, fold : 4, epoch : 9, valid_loss : 0.5383889972669086
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_0 : 0.5495463827980954
--------------- SEED 7 is set ---------------


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 0, train_loss : 0.6836837093407853
seed : 7, fold : 0, epoch : 0, valid_loss : 0.6001658577819237


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 1, train_loss : 0.5516832431811425
seed : 7, fold : 0, epoch : 1, valid_loss : 0.8276716228567831


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 2, train_loss : 0.4429449566910901
seed : 7, fold : 0, epoch : 2, valid_loss : 0.5504430630877356


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 3, train_loss : 0.3567842610403583
seed : 7, fold : 0, epoch : 3, valid_loss : 0.5627416049786336


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 4, train_loss : 0.2921166669916949
seed : 7, fold : 0, epoch : 4, valid_loss : 0.5492787050751567


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 5, train_loss : 0.24146400728726006
seed : 7, fold : 0, epoch : 5, valid_loss : 0.5756966954680439


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 6, train_loss : 0.2384652005404739
seed : 7, fold : 0, epoch : 6, valid_loss : 0.5411556199008435


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 7, train_loss : 0.20985042898138392
seed : 7, fold : 0, epoch : 7, valid_loss : 0.5478806697617209


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 8, train_loss : 0.18843041252854173
seed : 7, fold : 0, epoch : 8, valid_loss : 0.5351976525267325


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 0, epoch : 9, train_loss : 0.16999654312749196
seed : 7, fold : 0, epoch : 9, valid_loss : 0.5399259130842126
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 0, train_loss : 0.7040335055660839
seed : 7, fold : 1, epoch : 0, valid_loss : 0.6067149910842556


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 1, train_loss : 0.5607749065913923
seed : 7, fold : 1, epoch : 1, valid_loss : 0.6208600167127017


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 2, train_loss : 0.4563786975018067
seed : 7, fold : 1, epoch : 2, valid_loss : 0.5462688144326389


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 3, train_loss : 0.38621646926169434
seed : 7, fold : 1, epoch : 3, valid_loss : 0.5785441660563572


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 4, train_loss : 0.3325484029100825
seed : 7, fold : 1, epoch : 4, valid_loss : 0.5362554490902766


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 5, train_loss : 0.29903580127777135
seed : 7, fold : 1, epoch : 5, valid_loss : 0.5374405676889268


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 6, train_loss : 0.2741390588528295
seed : 7, fold : 1, epoch : 6, valid_loss : 0.536469394733705


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 7, train_loss : 0.25659599511127495
seed : 7, fold : 1, epoch : 7, valid_loss : 0.5411086257639172


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 8, train_loss : 0.24361796315394882
seed : 7, fold : 1, epoch : 8, valid_loss : 0.5340437638584568


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 1, epoch : 9, train_loss : 0.22215973684272072
seed : 7, fold : 1, epoch : 9, valid_loss : 0.547612544609238
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 0, train_loss : 0.7101647315671943
seed : 7, fold : 2, epoch : 0, valid_loss : 0.5643133237913689


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 1, train_loss : 0.5661878150259033
seed : 7, fold : 2, epoch : 1, valid_loss : 0.6431147569859851


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 2, train_loss : 0.4367326326897424
seed : 7, fold : 2, epoch : 2, valid_loss : 0.5181597598765035


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 3, train_loss : 0.3570086038990875
seed : 7, fold : 2, epoch : 3, valid_loss : 0.5254111269430559


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 4, train_loss : 0.3168353115011157
seed : 7, fold : 2, epoch : 4, valid_loss : 0.5013307192631977


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 5, train_loss : 0.2628248379402992
seed : 7, fold : 2, epoch : 5, valid_loss : 0.508447493407212


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 6, train_loss : 0.2450999688756357
seed : 7, fold : 2, epoch : 6, valid_loss : 0.5094329656213413


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 7, train_loss : 0.2225230862761797
seed : 7, fold : 2, epoch : 7, valid_loss : 0.520443996235197


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 8, train_loss : 0.2150799466583335
seed : 7, fold : 2, epoch : 8, valid_loss : 0.5054955119170481


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 2, epoch : 9, train_loss : 0.19642342531723683
seed : 7, fold : 2, epoch : 9, valid_loss : 0.5497343187168866
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 0, train_loss : 0.7003731880178836
seed : 7, fold : 3, epoch : 0, valid_loss : 0.6060963876652566


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 1, train_loss : 0.5462890779461169
seed : 7, fold : 3, epoch : 1, valid_loss : 0.5893911373848651


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 2, train_loss : 0.447265063083173
seed : 7, fold : 3, epoch : 2, valid_loss : 0.5444041372897088


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 3, train_loss : 0.3682863192670522
seed : 7, fold : 3, epoch : 3, valid_loss : 0.5476551591584599


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 4, train_loss : 0.3214143990499628
seed : 7, fold : 3, epoch : 4, valid_loss : 0.53815237927519


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 5, train_loss : 0.28198853967502047
seed : 7, fold : 3, epoch : 5, valid_loss : 0.5368615557054693


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 6, train_loss : 0.2748930594581585
seed : 7, fold : 3, epoch : 6, valid_loss : 0.5316023540808048


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 7, train_loss : 0.2430923888332181
seed : 7, fold : 3, epoch : 7, valid_loss : 0.5545716304928167


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 8, train_loss : 0.21423691170798387
seed : 7, fold : 3, epoch : 8, valid_loss : 0.532539059098805


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 3, epoch : 9, train_loss : 0.19400550536920583
seed : 7, fold : 3, epoch : 9, valid_loss : 0.5411558962813983
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 0, train_loss : 0.7199231985299123
seed : 7, fold : 4, epoch : 0, valid_loss : 0.5967531253249737


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 1, train_loss : 0.5494549211112782
seed : 7, fold : 4, epoch : 1, valid_loss : 0.6202249250902289


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 2, train_loss : 0.43044160037068785
seed : 7, fold : 4, epoch : 2, valid_loss : 0.5356080894948849


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 3, train_loss : 0.3460456087816815
seed : 7, fold : 4, epoch : 3, valid_loss : 0.5623032718460445


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 4, train_loss : 0.2996337503027711
seed : 7, fold : 4, epoch : 4, valid_loss : 0.5255344047970597


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 5, train_loss : 0.2536837662650771
seed : 7, fold : 4, epoch : 5, valid_loss : 0.5060154010327412


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 6, train_loss : 0.233398568321779
seed : 7, fold : 4, epoch : 6, valid_loss : 0.5025294379138707


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 7, train_loss : 0.212403810260435
seed : 7, fold : 4, epoch : 7, valid_loss : 0.5030007029926272


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 8, train_loss : 0.1967784605553499
seed : 7, fold : 4, epoch : 8, valid_loss : 0.5075209376576283


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 7, fold : 4, epoch : 9, train_loss : 0.18788069157685455
seed : 7, fold : 4, epoch : 9, valid_loss : 0.5212650932339371
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_7 : 0.5400387875270213
--------------- SEED 42 is set ---------------


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 0, train_loss : 0.680196080075073
seed : 42, fold : 0, epoch : 0, valid_loss : 0.5786195358586678


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 1, train_loss : 0.5455728914108116
seed : 42, fold : 0, epoch : 1, valid_loss : 0.6058167948593871


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 2, train_loss : 0.42693820781953834
seed : 42, fold : 0, epoch : 2, valid_loss : 0.5293585079295038


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 3, train_loss : 0.3451638025844081
seed : 42, fold : 0, epoch : 3, valid_loss : 0.5353943369038532


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 4, train_loss : 0.2807420984199865
seed : 42, fold : 0, epoch : 4, valid_loss : 0.5151182369501172


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 5, train_loss : 0.24609336742663052
seed : 42, fold : 0, epoch : 5, valid_loss : 0.5303969974600259


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 6, train_loss : 0.21853038185893167
seed : 42, fold : 0, epoch : 6, valid_loss : 0.5095734130518593


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 7, train_loss : 0.19642677342025724
seed : 42, fold : 0, epoch : 7, valid_loss : 0.5195966994138208


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 8, train_loss : 0.17591196105922483
seed : 42, fold : 0, epoch : 8, valid_loss : 0.5133275094118338


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 0, epoch : 9, train_loss : 0.1709355050266225
seed : 42, fold : 0, epoch : 9, valid_loss : 0.5316670184152772
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 0, train_loss : 0.6822901349689114
seed : 42, fold : 1, epoch : 0, valid_loss : 0.6092292816976429


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 1, train_loss : 0.5105301520592632
seed : 42, fold : 1, epoch : 1, valid_loss : 0.6029337382171609


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 2, train_loss : 0.40843853870886127
seed : 42, fold : 1, epoch : 2, valid_loss : 0.5505678928840363


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 3, train_loss : 0.32088641765278475
seed : 42, fold : 1, epoch : 3, valid_loss : 0.5671327289287105


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 4, train_loss : 0.28799663515533724
seed : 42, fold : 1, epoch : 4, valid_loss : 0.5480605681432241


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 5, train_loss : 0.24241836884536588
seed : 42, fold : 1, epoch : 5, valid_loss : 0.5475613938407249


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 6, train_loss : 0.2277061114615467
seed : 42, fold : 1, epoch : 6, valid_loss : 0.5386593303982607


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 7, train_loss : 0.20398329141580998
seed : 42, fold : 1, epoch : 7, valid_loss : 0.5436912892670647


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 8, train_loss : 0.19916229559582185
seed : 42, fold : 1, epoch : 8, valid_loss : 0.5440370496167478


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 1, epoch : 9, train_loss : 0.18102290162849075
seed : 42, fold : 1, epoch : 9, valid_loss : 0.5460520035487254
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 0, train_loss : 0.6999622813474656
seed : 42, fold : 2, epoch : 0, valid_loss : 0.5559522168978386


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 1, train_loss : 0.5115595058035927
seed : 42, fold : 2, epoch : 1, valid_loss : 0.6385912999118095


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 2, train_loss : 0.4100407718313937
seed : 42, fold : 2, epoch : 2, valid_loss : 0.51829814416312


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 3, train_loss : 0.3411089177734187
seed : 42, fold : 2, epoch : 3, valid_loss : 0.5389251227507388


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 4, train_loss : 0.28119895662905153
seed : 42, fold : 2, epoch : 4, valid_loss : 0.5141808287214027


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 5, train_loss : 0.23342449407641472
seed : 42, fold : 2, epoch : 5, valid_loss : 0.526887852435022


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 6, train_loss : 0.2184020712082236
seed : 42, fold : 2, epoch : 6, valid_loss : 0.5095588609146067


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 7, train_loss : 0.1974704647842813
seed : 42, fold : 2, epoch : 7, valid_loss : 0.5167173307873805


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 8, train_loss : 0.19449456265941673
seed : 42, fold : 2, epoch : 8, valid_loss : 0.5061598676152371


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 2, epoch : 9, train_loss : 0.18941646102138418
seed : 42, fold : 2, epoch : 9, valid_loss : 0.5145366455520649
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 0, train_loss : 0.7364822026542073
seed : 42, fold : 3, epoch : 0, valid_loss : 0.6215351614405913


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 1, train_loss : 0.6005970571980928
seed : 42, fold : 3, epoch : 1, valid_loss : 0.7013719687323072


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 2, train_loss : 0.5240791846011708
seed : 42, fold : 3, epoch : 2, valid_loss : 0.5620003601857543


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 3, train_loss : 0.4347025773982598
seed : 42, fold : 3, epoch : 3, valid_loss : 0.5923225455407127


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 4, train_loss : 0.3839723289395959
seed : 42, fold : 3, epoch : 4, valid_loss : 0.52642502333136


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 5, train_loss : 0.3327147794292054
seed : 42, fold : 3, epoch : 5, valid_loss : 0.5311216633967952


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 6, train_loss : 0.29616731896206633
seed : 42, fold : 3, epoch : 6, valid_loss : 0.526269037019092


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 7, train_loss : 0.26678018153994004
seed : 42, fold : 3, epoch : 7, valid_loss : 0.5274218531712221


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 8, train_loss : 0.24811699045387844
seed : 42, fold : 3, epoch : 8, valid_loss : 0.5165571183060244


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 3, epoch : 9, train_loss : 0.22599627544554654
seed : 42, fold : 3, epoch : 9, valid_loss : 0.5152832501190168
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 0, train_loss : 0.7356052653505025
seed : 42, fold : 4, epoch : 0, valid_loss : 0.6303440036561474


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 1, train_loss : 0.582319833334091
seed : 42, fold : 4, epoch : 1, valid_loss : 0.6624131570409943


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 2, train_loss : 0.5025789305032131
seed : 42, fold : 4, epoch : 2, valid_loss : 0.6089266759699004


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 3, train_loss : 0.4428552947321513
seed : 42, fold : 4, epoch : 3, valid_loss : 0.5957337673243286


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 4, train_loss : 0.384925387473755
seed : 42, fold : 4, epoch : 4, valid_loss : 0.55593762380407


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 5, train_loss : 0.3485673607147982
seed : 42, fold : 4, epoch : 5, valid_loss : 0.6437956472854717


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 6, train_loss : 0.3105252405850995
seed : 42, fold : 4, epoch : 6, valid_loss : 0.5485398523032246


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 7, train_loss : 0.27895403916980116
seed : 42, fold : 4, epoch : 7, valid_loss : 0.572592069520908


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 8, train_loss : 0.2617436682915736
seed : 42, fold : 4, epoch : 8, valid_loss : 0.5307498138926157


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 42, fold : 4, epoch : 9, train_loss : 0.23681815423565444
seed : 42, fold : 4, epoch : 9, valid_loss : 0.5340403341049034
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_42 : 0.5284498088493205
--------------- SEED 88 is set ---------------


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 0, train_loss : 0.681516173746692
seed : 88, fold : 0, epoch : 0, valid_loss : 0.5708198446428983


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 1, train_loss : 0.5146392639516636
seed : 88, fold : 0, epoch : 1, valid_loss : 0.6573466439134537


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 2, train_loss : 0.4078971452335251
seed : 88, fold : 0, epoch : 2, valid_loss : 0.5270944682993041


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 3, train_loss : 0.3288264524716587
seed : 88, fold : 0, epoch : 3, valid_loss : 0.5606139045081083


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 4, train_loss : 0.27728721018310376
seed : 88, fold : 0, epoch : 4, valid_loss : 0.5097214926735582


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 5, train_loss : 0.22895862863543806
seed : 88, fold : 0, epoch : 5, valid_loss : 0.5140662004365054


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 6, train_loss : 0.20943913977053003
seed : 88, fold : 0, epoch : 6, valid_loss : 0.49752699185381083


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 7, train_loss : 0.19932894155067699
seed : 88, fold : 0, epoch : 7, valid_loss : 0.5228554477875892


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 8, train_loss : 0.20660530991631237
seed : 88, fold : 0, epoch : 8, valid_loss : 0.505150206209367


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 0, epoch : 9, train_loss : 0.1919966421300173
seed : 88, fold : 0, epoch : 9, valid_loss : 0.5218581748785612
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 0, train_loss : 0.7699165453554524
seed : 88, fold : 1, epoch : 0, valid_loss : 0.6233808929920267


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 1, train_loss : 0.586633878381679
seed : 88, fold : 1, epoch : 1, valid_loss : 0.6203052084824423


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 2, train_loss : 0.47467848932817264
seed : 88, fold : 1, epoch : 2, valid_loss : 0.5723028036837902


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 3, train_loss : 0.41629142065772173
seed : 88, fold : 1, epoch : 3, valid_loss : 0.5451597716498108


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 4, train_loss : 0.3686673799901007
seed : 88, fold : 1, epoch : 4, valid_loss : 0.5584436780960801


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 5, train_loss : 0.30425881254469184
seed : 88, fold : 1, epoch : 5, valid_loss : 0.5512518153943147


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 6, train_loss : 0.2864517263710053
seed : 88, fold : 1, epoch : 6, valid_loss : 0.536425594673708


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 7, train_loss : 0.26437529050243586
seed : 88, fold : 1, epoch : 7, valid_loss : 0.5421422285149289


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 8, train_loss : 0.25433190286337615
seed : 88, fold : 1, epoch : 8, valid_loss : 0.5318306832590336


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 1, epoch : 9, train_loss : 0.22974358570443798
seed : 88, fold : 1, epoch : 9, valid_loss : 0.5724373312252937
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 0, train_loss : 0.6725148047001112
seed : 88, fold : 2, epoch : 0, valid_loss : 0.5450373138852241


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 1, train_loss : 0.5021755448336958
seed : 88, fold : 2, epoch : 1, valid_loss : 0.5500222615715953


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 2, train_loss : 0.38808377537206556
seed : 88, fold : 2, epoch : 2, valid_loss : 0.5198918082441684


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 3, train_loss : 0.30571569994799613
seed : 88, fold : 2, epoch : 3, valid_loss : 0.5128407666146383


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 4, train_loss : 0.28460697140968666
seed : 88, fold : 2, epoch : 4, valid_loss : 0.51457664544388


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 5, train_loss : 0.244413911183136
seed : 88, fold : 2, epoch : 5, valid_loss : 0.5268861493341579


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 6, train_loss : 0.2305190398202747
seed : 88, fold : 2, epoch : 6, valid_loss : 0.5072961032846716


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 7, train_loss : 0.19488064942006178
seed : 88, fold : 2, epoch : 7, valid_loss : 0.5110429203420459


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 8, train_loss : 0.1842279085288774
seed : 88, fold : 2, epoch : 8, valid_loss : 0.5086363562483217


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 2, epoch : 9, train_loss : 0.1652980876666831
seed : 88, fold : 2, epoch : 9, valid_loss : 0.5252763513785701
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 0, train_loss : 0.677846176804985
seed : 88, fold : 3, epoch : 0, valid_loss : 0.5859047989114295


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 1, train_loss : 0.5319839709886357
seed : 88, fold : 3, epoch : 1, valid_loss : 0.6511098297297923


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 2, train_loss : 0.4173447110700305
seed : 88, fold : 3, epoch : 2, valid_loss : 0.5376586981233304


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 3, train_loss : 0.3413643553433578
seed : 88, fold : 3, epoch : 3, valid_loss : 0.5813866474588345


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 4, train_loss : 0.2933399140122128
seed : 88, fold : 3, epoch : 4, valid_loss : 0.5336496489597258


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 5, train_loss : 0.2374617791752596
seed : 88, fold : 3, epoch : 5, valid_loss : 0.566375121983813


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 6, train_loss : 0.21127955219708067
seed : 88, fold : 3, epoch : 6, valid_loss : 0.5379385821367907


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 7, train_loss : 0.1907580035375624
seed : 88, fold : 3, epoch : 7, valid_loss : 0.5473425805373098


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 8, train_loss : 0.17940359747804374
seed : 88, fold : 3, epoch : 8, valid_loss : 0.5357045734292658


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 3, epoch : 9, train_loss : 0.16376254160169676
seed : 88, fold : 3, epoch : 9, valid_loss : 0.5335206846302987
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 0, train_loss : 0.6652322236849482
seed : 88, fold : 4, epoch : 0, valid_loss : 0.597998092881996


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 1, train_loss : 0.5198101428571346
seed : 88, fold : 4, epoch : 1, valid_loss : 0.5553836176973681


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 2, train_loss : 0.42368600623695307
seed : 88, fold : 4, epoch : 2, valid_loss : 0.5344620291049157


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 3, train_loss : 0.32606495922853485
seed : 88, fold : 4, epoch : 3, valid_loss : 0.5299380700950113


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 4, train_loss : 0.266497546811993
seed : 88, fold : 4, epoch : 4, valid_loss : 0.5065460380331034


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 5, train_loss : 0.23872333770214518
seed : 88, fold : 4, epoch : 5, valid_loss : 0.5132687999004121


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 6, train_loss : 0.21137724801019267
seed : 88, fold : 4, epoch : 6, valid_loss : 0.5157738508745474


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 7, train_loss : 0.19067693230954572
seed : 88, fold : 4, epoch : 7, valid_loss : 0.5248349438640239


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 8, train_loss : 0.20803180447877156
seed : 88, fold : 4, epoch : 8, valid_loss : 0.5171676391812816


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 88, fold : 4, epoch : 9, train_loss : 0.19318348643393685
seed : 88, fold : 4, epoch : 9, valid_loss : 0.5262715754235151
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_88 : 0.5362015299763564
--------------- SEED 100 is set ---------------


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 0, train_loss : 0.7142684334249046
seed : 100, fold : 0, epoch : 0, valid_loss : 0.5988019349976256


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 1, train_loss : 0.5478045026334519
seed : 100, fold : 0, epoch : 1, valid_loss : 0.6235667054453023


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 2, train_loss : 0.4439258246536
seed : 100, fold : 0, epoch : 2, valid_loss : 0.5632421568538781


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 3, train_loss : 0.37873343894359096
seed : 100, fold : 0, epoch : 3, valid_loss : 0.5665984730732432


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 4, train_loss : 0.3382307500115376
seed : 100, fold : 0, epoch : 4, valid_loss : 0.5623501939883482


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 5, train_loss : 0.32642222825770706
seed : 100, fold : 0, epoch : 5, valid_loss : 0.6173393468807048


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 6, train_loss : 0.2981639382917353
seed : 100, fold : 0, epoch : 6, valid_loss : 0.5404717258470356


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 7, train_loss : 0.25860578972202575
seed : 100, fold : 0, epoch : 7, valid_loss : 0.5615251730211486


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 8, train_loss : 0.23566275052290642
seed : 100, fold : 0, epoch : 8, valid_loss : 0.5373641024213717


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 0, epoch : 9, train_loss : 0.21360505505328367
seed : 100, fold : 0, epoch : 9, valid_loss : 0.5437597891323023
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 0, train_loss : 0.6994208497342933
seed : 100, fold : 1, epoch : 0, valid_loss : 0.5742511290813938


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 1, train_loss : 0.5491151763122253
seed : 100, fold : 1, epoch : 1, valid_loss : 0.5643971760360798


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 2, train_loss : 0.4475906735790983
seed : 100, fold : 1, epoch : 2, valid_loss : 0.5470694696146384


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 3, train_loss : 0.3714119458966119
seed : 100, fold : 1, epoch : 3, valid_loss : 0.5355612347720485


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 4, train_loss : 0.31326681179063764
seed : 100, fold : 1, epoch : 4, valid_loss : 0.523806210924751


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 5, train_loss : 0.27235216393700995
seed : 100, fold : 1, epoch : 5, valid_loss : 0.5135040733999464


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 6, train_loss : 0.25017943011780114
seed : 100, fold : 1, epoch : 6, valid_loss : 0.50928471820493


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 7, train_loss : 0.22432052282439507
seed : 100, fold : 1, epoch : 7, valid_loss : 0.5033951148213524


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 8, train_loss : 0.21943349984351346
seed : 100, fold : 1, epoch : 8, valid_loss : 0.5084504026087457


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 1, epoch : 9, train_loss : 0.1974260558957295
seed : 100, fold : 1, epoch : 9, valid_loss : 0.5454627209216747
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 0, train_loss : 0.7493182162184991
seed : 100, fold : 2, epoch : 0, valid_loss : 0.6053915002589035


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 1, train_loss : 0.5651629397143295
seed : 100, fold : 2, epoch : 1, valid_loss : 0.6668504997482372


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 2, train_loss : 0.4424563789630192
seed : 100, fold : 2, epoch : 2, valid_loss : 0.5119201398106358


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 3, train_loss : 0.3570864232219809
seed : 100, fold : 2, epoch : 3, valid_loss : 0.5569288604985669


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 4, train_loss : 0.30513236690705237
seed : 100, fold : 2, epoch : 4, valid_loss : 0.4958223245377435


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 5, train_loss : 0.25715914905334397
seed : 100, fold : 2, epoch : 5, valid_loss : 0.5416377659435819


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 6, train_loss : 0.22787195049544529
seed : 100, fold : 2, epoch : 6, valid_loss : 0.5013776341820018


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 7, train_loss : 0.20410467427334894
seed : 100, fold : 2, epoch : 7, valid_loss : 0.5252842095029319


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 8, train_loss : 0.20574759649563445
seed : 100, fold : 2, epoch : 8, valid_loss : 0.5030065377771229


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 2, epoch : 9, train_loss : 0.18626057534242108
seed : 100, fold : 2, epoch : 9, valid_loss : 0.532198924035146
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 0, train_loss : 0.7425348134456884
seed : 100, fold : 3, epoch : 0, valid_loss : 0.6190115985287719


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 1, train_loss : 0.5726775202749821
seed : 100, fold : 3, epoch : 1, valid_loss : 0.6548411378300257


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 2, train_loss : 0.4854640889707391
seed : 100, fold : 3, epoch : 2, valid_loss : 0.5741665756946114


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 3, train_loss : 0.4071978593606502
seed : 100, fold : 3, epoch : 3, valid_loss : 0.5738951581128323


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 4, train_loss : 0.3577701502644192
seed : 100, fold : 3, epoch : 4, valid_loss : 0.55057246880023


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 5, train_loss : 0.3143543981613484
seed : 100, fold : 3, epoch : 5, valid_loss : 0.5835257509152694


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 6, train_loss : 0.3018121608691543
seed : 100, fold : 3, epoch : 6, valid_loss : 0.5547904507925088


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 7, train_loss : 0.27263968894677265
seed : 100, fold : 3, epoch : 7, valid_loss : 0.5787501435059105


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 8, train_loss : 0.2608645611235854
seed : 100, fold : 3, epoch : 8, valid_loss : 0.5543385425893087


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 3, epoch : 9, train_loss : 0.2275535731291096
seed : 100, fold : 3, epoch : 9, valid_loss : 0.5546416858775682
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  after removing the cwd from sys.path.


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 0, train_loss : 0.6723312905571541
seed : 100, fold : 4, epoch : 0, valid_loss : 0.6194108536305181


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 1, train_loss : 0.5308273450129093
seed : 100, fold : 4, epoch : 1, valid_loss : 0.5985403147209152


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 2, train_loss : 0.41187509873526246
seed : 100, fold : 4, epoch : 2, valid_loss : 0.5466384358049398


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 3, train_loss : 0.33503815601225123
seed : 100, fold : 4, epoch : 3, valid_loss : 0.5487818247970566


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 4, train_loss : 0.31352853367224653
seed : 100, fold : 4, epoch : 4, valid_loss : 0.5315385936087391


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 5, train_loss : 0.25852060530247634
seed : 100, fold : 4, epoch : 5, valid_loss : 0.5349849903938403


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 6, train_loss : 0.221193401268489
seed : 100, fold : 4, epoch : 6, valid_loss : 0.5259750325883834


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 7, train_loss : 0.2026922191861128
seed : 100, fold : 4, epoch : 7, valid_loss : 0.5440638938544025


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 8, train_loss : 0.19636217193495084
seed : 100, fold : 4, epoch : 8, valid_loss : 0.5212345502664545


HBox(children=(FloatProgress(value=0.0, max=142.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=36.0), HTML(value='')))


seed : 100, fold : 4, epoch : 9, train_loss : 0.1684419514150867
seed : 100, fold : 4, epoch : 9, valid_loss : 0.5290026980132028
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
oof_score_100 : 0.541097785553403
&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%&%
all_oof_score_avg :  0.5390668589408393
all_oof_scores :  [0.5495463827980954, 0.5400387875270213, 0.5284498088493205, 0.5362015299763564, 0.541097785553403]
