# Versions

* Version 1: CV:-0.0.4875 LB:-

# Imports

In [1]:
# !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
# !python pytorch-xla-env-setup.py --version 1.7 --apt-packages libomp5 libopenblas-dev

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
#General
import numpy as np 
import pandas as pd 
from tqdm import tqdm
from glob import glob
import os

#Sklearn
from sklearn.model_selection import train_test_split , KFold

#Pytorch
import torch 
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F

#Pytorch Lightning
import pytorch_lightning as pl
from pytorch_lightning import seed_everything , Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

#Hugging Face
from transformers import AutoModel , AutoConfig , AutoTokenizer ,  AdamW, get_linear_schedule_with_warmup,get_constant_schedule_with_warmup,get_cosine_schedule_with_warmup

In [3]:
paths = glob("../input/commonlitreadabilityprize/*.csv")
paths = sorted(paths)

df_ss = pd.read_csv(paths[0])
df_test = pd.read_csv(paths[1])
df_train = pd.read_csv(paths[2])

# Configs

In [4]:
class config:
    seed = 123
    batch_size = 16
    epochs = 20
    transformer_name = "roberta-base"
    transformer_path = "../input/huggingface-roberta/roberta-base"
    max_len = 250
    learning_rate = 2e-5
    save_dir = "./result"

seed_everything(config.seed)

123

In [5]:
if not os.path.exists(config.save_dir):
    os.makedirs(config.save_dir)

# Dataset

In [6]:
class CLRDataset:
    def __init__(self, name , dataset):
        self.tokenizer = AutoTokenizer.from_pretrained(name)
        self.max_len = config.max_len
        
        self.excerpt = dataset['excerpt'].to_numpy()
        self.target = dataset['target'].to_numpy()
        

    def __len__(self):
        return len(self.excerpt)
    
    def __getitem__(self , idx):
        text = self.excerpt[idx]
        target = self.target[idx]
        tokenized_text = self.tokenizer(text, truncation = True , padding = "max_length" , max_length= self.max_len )
        
        return {'input_ids': torch.tensor(tokenized_text['input_ids'], dtype = torch.long),
                'attention_mask' : torch.tensor(tokenized_text['attention_mask'] , dtype = torch.long),
               'target': torch.tensor([target],dtype = torch.float)}

# Model

In [7]:
def loss_function(output,target):
    return torch.sqrt(nn.MSELoss()(output,target))

In [8]:
class AttentionHead(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(768,512)
        self.tanh = nn.Tanh()
        self.linear2 = nn.Linear(512,1)
        self.softmax = nn.Softmax(dim = 1)
    
    def forward(self,input_tensors):
        x = self.linear(input_tensors)
        x = self.tanh(x)
        x = self.linear2(x)
        x = self.softmax(x)
        
        return x

class TransformerModel(nn.Module):
    def __init__(self,name):
        super().__init__()
        self.transformer_config = AutoConfig.from_pretrained(name)
        self.transformer_config.update({"output_hidden_states":True, "hidden_dropout_prob": 0.0,"layer_norm_eps": 1e-7}) 
        self.transformer_model = AutoModel.from_pretrained(name , config = self.transformer_config)
        
    def forward(self,input_ids, attention_mask):
        transformer_output = self.transformer_model(input_ids = input_ids , attention_mask = attention_mask)
        transformer_output = transformer_output.hidden_states[-1]
        
        return transformer_output

class RegressionHead(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(768,1)
    
    def forward(self,input_tensors):
        x = self.linear(input_tensors)
        
        return x

class CLRModel(pl.LightningModule):
    def __init__(self,name , train , validation):
        super().__init__()
        self._train_dataloader = train
        self._val_dataloader = validation
        self._test_dataloader = validation
        self.name = name
        self.transformer_model = TransformerModel(self.name)
        self.regression_head = RegressionHead()
        self.attention_head = AttentionHead()
        self.save_hyperparameters()
    
    def forward(self,input_ids , attention_mask):
        last_layer_hidden_states = self.transformer_model(input_ids , attention_mask)
        weights = self.attention_head(last_layer_hidden_states) 
        context_vector = torch.sum(weights * last_layer_hidden_states, dim=1)  
        output = self.regression_head(context_vector)
        
        return output
    
    def training_step(self,batch,batch_idx):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        target = batch['target']
        

        output = self.forward(input_ids , attention_mask)
        loss = loss_function(output,target) 
        self.log('train_loss', loss , prog_bar=True)

        return {'loss': loss}

    def train_epoch_end(self, outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        print(f'epoch {trainer.current_epoch} training loss {avg_loss}')
        return {'train_loss': avg_loss}    
    
    
    def validation_step(self,batch,batch_idx):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        target = batch['target']
        

        output = self.forward(input_ids , attention_mask)
        loss = loss_function(output,target) 
        self.log('val_loss', loss, prog_bar=True)
        
        return {'val_loss': loss}
    

    def validation_epoch_end(self, outputs):

        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        print(f'epoch {trainer.current_epoch} validation loss {avg_loss}')
        return {'val_loss': avg_loss}
    
    
    def test_step(self, batch,batch_idx):
        input_ids = batch['input_ids']
        attention_mask = batch['attention_mask']
        target = batch['target']

        output = self.forward(input_ids , attention_mask)
        loss = loss_function(output,target) 
        self.log('test_loss', loss)
        return {'test_loss': loss}
    
    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
        print(f'epoch {trainer.current_epoch} test loss {avg_loss}')
        return {'test_loss': avg_loss}
    
    def train_dataloader(self):
        return self._train_dataloader
    
    def val_dataloader(self):
        return self._val_dataloader
    
    def test_dataloader(self):
        return self._val_dataloader
    
    def configure_optimizers(self):
        """optimizer = AdamW(model.parameters(), lr=1e-5, weight_decay=0.01)
        lr_scheduler = get_constant_schedule_with_warmup(optimizer, 100)"""
        named_parameters = list(model.named_parameters())    
    
        roberta_parameters = named_parameters[:197]    
        attention_parameters = named_parameters[199:203]
        regressor_parameters = named_parameters[203:]

        attention_group = [params for (name, params) in attention_parameters]
        regressor_group = [params for (name, params) in regressor_parameters]
        parameters = []
        parameters.append({"params": attention_group})
        parameters.append({"params": regressor_group})

        for layer_num, (name, params) in enumerate(roberta_parameters):
            weight_decay = 0.0 if "bias" in name else 0.01

            lr = 2e-5

            if layer_num >= 69:        
                lr = 5e-5

            if layer_num >= 133:
                lr = 1e-4

            parameters.append({"params": params,
                               "weight_decay": weight_decay,
                               "lr": lr})
        
        optimizer = AdamW(parameters)            
        scheduler = get_cosine_schedule_with_warmup(
            optimizer,
            num_training_steps= config.epochs * len(train_dataloader),
            num_warmup_steps=50)
            

        
        return [optimizer], [scheduler]
    
    


In [9]:
from pytorch_lightning.callbacks import ProgressBarBase ,ProgressBar


class EpochProgressBar(ProgressBarBase):

    def __init__(self):
        super().__init__()
        self.bar = None

    def on_train_start(self, trainer, pl_module):
        self.bar = tqdm(
            desc='Epoch',
            leave=False,
            dynamic_ncols=True,
            total=trainer.max_epochs,
        )

    def on_train_epoch_end(self, *args, **kwargs):
        self.bar.update(1)

class LitProgressBar(ProgressBar):
    def __init__(self):
        super().__init__()
        
    def on_train_epoch_start(self, trainer, pl_module):
        if trainer.current_epoch:
            print()
        super().on_train_epoch_start(trainer, pl_module)

In [11]:
scores=[]
iterations = 1
kfold = KFold(n_splits=5, shuffle= True , random_state = config.seed)
for train_idx, test_idx in kfold.split(df_train):
    print("************** iteration",iterations,"**************")
    
    train_data = df_train.loc[train_idx]
    validation_data = df_train.loc[test_idx]
    
    train = CLRDataset(config.transformer_path,train_data)
    valid = CLRDataset(config.transformer_path,validation_data)
    
    train_dataloader = DataLoader(train , batch_size = config.batch_size , shuffle = True , num_workers=4,pin_memory=False)
    validation_dataloader = DataLoader(valid , batch_size = config.batch_size , shuffle = False , num_workers=4,pin_memory=False)
    checkpoint_callback = ModelCheckpoint(monitor='val_loss',
                                          dirpath= config.save_dir,
                                      save_top_k=1,
                                      save_last= False,
                                      save_weights_only=True,
                                      filename= f"./fold_{iterations}",
                                      verbose= True,
                                      mode='min')
    
    model = CLRModel(config.transformer_path, train_dataloader , validation_dataloader)
    trainer = Trainer(max_epochs= config.epochs,gpus =1 , progress_bar_refresh_rate=10,callbacks=[checkpoint_callback])
    trainer.fit(model , train_dataloader , validation_dataloader)
    
    #predictions
    print('predicting')
    model_test = CLRModel.load_from_checkpoint(f'result/fold_{iterations}.ckpt')
    a = trainer.test(model_test)
    
    iterations +=1 

************** iteration 1 **************


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Validation sanity check: 0it [00:00, ?it/s]

epoch 0 validation loss 1.5043253898620605


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch 0 validation loss 1.5343905687332153


Validating: 0it [00:00, ?it/s]

epoch 1 validation loss 0.6941792964935303


Validating: 0it [00:00, ?it/s]

epoch 2 validation loss 0.5855439305305481


Validating: 0it [00:00, ?it/s]

epoch 3 validation loss 0.5505715608596802


Validating: 0it [00:00, ?it/s]

epoch 4 validation loss 0.5471032857894897


Validating: 0it [00:00, ?it/s]

epoch 5 validation loss 0.5356782078742981


Validating: 0it [00:00, ?it/s]

epoch 6 validation loss 0.5748260617256165


Validating: 0it [00:00, ?it/s]

epoch 7 validation loss 0.543407142162323


Validating: 0it [00:00, ?it/s]

epoch 8 validation loss 0.5190924406051636


Validating: 0it [00:00, ?it/s]

epoch 9 validation loss 0.5280966758728027


Validating: 0it [00:00, ?it/s]

epoch 10 validation loss 0.5238869190216064


Validating: 0it [00:00, ?it/s]

epoch 11 validation loss 0.5370643734931946


Validating: 0it [00:00, ?it/s]

epoch 12 validation loss 0.5186502933502197


Validating: 0it [00:00, ?it/s]

epoch 13 validation loss 0.5450088381767273


Validating: 0it [00:00, ?it/s]

epoch 14 validation loss 0.5447915196418762


Validating: 0it [00:00, ?it/s]

epoch 15 validation loss 0.5608595013618469


Validating: 0it [00:00, ?it/s]

epoch 16 validation loss 0.5324256420135498


Validating: 0it [00:00, ?it/s]

epoch 17 validation loss 0.5250145792961121


Validating: 0it [00:00, ?it/s]

epoch 18 validation loss 0.5055640339851379


Validating: 0it [00:00, ?it/s]

epoch 19 validation loss 0.5065579414367676
predicting


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing: 0it [00:00, ?it/s]

epoch 19 test loss 0.5055640339851379
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.5041112899780273}
--------------------------------------------------------------------------------
************** iteration 2 **************


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Validation sanity check: 0it [00:00, ?it/s]

epoch 0 validation loss 0.7103824615478516


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch 0 validation loss 1.1000523567199707


Validating: 0it [00:00, ?it/s]

epoch 1 validation loss 0.6558171510696411


Validating: 0it [00:00, ?it/s]

epoch 2 validation loss 0.5495833158493042


Validating: 0it [00:00, ?it/s]

epoch 3 validation loss 0.5141873359680176


Validating: 0it [00:00, ?it/s]

epoch 4 validation loss 0.5229269862174988


Validating: 0it [00:00, ?it/s]

epoch 5 validation loss 0.48832327127456665


Validating: 0it [00:00, ?it/s]

epoch 6 validation loss 0.5002201795578003


Validating: 0it [00:00, ?it/s]

epoch 7 validation loss 0.5138178467750549


Validating: 0it [00:00, ?it/s]

epoch 8 validation loss 0.5132287740707397


Validating: 0it [00:00, ?it/s]

epoch 9 validation loss 0.5438933968544006


Validating: 0it [00:00, ?it/s]

epoch 10 validation loss 0.5090841054916382


Validating: 0it [00:00, ?it/s]

epoch 11 validation loss 0.543070375919342


Validating: 0it [00:00, ?it/s]

epoch 12 validation loss 0.4930374324321747


Validating: 0it [00:00, ?it/s]

epoch 13 validation loss 0.5022643208503723


Validating: 0it [00:00, ?it/s]

epoch 14 validation loss 0.49371662735939026


Validating: 0it [00:00, ?it/s]

epoch 15 validation loss 0.5100337266921997


Validating: 0it [00:00, ?it/s]

epoch 16 validation loss 0.48971787095069885


Validating: 0it [00:00, ?it/s]

epoch 17 validation loss 0.49080732464790344


Validating: 0it [00:00, ?it/s]

epoch 18 validation loss 0.4796077013015747


Validating: 0it [00:00, ?it/s]

epoch 19 validation loss 0.4782487452030182
predicting


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing: 0it [00:00, ?it/s]

epoch 19 test loss 0.4782487452030182
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.47790032625198364}
--------------------------------------------------------------------------------
************** iteration 3 **************


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Validation sanity check: 0it [00:00, ?it/s]

epoch 0 validation loss 0.7757818102836609


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch 0 validation loss 1.0586869716644287


Validating: 0it [00:00, ?it/s]

epoch 1 validation loss 0.6514738202095032


Validating: 0it [00:00, ?it/s]

epoch 2 validation loss 0.5546598434448242


Validating: 0it [00:00, ?it/s]

epoch 3 validation loss 0.5191848874092102


Validating: 0it [00:00, ?it/s]

epoch 4 validation loss 0.5203459858894348


Validating: 0it [00:00, ?it/s]

epoch 5 validation loss 0.5543216466903687


Validating: 0it [00:00, ?it/s]

epoch 6 validation loss 0.5371251702308655


Validating: 0it [00:00, ?it/s]

epoch 7 validation loss 0.5020921230316162


Validating: 0it [00:00, ?it/s]

epoch 8 validation loss 0.5310860872268677


Validating: 0it [00:00, ?it/s]

epoch 9 validation loss 0.5163707137107849


Validating: 0it [00:00, ?it/s]

epoch 10 validation loss 0.5199970602989197


Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch 12 validation loss 0.49819862842559814


Validating: 0it [00:00, ?it/s]

epoch 13 validation loss 0.5016390681266785


Validating: 0it [00:00, ?it/s]

epoch 14 validation loss 0.506993293762207


Validating: 0it [00:00, ?it/s]

epoch 15 validation loss 0.5037564039230347


Validating: 0it [00:00, ?it/s]

epoch 16 validation loss 0.49378374218940735


Validating: 0it [00:00, ?it/s]

epoch 17 validation loss 0.5002413392066956


Validating: 0it [00:00, ?it/s]

epoch 18 validation loss 0.4941180646419525


Validating: 0it [00:00, ?it/s]

epoch 19 validation loss 0.49516603350639343
predicting


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing: 0it [00:00, ?it/s]

epoch 19 test loss 0.49378374218940735
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.4916948080062866}
--------------------------------------------------------------------------------
************** iteration 4 **************


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Validation sanity check: 0it [00:00, ?it/s]

epoch 0 validation loss 0.8175176382064819


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch 0 validation loss 1.098875641822815


Validating: 0it [00:00, ?it/s]

epoch 1 validation loss 0.6376608610153198


Validating: 0it [00:00, ?it/s]

epoch 2 validation loss 0.5344353914260864


Validating: 0it [00:00, ?it/s]

epoch 3 validation loss 0.5257354378700256


Validating: 0it [00:00, ?it/s]

epoch 4 validation loss 0.49670976400375366


Validating: 0it [00:00, ?it/s]

epoch 5 validation loss 0.5176390409469604


Validating: 0it [00:00, ?it/s]

epoch 7 validation loss 0.5955437421798706


Validating: 0it [00:00, ?it/s]

epoch 8 validation loss 0.5204251408576965


Validating: 0it [00:00, ?it/s]

epoch 9 validation loss 0.5201459527015686


Validating: 0it [00:00, ?it/s]

epoch 10 validation loss 0.5451594591140747


Validating: 0it [00:00, ?it/s]

epoch 11 validation loss 0.5005325675010681


Validating: 0it [00:00, ?it/s]

epoch 12 validation loss 0.4964125454425812


Validating: 0it [00:00, ?it/s]

epoch 13 validation loss 0.5189399719238281


Validating: 0it [00:00, ?it/s]

epoch 14 validation loss 0.5207920074462891


Validating: 0it [00:00, ?it/s]

epoch 15 validation loss 0.5051115155220032


Validating: 0it [00:00, ?it/s]

epoch 16 validation loss 0.508221447467804


Validating: 0it [00:00, ?it/s]

epoch 17 validation loss 0.5036753416061401


Validating: 0it [00:00, ?it/s]

epoch 18 validation loss 0.5010760426521301


Validating: 0it [00:00, ?it/s]

epoch 19 validation loss 0.4903026819229126
predicting


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing: 0it [00:00, ?it/s]

epoch 19 test loss 0.4903026819229126
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.4912501275539398}
--------------------------------------------------------------------------------
************** iteration 5 **************


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Validation sanity check: 0it [00:00, ?it/s]

epoch 0 validation loss 0.7363137006759644


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

epoch 0 validation loss 1.0146530866622925


Validating: 0it [00:00, ?it/s]

epoch 1 validation loss 0.6817278265953064


Validating: 0it [00:00, ?it/s]

epoch 2 validation loss 0.5936357975006104


Validating: 0it [00:00, ?it/s]

epoch 3 validation loss 0.5187280774116516


Validating: 0it [00:00, ?it/s]

epoch 4 validation loss 0.5033672451972961


Validating: 0it [00:00, ?it/s]

epoch 5 validation loss 0.5190524458885193


Validating: 0it [00:00, ?it/s]

epoch 6 validation loss 0.5081059336662292


Validating: 0it [00:00, ?it/s]

epoch 7 validation loss 0.5151649117469788


Validating: 0it [00:00, ?it/s]

epoch 8 validation loss 0.5066143870353699


Validating: 0it [00:00, ?it/s]

epoch 9 validation loss 0.5011321306228638


Validating: 0it [00:00, ?it/s]

epoch 10 validation loss 0.5159334540367126


Validating: 0it [00:00, ?it/s]

epoch 11 validation loss 0.50283282995224


Validating: 0it [00:00, ?it/s]

epoch 12 validation loss 0.513927698135376


Validating: 0it [00:00, ?it/s]

epoch 13 validation loss 0.4902031123638153


Validating: 0it [00:00, ?it/s]

epoch 14 validation loss 0.4838433265686035


Validating: 0it [00:00, ?it/s]

epoch 15 validation loss 0.4907970428466797


Validating: 0it [00:00, ?it/s]

epoch 16 validation loss 0.47520574927330017


Validating: 0it [00:00, ?it/s]

epoch 17 validation loss 0.48754173517227173


Validating: 0it [00:00, ?it/s]

epoch 18 validation loss 0.47380003333091736


Validating: 0it [00:00, ?it/s]

epoch 19 validation loss 0.46861532330513
predicting


Some weights of the model checkpoint at ../input/huggingface-roberta/roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Testing: 0it [00:00, ?it/s]

epoch 19 test loss 0.46861532330513
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_loss': 0.47284188866615295}
--------------------------------------------------------------------------------


In [13]:
(0.47284188866615295 + 0.4912501275539398 + 0.4916948080062866 + 0.47790032625198364 + 0.5041112899780273)/5

0.4875596880912781