In [1]:
import numpy as np 
import pandas as pd 
import os
       
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import matplotlib.pyplot as plt 

import transformers
import random


import warnings
warnings.simplefilter('ignore')
warnings.filterwarnings('ignore')

scaler = torch.cuda.amp.GradScaler() 

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

# Seed

In [3]:
SEED = 508

def random_seed(SEED):
    
    random.seed(SEED)
    os.environ['PYTHONHASHSEED'] = str(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True

random_seed(SEED)

In [4]:
train = pd.read_csv("../input/commonlitreadabilityprize/train.csv")
train.head()

Unnamed: 0,id,url_legal,license,excerpt,target,standard_error
0,c12129c31,,,When the young people returned to the ballroom...,-0.340259,0.464009
1,85aa80a4c,,,"All through dinner time, Mrs. Fayre was somewh...",-0.315372,0.480805
2,b69ac6792,,,"As Roger had predicted, the snow departed as q...",-0.580118,0.476676
3,dd1000b26,,,And outside before the palace a great garden w...,-1.054013,0.450007
4,37c1b32fb,,,Once upon a time there were Three Bears who li...,0.247197,0.510845


In [5]:
train.excerpt[0]

'When the young people returned to the ballroom, it presented a decidedly changed appearance. Instead of an interior scene, it was a winter landscape.\nThe floor was covered with snow-white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. The numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. Also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches.\nAt each end of the room, on the wall, hung a beautiful bear-skin rug.\nThese rugs were for prizes, one for the girls and one for the boys. And this was the game.\nThe girls were gathered at one end of the room and the boys at the other, and one end was called the North Pole, and the other the South Pole. Each player was given a small flag which they were to plant on reaching the Pole.\nThis would have been an easy matter, but each traveller was obliged to wear snowshoes.'

# Make Train fold

In [6]:
from sklearn.model_selection import KFold
skf=KFold(n_splits=5,shuffle=True,random_state=5)
train["fold"]=-1
X=train.sample(frac=1)
for i,(_,val_idx) in enumerate(skf.split(X)):
    train.loc[val_idx,"fold"]=i

In [7]:
test = pd.read_csv("../input/commonlitreadabilityprize/test.csv")
test

Unnamed: 0,id,url_legal,license,excerpt
0,c0f722661,,,My hope lay in Jack's promise that he would ke...
1,f0953f0a5,,,Dotty continued to go to Mrs. Gray's every nig...
2,0df072751,,,It was a bright and cheerful scene that greete...
3,04caf4e0c,https://en.wikipedia.org/wiki/Cell_division,CC BY-SA 3.0,Cell division is the process by which a parent...
4,0e63f8bea,https://en.wikipedia.org/wiki/Debugging,CC BY-SA 3.0,Debugging is the process of finding and resolv...
5,12537fe78,,,"To explain transitivity, let us look first at ..."
6,965e592c0,https://www.africanstorybook.org/#,CC BY 4.0,Milka and John are playing in the garden. Her ...


In [8]:
sample = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
sample

Unnamed: 0,id,target
0,c0f722661,0.0
1,f0953f0a5,0.0
2,0df072751,0.0
3,04caf4e0c,0.0
4,0e63f8bea,0.0
5,12537fe78,0.0
6,965e592c0,0.0


# Tokenizer Test

In [9]:
# kaggle offline mode: submit is offline because internet is offline. It comes from the published dataset.
tokenizer = transformers.BertTokenizer.from_pretrained("../input/bert-base-uncased")

# Please use this when using online such as local PC.
# tokenizer = transformers.BertTokenizer.from_pretrained("bert-base-uncased")

In [10]:
test_s = train["excerpt"].iloc[0]
test_s

'When the young people returned to the ballroom, it presented a decidedly changed appearance. Instead of an interior scene, it was a winter landscape.\nThe floor was covered with snow-white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. The numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. Also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches.\nAt each end of the room, on the wall, hung a beautiful bear-skin rug.\nThese rugs were for prizes, one for the girls and one for the boys. And this was the game.\nThe girls were gathered at one end of the room and the boys at the other, and one end was called the North Pole, and the other the South Pole. Each player was given a small flag which they were to plant on reaching the Pole.\nThis would have been an easy matter, but each traveller was obliged to wear snowshoes.'

In [11]:
result1=tokenizer.encode_plus(test_s)
result1

{'input_ids': [101, 2043, 1996, 2402, 2111, 2513, 2000, 1996, 14307, 1010, 2009, 3591, 1037, 27873, 2904, 3311, 1012, 2612, 1997, 2019, 4592, 3496, 1010, 2009, 2001, 1037, 3467, 5957, 1012, 1996, 2723, 2001, 3139, 2007, 4586, 1011, 2317, 10683, 1010, 2025, 4201, 2006, 15299, 1010, 2021, 19379, 21132, 2058, 18548, 1998, 2940, 25384, 1010, 2066, 1037, 2613, 4586, 2492, 1012, 1996, 3365, 9486, 1998, 16899, 2015, 2008, 2018, 7429, 1996, 2282, 1010, 2020, 9898, 2098, 2007, 13724, 1998, 25259, 2007, 25252, 1997, 6557, 1010, 2066, 4586, 1012, 2036, 6323, 6497, 2018, 2042, 8217, 11867, 6657, 19859, 2006, 2068, 1010, 1998, 20332, 6121, 24582, 20921, 5112, 2013, 1996, 5628, 1012, 2012, 2169, 2203, 1997, 1996, 2282, 1010, 2006, 1996, 2813, 1010, 5112, 1037, 3376, 4562, 1011, 3096, 20452, 1012, 2122, 20452, 2015, 2020, 2005, 11580, 1010, 2028, 2005, 1996, 3057, 1998, 2028, 2005, 1996, 3337, 1012, 1998, 2023, 2001, 1996, 2208, 1012, 1996, 3057, 2020, 5935, 2012, 2028, 2203, 1997, 1996, 2282, 1998, 

In [12]:
tokenizer.decode(result1["input_ids"])

'[CLS] when the young people returned to the ballroom, it presented a decidedly changed appearance. instead of an interior scene, it was a winter landscape. the floor was covered with snow - white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. the numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches. at each end of the room, on the wall, hung a beautiful bear - skin rug. these rugs were for prizes, one for the girls and one for the boys. and this was the game. the girls were gathered at one end of the room and the boys at the other, and one end was called the north pole, and the other the south pole. each player was given a small flag which they were to plant on reaching the pole. this would have been an easy matter, but each traveller was obliged to wear snowsho

In [13]:
sen_length = []

for sentence in tqdm(train["excerpt"]):

    token_words = tokenizer.encode_plus(sentence)["input_ids"]
    sen_length.append(len(token_words))

print('maxlenth of all sentences are  ', max(sen_length))

100%|██████████| 2834/2834 [00:15<00:00, 177.82it/s]

maxlenth of all sentences are   314





In [14]:
test_s

'When the young people returned to the ballroom, it presented a decidedly changed appearance. Instead of an interior scene, it was a winter landscape.\nThe floor was covered with snow-white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. The numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. Also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches.\nAt each end of the room, on the wall, hung a beautiful bear-skin rug.\nThese rugs were for prizes, one for the girls and one for the boys. And this was the game.\nThe girls were gathered at one end of the room and the boys at the other, and one end was called the North Pole, and the other the South Pole. Each player was given a small flag which they were to plant on reaching the Pole.\nThis would have been an easy matter, but each traveller was obliged to wear snowshoes.'

In [15]:
result2 = tokenizer.encode_plus(
    test_s,
    add_special_tokens = True, # Whether to insert [CLS], [SEP]
    max_length = 500,#314, # Align the number of words using padding and transcription
    pad_to_max_length = True, # Put [PAD] in the blank area
    
    truncation = True # Cutout function. For example, max_length10 is a function that makes only the first 10 characters. I got an alert if I didn't put it in, so I'll put it in
)

In [16]:
result2

{'input_ids': [101, 2043, 1996, 2402, 2111, 2513, 2000, 1996, 14307, 1010, 2009, 3591, 1037, 27873, 2904, 3311, 1012, 2612, 1997, 2019, 4592, 3496, 1010, 2009, 2001, 1037, 3467, 5957, 1012, 1996, 2723, 2001, 3139, 2007, 4586, 1011, 2317, 10683, 1010, 2025, 4201, 2006, 15299, 1010, 2021, 19379, 21132, 2058, 18548, 1998, 2940, 25384, 1010, 2066, 1037, 2613, 4586, 2492, 1012, 1996, 3365, 9486, 1998, 16899, 2015, 2008, 2018, 7429, 1996, 2282, 1010, 2020, 9898, 2098, 2007, 13724, 1998, 25259, 2007, 25252, 1997, 6557, 1010, 2066, 4586, 1012, 2036, 6323, 6497, 2018, 2042, 8217, 11867, 6657, 19859, 2006, 2068, 1010, 1998, 20332, 6121, 24582, 20921, 5112, 2013, 1996, 5628, 1012, 2012, 2169, 2203, 1997, 1996, 2282, 1010, 2006, 1996, 2813, 1010, 5112, 1037, 3376, 4562, 1011, 3096, 20452, 1012, 2122, 20452, 2015, 2020, 2005, 11580, 1010, 2028, 2005, 1996, 3057, 1998, 2028, 2005, 1996, 3337, 1012, 1998, 2023, 2001, 1996, 2208, 1012, 1996, 3057, 2020, 5935, 2012, 2028, 2203, 1997, 1996, 2282, 1998, 

In [17]:
tokenizer.decode(result2["input_ids"])

'[CLS] when the young people returned to the ballroom, it presented a decidedly changed appearance. instead of an interior scene, it was a winter landscape. the floor was covered with snow - white canvas, not laid on smoothly, but rumpled over bumps and hillocks, like a real snow field. the numerous palms and evergreens that had decorated the room, were powdered with flour and strewn with tufts of cotton, like snow. also diamond dust had been lightly sprinkled on them, and glittering crystal icicles hung from the branches. at each end of the room, on the wall, hung a beautiful bear - skin rug. these rugs were for prizes, one for the girls and one for the boys. and this was the game. the girls were gathered at one end of the room and the boys at the other, and one end was called the north pole, and the other the south pole. each player was given a small flag which they were to plant on reaching the pole. this would have been an easy matter, but each traveller was obliged to wear snowsho

In [18]:
train = train.sort_values("target").reset_index(drop=True)
train

Unnamed: 0,id,url_legal,license,excerpt,target,standard_error,fold
0,4626100d8,,,"The commutator is peculiar, consisting of only...",-3.676268,0.623621,0
1,493b80aa7,,,The Dunwich horror itself came between Lammas ...,-3.668360,0.571404,1
2,fe44cbd14,,,"The iron cylinder weighs 23 kilogrammes; but, ...",-3.642892,0.644398,1
3,284eaa5ad,,,As to surface-slope its measurement—from nearl...,-3.639936,0.603819,3
4,9e9eacb49,,,"The tree is dioecious, bearing male catkins on...",-3.636834,0.606822,0
...,...,...,...,...,...,...,...
2829,016913371,https://www.africanstorybook.org/,CC BY 4.0,Grandma's garden was wonderful. It was full of...,1.467665,0.599600,3
2830,7a1d484be,https://www.africanstorybook.org/,CC BY 4.0,More people came to the bus stop just before 9...,1.541672,0.606997,2
2831,8f35441e3,https://www.africanstorybook.org/#,CC BY 4.0,"Every day, Emeka's father took him to school i...",1.583847,0.624776,4
2832,849971671,https://www.africanstorybook.org/,CC BY 4.0,"For her last birthday, Sisanda had a special t...",1.597870,0.596349,4


In [19]:
# p_train = train[train["kfold"]!=0].reset_index(drop=True)
# p_valid = train[train["kfold"]==0].reset_index(drop=True)

In [20]:
class BERTDataset(Dataset):
    
    def __init__(self,sentences,targets,max_len=500):
        self.sentences = sentences
        self.targets = targets
        self.max_len=max_len
        
    def __len__(self):
        return len(self.sentences)
    
    def __getitem__(self,idx):
        sentence = self.sentences[idx]
        bert_sens = tokenizer.encode_plus(
                                sentence,
                                add_special_tokens = True, 
                                max_length =self.max_len,
                                pad_to_max_length = True, 
                                return_attention_mask = True)

        ids = torch.tensor(bert_sens['input_ids'], dtype=torch.long)
        mask = torch.tensor(bert_sens['attention_mask'], dtype=torch.long)
        token_type_ids = torch.tensor(bert_sens['token_type_ids'], dtype=torch.long)
        target = torch.tensor(self.targets[idx],dtype=torch.float)
        
        return {
                'ids': ids,
                'mask': mask,
                'token_type_ids': token_type_ids,
                'targets': target
            }

# CFG

In [21]:
class CFG:
    frac=1.0#0.1 
    train=True#False#
    pretrained=True #False#
    model= "BERT"#"R34"#"E4"#
    train_bs=12
    valid_bs=8#12
    test_bs=2
    epochs=30
    fold=0
    classes=1
    seed= 719
    lr=2e-4
    es=4


# Engine

In [22]:
# class Engine:
#     def __init__(self,model,optimizer,scheduler):
#         self.model=model
#         self.optimizer=optimizer
#         self.scheduler=scheduler
        
#     def get_accuracy(self,labels,preds):
#         total=labels.shape[0]
#         if preds.shape[1]==1:
#             l=labels
#             p=np.uint8(preds>0.5)
#         else:
#             p=preds.argmax(1).reshape(-1,1)
#             l=labels.reshape(-1,1)
#         return np.uint8(l==p).sum()/total
    
#     def loss_fn(self,outputs,targets):
#         return nn.MSELoss()(outputs,targets)  
    
#     def train(self,data_loader):
#         l_preds=[]
#         l_labels=[]
#         self.model.train()
#         final_loss=0
#         for data in data_loader:
#             self.optimizer.zero_grad()
#             print(data.shape)
            
#             ids=data["ids"].to(device,non_blocking=True)
#             mask=data["mask"].to(device,non_blocking=True)
#             tokentype=data["token_type_ids"].to(device,non_blocking=True)

#             outputs=self.model(ids,mask)
#             outputs=outputs["logits"].squeeze(-1)
#             targets=data["targets"].to(device,non_blocking=True)
#             loss=self.loss_fn(outputs,targets)
                
#             loss.backward()
#             self.optimizer.step()
            
#             ##accuracy
#             final_loss+=loss.item()
#             l_preds.append(outputs.cpu().detach().numpy().reshape(-1,1))
#             l_labels.append(targets.cpu().detach().numpy().reshape(-1,1))            
#             l_preds=np.concatenate(l_preds)
#             l_labels=np.concatenate(l_labels)
            
#         return np.mean(final_loss), l_preds, l_labels
    
#     def validate(self,data_loader):
#         device=self.device
#         preds_for_acc=[]
#         labels_for_acc=[]
#         self.model.eval()
#         final_loss=0
#         for data in data_loader:
#             ids=data["ids"].to(device,non_blocking=True)
#             mask=data["mask"].to(device,non_blocking=True)
#             tokentype=data["token_type_ids"].to(device,non_blocking=True)

#             with torch.no_grad():
#                 outputs=self.model(ids,mask)
#                 outputs=outputs["logits"].squeeze(-1)
#                 targets=data["targets"].to(device,non_blocking=True)

#                 loss=self.loss_fn(outputs,targets)
#                 final_loss+=loss.item()
            
#             ##accuracy
#             labels=targets.cpu().numpy().reshape(-1,1)
#             preds=outputs.cpu().detach().numpy()
#             if len(labels_for_acc)==0:
#                 labels_for_acc=labels
#                 preds_for_acc=preds
#             else:
#                 labels_for_acc=np.vstack((labels_for_acc,labels))
#                 preds_for_acc=np.vstack((preds_for_acc,preds))
#         accuracy=self.get_accuracy(labels_for_acc,preds_for_acc)
#         return final_loss/len(data_loader),accuracy,labels_for_acc,preds_for_acc

#     def predict(self,data_loader):
#         self.model.eval()
#         final_predictions=[]
#         for data in data_loader:
#             inputs=data["img"].to(self.device)
#             outputs=self.model(inputs)
#             outputs=outputs.cpu()
#             final_predictions.append(outputs.detach().numpy())
#         return final_predictions

In [23]:
device

device(type='cuda')

In [24]:
class Engine:
    def __init__(self,model,optimizer,device,classes):
        self.model=model
        self.optimizer=optimizer
        self.device=device
        self.classes=classes
        
    def get_accuracy(self,labels,preds):
        total=labels.shape[0]
        if preds.shape[1]==1:
            l=labels
            p=np.uint8(preds>0.5)
        else:
            p=preds.argmax(1).reshape(-1,1)
            l=labels.reshape(-1,1)
        return np.uint8(l==p).sum()/total
    
    def loss_fn(self,outputs,targets):
        return nn.MSELoss()(outputs,targets)  
    
    def train(self,data_loader):
        preds_for_acc=[]
        labels_for_acc=[]
        self.model.train()
        final_loss=0
        for data in data_loader:
            self.optimizer.zero_grad()
            ids=data["ids"].to(device,non_blocking=True)
            mask=data["mask"].to(device,non_blocking=True)
            tokentype=data["token_type_ids"].to(device,non_blocking=True)

            outputs=self.model(ids,mask)
            outputs=outputs["logits"].squeeze(-1)
            targets=data["targets"].to(device,non_blocking=True)
            
            loss=self.loss_fn(outputs,targets)
            
            loss.backward()
            self.optimizer.step()
            final_loss+=loss.item()
            
            ##accuracy
            labels=targets.cpu().numpy().reshape(-1,1)            
            preds=outputs.cpu().detach().numpy().reshape(-1,1)
            if len(labels_for_acc)==0:
                labels_for_acc=labels
                preds_for_acc=preds
            else:
                labels_for_acc=np.vstack((labels_for_acc,labels))
                preds_for_acc=np.vstack((preds_for_acc,preds))
        return final_loss/len(data_loader),labels_for_acc,preds_for_acc
    
    def validate(self,data_loader):
        preds_for_acc=[]
        labels_for_acc=[]
        self.model.eval()
        final_loss=0
        for data in data_loader:
            ids=data["ids"].to(device,non_blocking=True)
            mask=data["mask"].to(device,non_blocking=True)
            tokentype=data["token_type_ids"].to(device,non_blocking=True)

            with torch.no_grad():
                outputs=self.model(ids,mask)
                outputs=outputs["logits"].squeeze(-1)
                targets=data["targets"].to(device,non_blocking=True)

                loss=self.loss_fn(outputs,targets)
                final_loss+=loss.item()
            
            ##accuracy
            labels=targets.cpu().numpy().reshape(-1,1)
            preds=outputs.cpu().detach().numpy().reshape(-1,1)
            if len(labels_for_acc)==0:
                labels_for_acc=labels
                preds_for_acc=preds
            else:
                labels_for_acc=np.vstack((labels_for_acc,labels))
                preds_for_acc=np.vstack((preds_for_acc,preds))
        accuracy=self.get_accuracy(labels_for_acc,preds_for_acc)
        return final_loss/len(data_loader),labels_for_acc,preds_for_acc

    def predict(self,data_loader):
        self.model.eval()
        final_predictions=[]
        for data in data_loader:
            ids=data["ids"].to(device,non_blocking=True)
            mask=data["mask"].to(device,non_blocking=True)
            tokentype=data["token_type_ids"].to(device,non_blocking=True)
            outputs=self.model(ids,mask)
            outputs=outputs["logits"].squeeze(-1)
            outputs=outputs.cpu()
            final_predictions.append(outputs.detach().numpy().reshape(-1,1))
        return final_predictions

# train

In [25]:
import datetime
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
import gc

def my_log(str):
    tm=datetime.datetime.now().strftime("%H:%M:%S")
    print(f"{tm}, {str}")

In [26]:
def train_model(fold):
    df_train=train[train.fold!=fold].reset_index(drop=True)
    df_valid=train[train.fold==fold].reset_index(drop=True)
    
    df_train=df_train.sample(frac=CFG.frac,random_state=999)
    df_valid=df_valid.sample(frac=CFG.frac,random_state=999)

    train_dataset=BERTDataset(df_train.excerpt.values,df_train.target.values)
    valid_dataset=BERTDataset(df_valid.excerpt.values,df_valid.target.values)
    
    train_loader=DataLoader(train_dataset,batch_size=CFG.train_bs,shuffle=True,\
                            num_workers=4,pin_memory=True)
    valid_loader=DataLoader(valid_dataset,batch_size=CFG.valid_bs,shuffle=False,\
                            num_workers=4,pin_memory=True)

#     model = transformers.BertForSequenceClassification.from_pretrained("bert-base-uncased",\
#                                                                        num_labels=1)
    model = transformers.BertForSequenceClassification.from_pretrained(\
                                "../input/bert-base-uncased",num_labels=1)
    model.to(device)
    
    optimizer=AdamW(model.parameters(),CFG.lr,betas=(0.9,0.999),weight_decay=1e-2)    
    train_steps=int(len(df_train)/CFG.train_bs*CFG.epochs)
    num_steps=int(train_steps*.1)
    scheduler=get_linear_schedule_with_warmup(optimizer,num_steps,train_steps)
    
    engine=Engine(model,optimizer,device,1)
    
    best_loss=np.inf
    early_stopping_cnt=0
    
    for epoch in range(CFG.epochs):
        t_loss,t_labels,t_preds=engine.train(train_loader)
        v_loss,v_labels,v_preds=engine.validate(valid_loader)
        scheduler.step(v_loss)
        
        my_log(f"fold={fold},epoch={epoch},t_loss={t_loss:.4f},v_loss={v_loss:.4f}")
        if v_loss<best_loss:
            best_loss=v_loss
            torch.save(model.state_dict(),f"model_{CFG.model}_fold_{fold}.bin")
            early_stopping_cnt=0
        else:
            early_stopping_cnt+=1
        if early_stopping_cnt>=CFG.es:
            break
            
            
    del train_dataset,valid_dataset,train_loader,valid_loader,model,optimizer,scheduler
    _ = gc.collect()    

In [27]:
train_model(fold=0)
train_model(fold=1)
train_model(fold=2)
train_model(fold=3)
train_model(fold=4)


Some weights of the model checkpoint at ../input/bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model chec

14:41:40, fold=0,epoch=0,t_loss=2.1271,v_loss=2.2430


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

14:41:56, fold=0,epoch=1,t_loss=0.9972,v_loss=0.6122


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

14:42:12, fold=0,epoch=2,t_loss=0.8198,v_loss=0.5288


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

14:42:27, fold=0,epoch=3,t_loss=0.6452,v_loss=0.3949


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-s

14:42:43, fold=0,epoch=4,t_loss=0.4771,v_loss=0.3253


In [28]:
import os
def predict(fold):
    test_df=test
    test_dataset=BERTDataset(test_df.excerpt.values,np.zeros(test_df.shape[0]))
    test_loader=DataLoader(test_dataset,batch_size=CFG.test_bs,shuffle=False,\
                            num_workers=4,pin_memory=True)

    model = transformers.BertForSequenceClassification.from_pretrained(\
                                "../input/bert-base-uncased",num_labels=1)
    model_save_path=f"model_{CFG.model}_fold_{fold}.bin"
    model.load_state_dict(torch.load(model_save_path))
    model=model.to(device)

    engine=Engine(model,None,device,classes=1)
    preds=engine.predict(test_loader)
    preds=np.vstack(preds)
    return preds

In [29]:
p=predict(0)
p

Some weights of the model checkpoint at ../input/bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model chec

array([[-0.37524784],
       [-0.3924919 ],
       [-0.5704994 ],
       [-1.5668049 ],
       [-1.3243016 ],
       [-0.2674231 ],
       [-0.27797174]], dtype=float32)

In [31]:
sample["target"]=p.reshape(-1)

In [32]:
sample.to_csv("submission.csv",index=False)

In [33]:
sample

Unnamed: 0,id,target
0,c0f722661,-0.375248
1,f0953f0a5,-0.392492
2,0df072751,-0.570499
3,04caf4e0c,-1.566805
4,0e63f8bea,-1.324302
5,12537fe78,-0.267423
6,965e592c0,-0.277972
