## **Loading data**

In [3]:
# Print the GPU(s) attached to this runtime using the nvidia-smi command-line tool.
# NOTE(review): the nvidia_smi module imported here is not referenced anywhere in the
# visible notebook — confirm the import is still needed.
import nvidia_smi
!nvidia-smi

Thu Feb 10 10:06:32 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
!pip install transformers==4.5.1



In [5]:
!pip install torch==1.7.0



In [6]:
import pandas as pd
import numpy as np
import warnings
import random
import time
import torch
import torch.nn as TorchNeuralNetwork
import matplotlib.pyplot as plot
from math import sqrt as SquareRoot
from sklearn.metrics import mean_squared_error
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from transformers import AutoConfig
from transformers import AutoModel
from transformers import AutoTokenizer
import gc
# Make sure automatic garbage collection is switched on (models are deleted and collected between folds).
gc.enable()
# Device-name strings passed to the .to(...) calls further down.
CUDA = "cuda"
CPU = "cpu"
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

In [7]:
#mounting google drive
from google.colab import drive
import os
# Attach Google Drive to the Colab filesystem at /content/gdrive.
drive.mount('/content/gdrive')
# Switch into the project folder: the relative paths used later in the notebook
# (validation CSV, saved model files) resolve against this directory.
os.chdir("/content/gdrive/MyDrive/commonlit_final/")
# Echo the working directory as a sanity check.
cwd = os.getcwd()
print(cwd)

Mounted at /content/gdrive
/content/gdrive/MyDrive/commonlit_final


**Model Parameters**

In [8]:
# Name/path given to AutoConfig/AutoModel.from_pretrained for the "rbs" model key.
# NOTE(review): looks like a local directory relative to the working directory — confirm.
RBS_MODEL = "transformers_roberta_base_squad2"
# Name/path given to AutoTokenizer.from_pretrained for the same model.
RBS_MODEL_TOKENIZER = "transformers_roberta_base_squad2"

In [9]:
# Name/path given to AutoConfig/AutoModel.from_pretrained for the "rls" model key.
# NOTE(review): looks like a local directory relative to the working directory — confirm.
RLS_MODEL = "transformers_roberta_large_squad2"
# Name/path given to AutoTokenizer.from_pretrained for the same model.
RLS_MODEL_TOKENIZER = "transformers_roberta_large_squad2"

In [10]:
# Name/path given to AutoConfig/AutoModel.from_pretrained for the "deberta" model key.
# NOTE(review): looks like a local directory relative to the working directory — confirm.
Deberta_MODEL = "transformers_deberta_large"
# Name/path given to AutoTokenizer.from_pretrained for the same model.
Deberta_MODEL_TOKENIZER = "transformers_deberta_large"

In [11]:
# Name/path given to AutoConfig/AutoModel.from_pretrained for the "electra" model key.
# NOTE(review): looks like a local directory relative to the working directory — confirm.
Electra_MODEL = "transformers_electra_large_discriminator"
# Name/path given to AutoTokenizer.from_pretrained for the same model.
Electra_MODEL_TOKENIZER = "transformers_electra_large_discriminator"

In [12]:
# Batch size of the DataLoader that feeds excerpts to the models at prediction time.
MODEL_BATCH = 16

In [13]:
#Model Parameters
# Token length every excerpt is padded/truncated to by the tokenizer.
MODEL_LENGTH = 248
# Padding strategy passed to the tokenizer.
MODEL_PAD = "max_length"
# Value written to the transformer config's layer_norm_eps.
MODEL_NORM = 1e-7
# Value written to the transformer config's hidden_dropout_prob.
MODEL_PROB = 0.0
# Output width of both the attention head and the regression layer
# (also reused as the Softmax dim in CommonLit_Architecture).
MODEL_HIDDEN_1 = 1
# Width of the attention head's intermediate layer.
MODEL_HIDDEN_2 = 512
# NOTE(review): not referenced anywhere in the visible part of the notebook.
MODEL_SEED_VALUE = 1000
# num_workers for the prediction DataLoader.
MODEL_CPU_WORKERS = 2
# Encoder hidden size used for the "rbs" model.
DIMENSION_1 = 768
# Encoder hidden size used for the "rls", "deberta" and "electra" models.
DIMENSION_2 = 1024

**Model Tokenizer**

In [14]:
# roberta-base-squad2 tokenizer; CommonLit_Item uses it for the "rbs" model key
rbs_model_token = AutoTokenizer.from_pretrained(RBS_MODEL_TOKENIZER)

In [15]:
# roberta-large-squad2 tokenizer; CommonLit_Item uses it for the "rls" model key
rls_model_token = AutoTokenizer.from_pretrained(RLS_MODEL_TOKENIZER)

In [16]:
# deberta tokenizer; CommonLit_Item uses it for the "deberta" model key
deberta_model_token = AutoTokenizer.from_pretrained(Deberta_MODEL_TOKENIZER)

In [17]:
# electra tokenizer; CommonLit_Item uses it for the "electra" model key
electra_model_token = AutoTokenizer.from_pretrained(Electra_MODEL_TOKENIZER)

## **Commonlit Classes and functions**

In [18]:
'''
TorchNeuralNetwork.Module is the base class for all neural network modules.
Here we create the NLP architecture that produces the prediction score.
The forward function of this class is called whenever an instance of it is called with input.
For example: model = CommonLit_Architecture("rbs"); output = model(input_ids, attention_mask, True). The call on model runs forward and returns its result.
'''

'\nTorchNeuralNetwork.Module is the base class for all neural network modules.\nHere we create the NLP architecture that produces the prediction score.\nThe forward function of this class is called whenever an instance of it is called with input.\nFor example: model = CommonLit_Architecture("rbs"); output = model(input_ids, attention_mask, True). The call on model runs forward and returns its result.\n'

In [19]:
class CommonLit_Architecture(TorchNeuralNetwork.Module):
    """Pretrained transformer encoder with an attention-pooling head and a linear regressor.

    Every token's hidden vector is scored by a small two-layer network, the
    scores are soft-maxed along the sequence axis, and the resulting weights
    are used to sum the token vectors into one vector per excerpt. A single
    linear layer then maps that vector to the prediction score.

    Args:
        commonlit_model: one of "rbs", "rls", "deberta" or "electra". Selects
            the pretrained checkpoint and the matching encoder hidden width.

    Raises:
        ValueError: if commonlit_model is not one of the supported keys.
    """

    def __init__(self,commonlit_model):
        super().__init__()

        # Resolve the checkpoint and its hidden width. The chain is kept lazy so
        # that only the constants of the requested model are touched.
        if commonlit_model == "rbs":
            MODEL = RBS_MODEL
            MODEL_HIDDEN_3 = DIMENSION_1
        elif commonlit_model == "rls":
            MODEL = RLS_MODEL
            MODEL_HIDDEN_3 = DIMENSION_2
        elif commonlit_model == "deberta":
            MODEL = Deberta_MODEL
            MODEL_HIDDEN_3 = DIMENSION_2
        elif commonlit_model == "electra":
            MODEL = Electra_MODEL
            MODEL_HIDDEN_3 = DIMENSION_2
        else:
            # Previously an unknown key fell through and failed later on the unbound MODEL variable.
            raise ValueError(
                "Unknown commonlit_model {!r}; expected one of 'rbs', 'rls', 'deberta', 'electra'".format(commonlit_model)
            )

        # Load the checkpoint's configuration.
        commonlit_cfg = AutoConfig.from_pretrained(MODEL)

        # Override configuration entries: expose every layer's hidden states and
        # apply the notebook's dropout / layer-norm epsilon settings.
        commonlit_cfg.update({"output_hidden_states":True, "hidden_dropout_prob": MODEL_PROB,"layer_norm_eps": MODEL_NORM})

        # AutoModel picks the concrete architecture from the checkpoint name/path.
        self.model = AutoModel.from_pretrained(MODEL, config=commonlit_cfg)

        # Attention head (2nd fine-tuning strategy): one score per token,
        # normalised over the sequence axis.
        self.attn_head = TorchNeuralNetwork.Sequential(
            TorchNeuralNetwork.Linear(MODEL_HIDDEN_3, MODEL_HIDDEN_2),
            TorchNeuralNetwork.Tanh(),
            TorchNeuralNetwork.Linear(MODEL_HIDDEN_2, MODEL_HIDDEN_1),
            # dim=1 is the sequence axis of a (batch, sequence, 1) tensor. The
            # original passed MODEL_HIDDEN_1 here, which only worked because that
            # layer-width constant happens to equal 1.
            TorchNeuralNetwork.Softmax(dim=1)
        )

        # Regression layer mapping the pooled vector to the prediction.
        self.linear_reg = TorchNeuralNetwork.Sequential(
            TorchNeuralNetwork.Linear(MODEL_HIDDEN_3, MODEL_HIDDEN_1)
        )

    def forward(self, commonlit_encode, commonlit_attn,Concatenate_Last_4_Layers):
        """Return the regression output for a batch of tokenised excerpts.

        Args:
            commonlit_encode: token ids, passed to the encoder as input_ids.
            commonlit_attn: attention mask, passed to the encoder as attention_mask.
            Concatenate_Last_4_Layers: when truthy, the token representation is
                the element-wise mean of the last four hidden layers (despite the
                name, the layers are averaged, not concatenated); otherwise only
                the last hidden layer is used.

        Returns:
            The output of the regression layer, one row per batch element.
        """
        model_hidden = self.model(input_ids=commonlit_encode,attention_mask=commonlit_attn)

        if not Concatenate_Last_4_Layers:
            # Use only the output of the last transformer layer.
            model_hidden_stack_mean = model_hidden.hidden_states[-1]  #shape: Batch_Size*Sequence_Length*Hidden_Size
        else:
            # Last 4 layers (1st fine-tuning strategy): stack, then average over the layer axis.
            model_hidden_stack = torch.stack(
                [model_hidden.hidden_states[layer] for layer in (-1, -2, -3, -4)]
            )  #shape: 4*Batch_Size*Sequence_Length*Hidden_Size
            model_hidden_stack_mean = torch.mean(model_hidden_stack, 0)  #shape: Batch_Size*Sequence_Length*Hidden_Size

        # Per-token weights from the attention head.
        # NOTE: commonlit_attn is not applied here, so padded positions also receive weight.
        model_hidden_weight = self.attn_head(model_hidden_stack_mean)  #shape: Batch_Size*Sequence_Length*1

        # Weighted sum of the token vectors along the sequence axis gives one
        # vector per excerpt, which is passed to the regression layer.
        model_hidden_vec = torch.sum(model_hidden_weight * model_hidden_stack_mean, dim=1)  #shape: Batch_Size*Hidden_Size

        return self.linear_reg(model_hidden_vec)
    

In [20]:
'''
This function feeds the input to the CommonLit architecture and returns the output.
'''

'\nThis function feeds the input to the CommonLit architecture and returns the output.\n'

In [21]:
def commonlit_test(commonlit_arch, commonlit_iterable,Concatenate_Last_4_Layers):
  """Run a model over every batch of a loader and collect its predictions.

  Args:
    commonlit_arch: module called as commonlit_arch(input_ids, attention_mask, flag).
    commonlit_iterable: DataLoader yielding (input_ids, attention_mask) pairs;
      its .dataset must support len().
    Concatenate_Last_4_Layers: forwarded unchanged to the model on each call.

  Returns:
    A 1-D numpy array with one prediction per dataset item, in loader order.
  """
  # Evaluation mode (turns off dropout and similar training-only behaviour).
  commonlit_arch.eval()

  # Feed the inputs to whatever device the model already lives on instead of
  # assuming a GPU; a model moved to CUDA by the caller behaves as before.
  first_param = next(commonlit_arch.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cpu")

  commonlit_pred = np.zeros(len(commonlit_iterable.dataset))
  commonlit_row = 0

  # No gradients are needed for inference.
  with torch.no_grad():
    for commonlit_encode, commonlit_attn in commonlit_iterable:
      commonlit_attn = commonlit_attn.to(device)
      commonlit_encode = commonlit_encode.to(device)

      commonlit_return = commonlit_arch(commonlit_encode, commonlit_attn,Concatenate_Last_4_Layers)

      # Flatten the batch output and write it into the next free slice.
      batch_size = commonlit_return.shape[0]
      commonlit_pred[commonlit_row : commonlit_row + batch_size] = commonlit_return.flatten().cpu().numpy()
      commonlit_row += batch_size

  return commonlit_pred

In [22]:
'''
PyTorch provides two data primitives: torch.utils.data.DataLoader and torch.utils.data.Dataset that allow us to use your own data.
Dataset stores the samples and their corresponding labels, and DataLoader wraps an iterable around the Dataset to enable easy access to the samples.
A custom Dataset class has three functions: __init__, __len__, and __getitem__.
'''

'\nPyTorch provides two data primitives: torch.utils.data.DataLoader and torch.utils.data.Dataset that allow us to use your own data.\nDataset stores the samples and their corresponding labels, and DataLoader wraps an iterable around the Dataset to enable easy access to the samples.\nA custom Dataset class has three functions: __init__, __len__, and __getitem__.\n'

In [23]:
class CommonLit_Item(Dataset):
  """Dataset of tokenised excerpts, optionally paired with their target score.

  Args:
    data: object exposing an `excerpt` column (and a `target` column when
      io is False) with pandas-style attribute access.
    commonlit_model: one of "rbs", "rls", "deberta" or "electra"; selects the tokenizer.
    io: when True the dataset is inference-only and items carry no target.

  Raises:
    ValueError: if commonlit_model is not one of the supported keys.
  """

  #run once when instantiating the Dataset object
  def __init__(self, data,commonlit_model,io=False,):
    super().__init__()

    # Pick the tokenizer that matches the transformer. The chain is kept lazy so
    # that only the requested tokenizer is referenced.
    if commonlit_model == "rbs":
      model_token = rbs_model_token
    elif commonlit_model == "rls":
      model_token = rls_model_token
    elif commonlit_model == "deberta":
      model_token = deberta_model_token
    elif commonlit_model == "electra":
      model_token = electra_model_token
    else:
      # Previously an unknown key fell through and failed later on the unbound model_token variable.
      raise ValueError(
        "Unknown commonlit_model {!r}; expected one of 'rbs', 'rls', 'deberta', 'electra'".format(commonlit_model)
      )

    self.io = io
    if not self.io:
      # Targets are only read when the dataset is not inference-only.
      self.read_ease = torch.tensor(data.target.values, dtype=torch.float32)

    self.excerpt = data.excerpt.tolist()
    # Tokenise every excerpt once up front, padded/truncated to MODEL_LENGTH.
    self.ec = model_token.batch_encode_plus(self.excerpt,padding = MODEL_PAD,max_length = MODEL_LENGTH,truncation = True,return_attention_mask=True)

    self.item_data = data

  #loads and returns a sample from the dataset at the given index.
  def __getitem__(self, index):
    commonlit_attn = torch.tensor(self.ec['attention_mask'][index])
    commonlit_encode = torch.tensor(self.ec['input_ids'][index])

    if not self.io:
      commonlit_read_ease = self.read_ease[index]
      return (commonlit_encode, commonlit_attn, commonlit_read_ease)
    else:
      return (commonlit_encode, commonlit_attn)

  #returns the number of samples in our dataset.
  def __len__(self):
    return len(self.item_data)

In [24]:
'''
Here we get the predicted values from each of the 5 fold models of a transformer, then take the mean of the predicted values across the folds.
'''

'\nHere we get the predicted values from each of the 5 fold models of a transformer, then take the mean of the predicted values across the folds.\n'

In [25]:
def commonlit_ensemble_predictions(X,commonlit_model) :
  """Average the predictions of the five saved fold models of one transformer.

  Args:
    X: data handed to CommonLit_Item (needs an `excerpt` column and len()).
    commonlit_model: model key ("rbs", "rls", "deberta" or "electra"); selects
      the tokenizer, the architecture and the cs_<key>_models/ checkpoint folder.

  Returns:
    1-D numpy array with the mean prediction across the five folds, one entry per row of X.
  """
  test_data = X
  commonlit_result = np.zeros((5, len(test_data)))

  #creating datasets
  commonlit_item_pred = CommonLit_Item(test_data,commonlit_model,io=True)

  #creating iterable datasets
  commonlit_iterable_pred = DataLoader(commonlit_item_pred, batch_size=MODEL_BATCH,drop_last=False, shuffle=False, num_workers=MODEL_CPU_WORKERS)

  # The layer strategy depends only on the model key, so decide it once:
  # every model except electra averages the last four hidden layers.
  Concatenate_Last_4_Layers = commonlit_model != "electra"

  #going through each of saved models
  for arch_save_model in range(5):
    arch_save_model_one = arch_save_model + 1
    arch_dir = "cs_"+commonlit_model+f"_models/model_{arch_save_model_one}.pth"
    print("{0} Save Model Path {1}".format(commonlit_model,arch_dir))

    #initialize commonlit architecture
    commonlit_arch = CommonLit_Architecture(commonlit_model)

    # Load the saved state dict onto the CPU. Without map_location, tensors are
    # restored on the device they were saved from, which would keep a second
    # copy of the weights on the GPU next to the model itself.
    # NOTE: torch.load unpickles the file — only load checkpoints you trust.
    commonlit_load_dir = torch.load(arch_dir, map_location=CPU)

    #Loads a model's parameter dictionary
    commonlit_arch.load_state_dict(commonlit_load_dir)
    # The weights now live in the model; drop the extra copy before inference.
    del commonlit_load_dir

    #Sending to CUDA Device
    commonlit_arch.to(CUDA)

    commonlit_result[arch_save_model] = commonlit_test(commonlit_arch, commonlit_iterable_pred,Concatenate_Last_4_Layers)

    # Release this fold's model before the next one is built.
    del commonlit_arch
    gc.collect()

  predictions_transformers = commonlit_result.mean(axis=0)
  return predictions_transformers

## **final_fun_1**

**The final ensemble consists of** 

1.   Roberta-Base-Squad2 (Pre-Training and Last 4 Layers)
2.   Roberta-Large-Squad2 (No Pre-Training and Last 4 Layers)
3.   Deberta (No Pre-Training and Last 4 Layers)
4.   Electra (No Pre-Training and No Last 4 Layers)


In [26]:
'''
final_fun_1 function
'''

'\nfinal_fun_1 function\n'

In [27]:
def final_fun_1(X):
  """Return the final ensemble prediction for X.

  Each of the four transformers (Roberta-Base-Squad2, Roberta-Large-Squad2,
  Deberta, Electra) contributes its fold-averaged prediction; the four
  results are averaged with equal weight.
  """
  # Evaluate the models in a fixed order: rbs, rls, deberta, electra.
  model_keys = ("rbs", "rls", "deberta", "electra")
  rbs_pred, rls_pred, deberta_pred, electra_pred = [
    commonlit_ensemble_predictions(X, model_key) for model_key in model_keys
  ]

  # Equal-weight mean of the four per-model predictions.
  return (rbs_pred + rls_pred + deberta_pred + electra_pred)/4

In [28]:

# Read validation_data.csv (path is relative to the current working directory) into a DataFrame.
validation_data = pd.read_csv('validation_data.csv')

In [29]:
validation_data.head()

Unnamed: 0,id,url_legal,license,excerpt,target,standard_error
0,c12129c31,,,When the young people returned to the ballroom...,-0.340259,0.464009
1,85aa80a4c,,,"All through dinner time, Mrs. Fayre was somewh...",-0.315372,0.480805
2,b69ac6792,,,"As Roger had predicted, the snow departed as q...",-0.580118,0.476676
3,dd1000b26,,,And outside before the palace a great garden w...,-1.054013,0.450007
4,37c1b32fb,,,Once upon a time there were Three Bears who li...,0.247197,0.510845


In [30]:

# Keep the ground-truth scores aside, then strip the label columns so only model inputs remain.
# NOTE: the drop happens in place, so re-running this cell fails — "target" is no longer a column.
target = validation_data["target"]
validation_data.drop(["target","standard_error"],axis=1,inplace=True)

In [31]:
validation_data.head()

Unnamed: 0,id,url_legal,license,excerpt
0,c12129c31,,,When the young people returned to the ballroom...
1,85aa80a4c,,,"All through dinner time, Mrs. Fayre was somewh..."
2,b69ac6792,,,"As Roger had predicted, the snow departed as q..."
3,dd1000b26,,,And outside before the palace a great garden w...
4,37c1b32fb,,,Once upon a time there were Three Bears who li...


In [32]:
target

0   -0.340259
1   -0.315372
2   -0.580118
3   -1.054013
4    0.247197
5   -0.861809
6   -1.759061
Name: target, dtype: float64

In [33]:

# Run the full ensemble (four transformers, five saved fold models each) over the validation excerpts.
commonlit_predictions =  final_fun_1(validation_data)

rbs Save Model Path cs_rbs_models/model_1.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rbs Save Model Path cs_rbs_models/model_2.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rbs Save Model Path cs_rbs_models/model_3.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rbs Save Model Path cs_rbs_models/model_4.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rbs Save Model Path cs_rbs_models/model_5.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rls Save Model Path cs_rls_models/model_1.pth
rls Save Model Path cs_rls_models/model_2.pth
rls Save Model Path cs_rls_models/model_3.pth
rls Save Model Path cs_rls_models/model_4.pth
rls Save Model Path cs_rls_models/model_5.pth
deberta Save Model Path cs_deberta_models/model_1.pth
deberta Save Model Path cs_deberta_models/model_2.pth
deberta Save Model Path cs_deberta_models/model_3.pth
deberta Save Model Path cs_deberta_models/model_4.pth
deberta Save Model Path cs_deberta_models/model_5.pth
electra Save Model Path cs_electra_models/model_1.pth
electra Save Model Path cs_electra_models/model_2.pth
electra Save Model Path cs_electra_models/model_3.pth
electra Save Model Path cs_electra_models/model_4.pth
electra Save Model Path cs_electra_models/model_5.pth


**predicted values**

In [34]:
commonlit_predictions

array([-0.21881673, -0.20462044, -0.59951051, -1.31998981,  0.25492687,
       -1.063782  , -1.47797014])

In [35]:
target

0   -0.340259
1   -0.315372
2   -0.580118
3   -1.054013
4    0.247197
5   -0.861809
6   -1.759061
Name: target, dtype: float64

## **final_fun_2**

In [36]:
'''
final_fun_2 function
'''

'\nfinal_fun_2 function\n'

In [37]:
def final_fun_2(X,Y):
  """Return the RMSE of the ensemble prediction for X against the true scores Y.

  Args:
    X: data passed unchanged to final_fun_1.
    Y: true target values, aligned with the rows of X.

  Returns:
    The root mean squared error between Y and the ensemble prediction.
  """
  pred = final_fun_1(X)

  # Take the square root explicitly instead of passing squared=False: that
  # keyword was deprecated and then removed in newer scikit-learn releases,
  # while this form works on old and new versions alike.
  rmse_score = np.sqrt(mean_squared_error(Y, pred))

  return rmse_score

In [38]:

# Score the ensemble against the held-back targets.
# NOTE: final_fun_2 calls final_fun_1 again, so every saved model is reloaded and re-run here.
commonlit_rmse_score = final_fun_2(validation_data,target)

rbs Save Model Path cs_rbs_models/model_1.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rbs Save Model Path cs_rbs_models/model_2.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rbs Save Model Path cs_rbs_models/model_3.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rbs Save Model Path cs_rbs_models/model_4.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rbs Save Model Path cs_rbs_models/model_5.pth


Some weights of RobertaModel were not initialized from the model checkpoint at transformers_roberta_base_squad2 and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


rls Save Model Path cs_rls_models/model_1.pth
rls Save Model Path cs_rls_models/model_2.pth
rls Save Model Path cs_rls_models/model_3.pth
rls Save Model Path cs_rls_models/model_4.pth
rls Save Model Path cs_rls_models/model_5.pth
deberta Save Model Path cs_deberta_models/model_1.pth
deberta Save Model Path cs_deberta_models/model_2.pth
deberta Save Model Path cs_deberta_models/model_3.pth
deberta Save Model Path cs_deberta_models/model_4.pth
deberta Save Model Path cs_deberta_models/model_5.pth
electra Save Model Path cs_electra_models/model_1.pth
electra Save Model Path cs_electra_models/model_2.pth
electra Save Model Path cs_electra_models/model_3.pth
electra Save Model Path cs_electra_models/model_4.pth
electra Save Model Path cs_electra_models/model_5.pth


**rmse score**

In [39]:
print(commonlit_rmse_score)

0.17647301314396438
