# Import Libraries, Load XLM-RoBERTa Model and Tokenizer

In [16]:
import pandas as pd 

# Load model directly
from transformers import AutoTokenizer, XLMRobertaModel

from torch.utils.data import Dataset, DataLoader
from torch.nn import Module
from torch.nn import CrossEntropyLoss as LossFn
from torch.optim import SGD as Optimizer
import torch

# show the training progress
from tqdm import tqdm

# split dataset to train and validation 
from sklearn.model_selection import train_test_split

# Pick the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report which device was selected.
if device.type == "cuda":
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")


def tokenize(text):
    """Encode ``text`` with the XLM-RoBERTa tokenizer.

    The result is returned as PyTorch tensors, truncated and padded to
    exactly 512 tokens.
    """
    return tokenizer(
        text,
        return_tensors='pt',
        max_length=512,
        padding='max_length',
        truncation=True,
    )


# Pre-trained encoder used as the default backbone of the classifier.
roberta = XLMRobertaModel.from_pretrained("xlm-roberta-base")

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB


# Define Model Class

In [17]:
# Build a model for text classification from 2 text inputs to 3 output labels
class CompetitionModel(Module):
    """
    Two-input text classifier built on a shared transformer encoder.

    The premise and the hypothesis are encoded separately by the same
    backbone. Each pooled representation goes through its own linear
    projection, the two projections are concatenated, and a final linear
    layer maps them to ``output_shape`` class scores.

    Attribute names (including the ``hipotesis`` spelling) are kept as-is
    because they are the keys of any saved state dict for this class.
    """
    def __init__(self, output_shape, model=None, hidden_size=64):
        """
        Initializes the CompetitionModel.

        Parameters:
        - output_shape (int): Number of output classes for classification.
        - model (optional): Pre-trained transformer used for feature
          extraction. It must expose ``config.hidden_size`` and, when called
          with ``(input_ids, attention_mask=...)``, return an object with a
          ``pooler_output`` attribute. Defaults to the module-level
          ``roberta``.
        - hidden_size (int): Output width of each per-input projection layer.
        """
        # Initialize the torch.nn.Module
        super().__init__()

        # Fall back to the module-level 'roberta' backbone if none is given
        if model is None:
            model = roberta

        # Move the backbone to GPU if available
        if torch.cuda.is_available():
            model.cuda()

        self.model = model

        # Width of the backbone's pooled output, read from its config
        backbone_width = model.config.hidden_size

        # Linear layer to transform the pooled output of the premise
        self.hidden_premise = torch.nn.Linear(backbone_width, hidden_size)

        # Linear layer to transform the pooled output of the hypothesis
        self.hidden_hipotesis = torch.nn.Linear(backbone_width, hidden_size)

        # Final classifier over the concatenation of both projections
        self.output_linear = torch.nn.Linear(hidden_size * 2, output_shape)

        # Softmax over the class dimension for obtaining output probabilities
        self.output_softmax = torch.nn.Softmax(dim=-1)

    def _encode(self, input_ids, attention_mask):
        """Run the backbone on one input and return its ``pooler_output``."""
        return self.model(input_ids, attention_mask=attention_mask).pooler_output

    def forward(self,
                premise_input_ids,
                premise_attention_mask,
                hipotesis_input_ids,
                hipotesis_attention_mask,
                *,
                return_logits=False):
        """
        Forward pass of the model.

        Parameters:
        - premise_input_ids: Encoded token ids of the premise text.
        - premise_attention_mask: Attention mask for the premise input.
        - hipotesis_input_ids: Encoded token ids of the hypothesis text.
        - hipotesis_attention_mask: Attention mask for the hypothesis input.
        - return_logits (bool, keyword-only): When True, return the raw
          (pre-softmax) class scores instead of probabilities. Use this when
          training with ``torch.nn.CrossEntropyLoss``, which expects
          unnormalized logits. Defaults to False, which keeps the original
          behavior.

        Returns:
        - Class probabilities (softmax over the last dimension), or the raw
          logits when ``return_logits`` is True.
        """
        # Encode both texts with the shared backbone
        premise_pooled = self._encode(premise_input_ids, premise_attention_mask)
        hypothesis_pooled = self._encode(hipotesis_input_ids, hipotesis_attention_mask)

        # Project each pooled representation with its own linear layer
        transformed_premise = self.hidden_premise(premise_pooled)
        transformed_hypothesis = self.hidden_hipotesis(hypothesis_pooled)

        # Concatenate the two projections along the feature dimension
        concatenated_output = torch.cat(
            [transformed_premise, transformed_hypothesis], dim=1
        )

        # Apply the final linear layer to get one score per class
        logits = self.output_linear(concatenated_output)

        if return_logits:
            return logits

        # Apply softmax activation for obtaining output probabilities
        return self.output_softmax(logits)

# The task has three target labels.
num_classes = 3

# Build the classifier on top of the pre-trained backbone loaded earlier.
model = CompetitionModel(output_shape=num_classes, model=roberta)

# Place the whole classifier (backbone and heads) on the selected device:
# the GPU when one is available, otherwise it stays on the CPU.
model.to(device)

# Load Weights to Model

In [18]:
# NOTE: torch.load unpickles arbitrary Python objects; only load checkpoints
# that come from a trusted source.
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU also loads on a CPU-only machine.
checkpoint = torch.load('/kaggle/input/roberta-finetuned-model/model', map_location=device)

# The original code calls the loaded object to obtain the state dict, so the
# file holds a callable (presumably `model.state_dict` was saved without being
# called - TODO confirm). A plain state dict is accepted as well.
state_dict = checkpoint() if callable(checkpoint) else checkpoint

# strict=False reports mismatching keys instead of raising on them.
model.load_state_dict(state_dict, strict=False)

_IncompatibleKeys(missing_keys=[], unexpected_keys=['model.embeddings.position_ids'])

# Make Predictions

In [19]:
# progress bar for the prediction loop
from tqdm import tqdm 

In [20]:
# Location of the competition's test split.
test_path = '/kaggle/input/contradictory-my-dear-watson/test.csv'

# Read the test split into a DataFrame.
test_df = pd.read_csv(filepath_or_buffer=test_path)

In [21]:
# Predicted class index for every row of the test set, in row order.
pred_labels: list = []

# Inference only: switch every layer to evaluation mode and disable autograd
# so no computation graph is built (and kept in memory) for each sample.
model.eval()
with torch.no_grad():
    # zip() has no length, so pass the row count for a percentage/ETA display.
    for premise, hypothesis in tqdm(
        zip(test_df.premise.to_list(), test_df.hypothesis.to_list()),
        total=len(test_df),
    ):
        # Tokenize each text separately and move the tensors to the model's device.
        premise_enc = tokenize(premise).to(device)
        hypothesis_enc = tokenize(hypothesis).to(device)

        prediction = model(
            premise_enc['input_ids'],
            premise_enc['attention_mask'],
            hypothesis_enc['input_ids'],
            hypothesis_enc['attention_mask'],
        )

        # One sample is passed per call, so row 0 holds its class scores;
        # the predicted label is the index of the highest score.
        pred_labels.append(int(torch.argmax(prediction[0])))

5195it [03:44, 23.18it/s]


In [24]:
# Pair every test id with its predicted label.
submission_columns = {
    'id': test_df['id'],
    'prediction': pred_labels,
}
submission = pd.DataFrame(submission_columns)

# Write the submission file without the DataFrame index column.
submission.to_csv('submission.csv', index=False)

In [25]:
# Sanity check: how many test rows were assigned to each predicted label.
submission['prediction'].value_counts()

prediction
0    5195
Name: count, dtype: int64