# Load Necessary Libraries and Data

In [1]:
!pip install -U sentence-transformers
!pip install joblib



In [2]:
from sentence_transformers import SentenceTransformer
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from joblib import load
import pandas as pd
import numpy as np

In [3]:
# Load the data for prediction.
# The frame must provide the 'premise' and 'hypothesis' text columns that
# generate_embeddings reads.
data = pd.read_csv('dev.csv')

# Load the SBERT model used to turn each sentence into an embedding vector.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

In [4]:
def generate_embeddings(dataframe, model):
    """Encode premise/hypothesis pairs into a single feature matrix.

    Args:
        dataframe: pandas DataFrame with 'premise' and 'hypothesis' columns.
        model: encoder exposing ``encode(list_of_str, show_progress_bar=...)``
            that returns one embedding row per sentence (for example a
            SentenceTransformer).

    Returns:
        2-D NumPy array with one row per input row: the premise embedding
        followed by the hypothesis embedding.

    Raises:
        KeyError: if either text column is missing from ``dataframe``.
    """
    def _as_text(column):
        # Blank CSV cells are read as NaN (a float), which the encoder cannot
        # process. Map them to empty strings so every row still gets a feature
        # vector, and coerce any non-string values to text.
        return dataframe[column].fillna('').astype(str).tolist()

    # Generate embeddings for premises and hypotheses separately
    premise_embeddings = model.encode(_as_text('premise'), show_progress_bar=True)
    hypothesis_embeddings = model.encode(_as_text('hypothesis'), show_progress_bar=True)

    # Combine the two sets of embeddings into one feature set (side by side)
    embeddings = np.concatenate((premise_embeddings, hypothesis_embeddings), axis=1)

    return embeddings

In [5]:
# Encode the data once; both Model A and Model B consume this same matrix of
# concatenated premise/hypothesis embeddings.
embeddings = generate_embeddings(data, sentence_model)

Batches:   0%|          | 0/211 [00:00<?, ?it/s]

Batches:   0%|          | 0/211 [00:00<?, ?it/s]

# Predict using Model A (Traditional ML)

In [6]:
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model artifacts that come from a trusted source.
modelA = load('ensembleNLI10.joblib')
# Predict labels from the concatenated premise/hypothesis features.
predictA = modelA.predict(embeddings)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished


In [7]:
# Wrap the Model A predictions in a single-column frame named 'prediction'.
predictionsA_df = pd.DataFrame(predictA, columns=['prediction'])
# Bare last expression: displays the frame as the cell output.
predictionsA_df

Unnamed: 0,prediction
0,1
1,0
2,1
3,1
4,1
...,...
6732,0
6733,1
6734,1
6735,0


In [8]:
# Write Model A predictions; index=False keeps the row index out of the file
# so it contains only the 'prediction' column.
predictionsA_df.to_csv('predictionsA.csv', index=False)

# Predict using Model B (Deep Learning)

In [9]:
# Run Model B on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
class AttentionLayer(nn.Module):
    """Attention pooling along dimension 1 of the input.

    Every position is scored with a learned projection squashed by tanh, the
    scores are normalised with a softmax over dimension 1, and the input is
    summed along that dimension under the resulting weights.
    """

    def __init__(self, input_dim):
        super().__init__()
        # The attribute names are the state-dict keys, and the creation order
        # fixes which random draws initialise which parameter.
        self.weight = nn.Parameter(torch.randn(input_dim, 1))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        """Return ``(pooled, weights)`` for ``x``.

        ``x`` must have ``input_dim`` as its last dimension; it is assumed to
        be laid out as (batch, sequence, input_dim) -- TODO confirm with caller.
        """
        scores = torch.tanh(x @ self.weight + self.bias)
        weights = torch.softmax(scores, dim=1)
        pooled = (weights * x).sum(dim=1)
        return pooled, weights

In [11]:
class BiGRUAttentionModel(nn.Module):
    """Binary classifier: bidirectional GRU -> attention pooling -> MLP head.

    Args:
        embedding_dim: size of the last dimension of the input features.
        hidden_dim: GRU hidden size per direction. The default (64) reproduces
            the original layer shapes, so existing checkpoints still load.
        dropout: dropout probability applied before the output layer
            (default 0.4, as in the original).

    ``forward`` expects batch-first input (batch, sequence, embedding_dim) and
    returns raw logits of shape (batch, 1); no sigmoid is applied here.
    """

    def __init__(self, embedding_dim, hidden_dim=64, dropout=0.4):
        super(BiGRUAttentionModel, self).__init__()
        # A bidirectional GRU concatenates the forward and backward states.
        gru_output_dim = 2 * hidden_dim
        self.bi_gru = nn.GRU(embedding_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.attention = AttentionLayer(gru_output_dim)
        self.norm1 = nn.LayerNorm(gru_output_dim)
        self.fc1 = nn.Linear(gru_output_dim, hidden_dim)
        self.norm2 = nn.LayerNorm(hidden_dim)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        # Only the per-step GRU outputs are used; the final hidden state is dropped.
        x, _ = self.bi_gru(x)
        # Pool over the sequence dimension; the attention weights are not needed here.
        x, _ = self.attention(x)
        x = self.norm1(x)
        x = self.fc1(x)
        x = self.norm2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [12]:
# Model B consumes the concatenated premise + hypothesis embeddings, so its
# input width is twice the sentence-embedding dimension.
embedding_dim = sentence_model.get_sentence_embedding_dimension() * 2
modelB = BiGRUAttentionModel(embedding_dim)
# Move the parameters to the selected device; as the last expression of the
# cell this also displays the module summary.
modelB.to(device)

BiGRUAttentionModel(
  (bi_gru): GRU(768, 64, batch_first=True, bidirectional=True)
  (attention): AttentionLayer()
  (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.4, inplace=False)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)

In [13]:
# Restore the trained weights. map_location forces the checkpoint tensors onto
# the CPU first, so the file loads even on a machine without CUDA.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a
# trusted source (consider weights_only=True if the installed torch supports it).
modelB.load_state_dict(torch.load('AttentionGRU.pth', map_location=torch.device('cpu')))

<All keys matched successfully>

In [14]:
# Switch to evaluation mode so the Dropout layer is inactive during inference.
modelB.eval()

BiGRUAttentionModel(
  (bi_gru): GRU(768, 64, batch_first=True, bidirectional=True)
  (attention): AttentionLayer()
  (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.4, inplace=False)
  (fc2): Linear(in_features=64, out_features=1, bias=True)
)

In [15]:
# Convert the NumPy feature matrix to a float32 tensor on the target device.
embeddings_tensor = torch.from_numpy(embeddings).float().to(device)

# Inference only: no autograd graph is needed for the forward pass.
with torch.no_grad():
    # unsqueeze(1) presents each feature row as a length-1 sequence to the
    # batch-first GRU.
    outputs = modelB(embeddings_tensor.unsqueeze(1))

# Logits -> probabilities -> rounded 0/1 labels as an integer NumPy array.
predictB = torch.sigmoid(outputs).round().cpu().numpy().astype(int)

In [16]:
# Wrap the Model B predictions in a single-column frame named 'prediction'.
predictionB_df = pd.DataFrame(predictB, columns=['prediction'])
# Bare last expression: displays the frame as the cell output.
predictionB_df

Unnamed: 0,prediction
0,1
1,0
2,1
3,0
4,1
...,...
6732,0
6733,1
6734,1
6735,0


In [17]:
# Write Model B predictions; index=False keeps the row index out of the file
# so it contains only the 'prediction' column.
predictionB_df.to_csv('predictionsB.csv', index=False)