This file calls all the models and runs them on test data

In [1]:
import json
import torch
import torch.nn as nn
from gensim.models import KeyedVectors
import random
import numpy as np
from scipy import spatial
from sklearn.metrics import f1_score, accuracy_score
pad_len = 174
import pickle

Importing Test Data

In [3]:
# Task 1
with open('test_data_task1.json', 'r', encoding='utf-8') as test_file_task1:
    test_data_temp_task1 = json.load(test_file_task1)
    test_data_list_task1 = list(test_data_temp_task1.values())
    test_data_task1 = [test_dict for test_dict in test_data_list_task1 if len(test_dict["labels"]) <= pad_len]

with open('legaltags2labels.json', 'r', encoding='utf-8') as legaltags2labels:
    legaltag_label = json.load(legaltags2labels)

with open('labels2legaltags.json', 'r', encoding='utf-8') as labels2legaltags:
    label_legaltag = json.load(labels2legaltags)


# Task 2
with open('ATE_test.pkl', 'rb') as test_file_task2:
    test_data_dict_task2 = pickle.load(test_file_task2)
    test_data_list_task2 = list(test_data_dict_task2.values())
    test_data_task2 = [test_dict for test_dict in test_data_list_task2 if len(test_dict["labels"]) <= pad_len]

task2_dict = {"O" : 0, "B" : 1, "I":2}
task2_dict_rev = {0 : "O" , 1 : "B" , 2:"I" }

Generating Word2Vec embeddings

In [4]:
word2vec_model = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)

Generating Glove Embeddings

In [5]:
def safe_float_conversion(value):
    try:
        return float(value)
    except ValueError:
        return 0

GloVe_embeddings_dict = {}
with open("glove.6B.300d.txt", 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        # print(len(values))
        vector = np.asarray([safe_float_conversion(x) for x in values[1:] if x])
        # print(len(vector))
        if word not in GloVe_embeddings_dict:
            GloVe_embeddings_dict[word] = vector

def find_closest_embeddings(embedding):
    return sorted(GloVe_embeddings_dict.keys(), key=lambda word: spatial.distance.euclidean(GloVe_embeddings_dict[word], embedding))

def nearestwords(word,number):
    return find_closest_embeddings(GloVe_embeddings_dict[word])[:number]

Generate Fasttext Embeddings

In [None]:
# # Insert fasttext embeddings
# # !pip install gensim
# # !pip install fasttext

# import fasttext
# from huggingface_hub import hf_hub_download

# model_path = hf_hub_download(repo_id="facebook/fasttext-en-vectors", filename="model.bin")
# model = fasttext.load_model(model_path)
# model.words


# len(model.words)

# fasttext_dict = {}

# for word in model.words:
#   fasttext_dict[word] = model[word]

Preparing Sequence

In [6]:
def prepare_sequence_t1(input_dict, pretrained_model, label_dict, pad_len=174):
    input_sentence = input_dict["text"].split()
    input_tags = input_dict["labels"]
    idxs, tags = [], []

    for index in range(len(input_sentence)):
        if input_sentence[index] in pretrained_model:
            temp = pretrained_model[input_sentence[index]]
            temp = torch.tensor(temp, dtype=torch.long)
            # print(temp.size())
            idxs.append(temp)
            tags.append(label_dict[input_tags[index]])

    # Pad idxs and tags
    if len(tags) < pad_len:
        tags += [26] * (pad_len - len(tags))

    # Pad the embeddings
    if len(idxs) < pad_len:
        idxs += [torch.zeros(300)] * (pad_len - len(idxs))

    stacked_tensor = torch.stack([tensor.unsqueeze(0) for tensor in idxs], dim=0)
    return stacked_tensor, torch.tensor(tags, dtype=torch.long)

def prepare_sequence_t2(input_dict, pretrained_model, label_dict, pad_len=174):
    input_sentence = input_dict["text"].split()
    input_tags = input_dict["labels"]
    idxs, tags = [], []

    for index in range(len(input_sentence)):
        if input_sentence[index] in pretrained_model:
            temp = pretrained_model[input_sentence[index]]
            temp = torch.tensor(temp, dtype=torch.long)
            # print(temp.size())
            idxs.append(temp)
            tags.append(label_dict[input_tags[index]])

    # Pad idxs and tags
    if len(tags) < pad_len:
        tags += [0] * (pad_len - len(tags))

    # Pad the embeddings
    if len(idxs) < pad_len:
        idxs += [torch.zeros(300)] * (pad_len - len(idxs))

    stacked_tensor = torch.stack([tensor.unsqueeze(0) for tensor in idxs], dim=0)
    return stacked_tensor, torch.tensor(tags, dtype=torch.long)

Testing Loop

In [7]:
def test_model_t1(model, test_data, pretrained_embeddings, labels_dict):
    model.eval()  # Set the model to evaluation mode
    device = next(model.parameters()).device  # Get the device of the model

    test_preds, test_true = [], []

    with torch.no_grad():
        for sample in test_data:

            # Prepare input and target sequences
            input_indices, output_indices = prepare_sequence_t1(sample, pretrained_embeddings, labels_dict)
            if input_indices.size()[0] == 0:
                continue  # Skip if the input sequence is empty
            
            outputs = model(input_indices)
            _, predicted = torch.max(outputs.data, 2)  # Get the predicted classes
            
            test_preds.extend(predicted.view(-1).numpy())  # Flatten and store predictions
            test_true.extend(output_indices.view(-1).numpy()) 

    # Compute metrics
    # Calculate the F1 score for the test set
    test_f1 = f1_score(test_true, test_preds, average='macro')

    # Calculate accuracy for the test set
    test_accuracy = accuracy_score(test_true, test_preds)

    # Print the F1 score and accuracy
    print(f'Test F1 Score: {test_f1}')
    print(f'Test Accuracy: {test_accuracy}')

# test_model(model, test_data, word2vec_model, legaltag_label)
    
def test_model_t2(model, test_data, pretrained_embeddings, labels_dict):
    model.eval()  # Set the model to evaluation mode
    device = next(model.parameters()).device  # Get the device of the model

    test_preds, test_true = [], []

    with torch.no_grad():
        for sample in test_data:

            # Prepare input and target sequences
            input_indices, output_indices = prepare_sequence_t2(sample, pretrained_embeddings, labels_dict)
            if input_indices.size()[0] == 0:
                continue  # Skip if the input sequence is empty
            
            outputs = model(input_indices)
            _, predicted = torch.max(outputs.data, 2)  # Get the predicted classes
            
            test_preds.extend(predicted.view(-1).numpy())  # Flatten and store predictions
            test_true.extend(output_indices.view(-1).numpy()) 

    # Compute metrics
    # Calculate the F1 score for the test set
    test_f1 = f1_score(test_true, test_preds, average='macro')

    # Calculate accuracy for the test set
    test_accuracy = accuracy_score(test_true, test_preds)

    # Print the F1 score and accuracy
    print(f'Test F1 Score: {test_f1}')
    print(f'Test Accuracy: {test_accuracy}')

# test_model(model, test_data, word2vec_model, legaltag_label)

Instantiating Model Classes

In [8]:
class RNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out)  # Take the output of the last timestep
        return out
    
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out)  # Take the output of the last timestep
        return out
    
class GRUModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.gru(x)
        out = self.fc(out)  # Take the output of the last timestep
        return out
    
# Add BiLSTM CRF

Importing Models

In [9]:
def load_pretrained_model(model_class, pretrained_model_path):
    # Load the pretrained model
    pretrained_model = torch.load(pretrained_model_path)
    
    # Infer dimensions from the loaded model tensors
    if "RNN" in pretrained_model_path:
        input_dim = pretrained_model['rnn.weight_ih_l0'].shape[1]
        hidden_dim = pretrained_model['rnn.weight_hh_l0'].shape[0]
        output_dim = pretrained_model['fc.weight'].shape[0]
        layers = 1
        model = model_class(input_dim, hidden_dim, output_dim, layers)

    elif "GRU" in pretrained_model_path:
        input_dim = pretrained_model['gru.weight_ih_l0'].shape[1]
        hidden_dim = pretrained_model['gru.weight_hh_l0'].shape[0]
        output_dim = pretrained_model['fc.weight'].shape[0]
        layers = 1
        model = model_class(input_dim, hidden_dim, output_dim, layers)

    elif "LSTM" in pretrained_model_path:
        input_dim = pretrained_model['lstm.weight_ih_l0'].shape[1]
        hidden_dim = pretrained_model['lstm.weight_hh_l0'].shape[0]
        output_dim = pretrained_model['fc.weight'].shape[0]
        layers = 1
        model = model_class(input_dim, hidden_dim, output_dim, layers)
    
    # Load the entire pretrained model (including model architecture and state_dict)
    model.load_state_dict(pretrained_model)
    
    return model

Generating Output

Task - 1

In [10]:
Task1_RNN_word2vec = RNNModel(input_dim=300, hidden_dim=64, output_dim=27, num_layers=1)
Task1_RNN_word2vec.load_state_dict(torch.load("Task1_RNN_word2vec.pth"))
Task1_RNN_GloVe = RNNModel(input_dim=300, hidden_dim=256, output_dim=27, num_layers=1)
Task1_RNN_GloVe.load_state_dict(torch.load("Task1_RNN_GloVe.pth"))
# Task1_RNN_fasttext = RNNModel(input_dim=300, hidden_dim=256, output_dim=27, num_layers=1)
# Task1_RNN_fasttext.load_state_dict(torch.load("models\Task1_RNN_fasttext.pth"))

Task1_LSTM_word2vec = LSTMModel(input_dim=300, hidden_dim=64, output_dim=27, num_layers=1,dropout=0)
Task1_LSTM_word2vec.load_state_dict(torch.load("Task1_LSTM_word2vec.pth"))
Task1_LSTM_GloVe = LSTMModel(input_dim=300, hidden_dim=256, output_dim=27, num_layers=1,dropout=0)
Task1_LSTM_GloVe.load_state_dict(torch.load("Task1_LSTM_GloVe.pth"))
# Task1_LSTM_fasttext = LSTMModel(input_dim=300, hidden_dim=5, output_dim=27, num_layers=1,dropout=0)
# Task1_LSTM_fasttext.load_state_dict(torch.load("Task1_LSTM_Fasttext.pth"))

Task1_GRU_word2vec = GRUModel(input_dim=300, hidden_dim=64, output_dim=27, num_layers=1)
Task1_GRU_word2vec.load_state_dict(torch.load("Task1_GRU_word2vec.pth"))
Task1_GRU_GloVe = GRUModel(input_dim=300, hidden_dim=64, output_dim=27, num_layers=1)  
Task1_GRU_GloVe.load_state_dict(torch.load("Task1_GRU_GloVe.pth"))
# Task1_GRU_fasttext = GRUModel(input_dim=300, hidden_dim=64, output_dim=27, num_layers=1)
# Task1_GRU_fasttext.load_state_dict(torch.load("Task1_GRU_Fasttext.pth"))

# Add BiLSTM CRF

<All keys matched successfully>

Task - 2

In [11]:
Task2_RNN_word2vec = RNNModel(input_dim=300, hidden_dim=256, output_dim=3, num_layers=1)
Task2_RNN_word2vec.load_state_dict(torch.load("Task2_RNN_word2vec.pth"))
Task2_RNN_GloVe = RNNModel(input_dim=300, hidden_dim=256, output_dim=3, num_layers=1)
Task2_RNN_GloVe.load_state_dict(torch.load("Task2_RNN_GloVe.pth"))
# Task2_RNN_fasttext = RNNModel(input_dim=300, hidden_dim=256, output_dim=3, num_layers=1) 
# Task2_RNN_fasttext.load_state_dict(torch.load("Task2_RNN_Fasttext.pth"))


Task2_LSTM_word2vec = LSTMModel(input_dim=300, hidden_dim=256, output_dim=3, num_layers=1,dropout=0)
Task2_LSTM_word2vec.load_state_dict(torch.load("Task2_LSTM_word2vec.pth"))
Task2_LSTM_GloVe = LSTMModel(input_dim=300, hidden_dim=256, output_dim=3, num_layers=1,dropout=0)
Task2_LSTM_GloVe.load_state_dict(torch.load("Task2_LSTM_GloVe.pth"))
# Task2_LSTM_fasttext = LSTMModel(input_dim=300, hidden_dim=256, output_dim=3, num_layers=1,dropout=0)
# Task2_LSTM_fasttext.load_state_dict(torch.load("Task2_LSTM_fasttext.pth"))


Task2_GRU_word2vec = GRUModel(input_dim=300, hidden_dim=64, output_dim=3, num_layers=1)
Task2_GRU_word2vec.load_state_dict(torch.load("Task2_GRU_word2vec.pth"))
Task2_GRU_GloVe = GRUModel(input_dim=300, hidden_dim=256, output_dim=3, num_layers=1)
Task2_GRU_GloVe.load_state_dict(torch.load("Task2_GRU_GloVe.pth"))
# Task2_GRU_fasttext = GRUModel(input_dim=300, hidden_dim=256, output_dim=3, num_layers=1) # Giving Error
# Task2_GRU_fasttext.load_state_dict(torch.load("Task2_GRU_fasttext.pth"))

# Add BiLSTM CRF

<All keys matched successfully>

Testing Models

In [149]:
print("Task-1 RNN Word2Vec")
test_model_t1(Task1_RNN_word2vec, test_data_list_task1, word2vec_model, legaltag_label)
print()

print("Task-1 RNN GloVe")
test_model_t1(Task1_RNN_GloVe, test_data_list_task1, GloVe_embeddings_dict, legaltag_label)
print()

# print("Task-1 RNN fasttext")
# test_model_t1(Task1_RNN_fasttext, test_data_list_task1, fasttext_dict, legaltag_label) # rename fasttext dict
# print()


print("Task-1 LSTM Word2Vec")
test_model_t1(Task1_LSTM_word2vec, test_data_list_task1, word2vec_model, legaltag_label)
print()

print("Task-1 LSTM GloVe")
test_model_t1(Task1_LSTM_GloVe, test_data_list_task1, GloVe_embeddings_dict, legaltag_label)
print()

# print("Task-1 LSTM fasttext")
# test_model_t1(Task1_LSTM_fasttext, test_data_list_task1, fasttext_dict, legaltag_label) # rename fasttext dict
# print()


print("Task-1 GRU Word2Vec")
test_model_t1(Task1_GRU_word2vec, test_data_list_task1, word2vec_model, legaltag_label)
print()

print("Task-1 GRU GloVe")
test_model_t1(Task1_GRU_GloVe, test_data_list_task1, GloVe_embeddings_dict, legaltag_label)
print()

# print("Task-1 GRU fasttext")
# test_model_t1(Task1_GRU_fasttext, test_data_list_task1, fasttext_dict, legaltag_label) # rename fasttext dict
# print()


# # Add BiLSTM CRF


Task-1 RNN Word2Vec
Test F1 Score: 0.03665188258262209
Test Accuracy: 0.979415718905563

Task-1 RNN GloVe
Test F1 Score: 0.13225549419595778
Test Accuracy: 0.9903528214817775

Task-1 LSTM Word2Vec
Test F1 Score: 0.03665188258262209
Test Accuracy: 0.979415718905563

Task-1 LSTM GloVe
Test F1 Score: 0.15805884841311724
Test Accuracy: 0.9906556205564235

Task-1 GRU Word2Vec
Test F1 Score: 0.03665188258262209
Test Accuracy: 0.979415718905563



In [159]:
print("Task-2 RNN Word2Vec")
test_model_t2(Task2_RNN_word2vec, test_data_list_task2, word2vec_model, task2_dict)
print()

print("Task-2 RNN GloVe")
test_model_t2(Task2_RNN_GloVe, test_data_list_task2, GloVe_embeddings_dict, task2_dict)
print()

# print("Task-2 RNN fasttext")
# # test_model_t2(Task2_RNN_fasttext, test_data_list_task2, word2vec_model, task2_dict) # rename fasttext dict
# print()


print("Task-2 LSTM Word2Vec")
test_model_t2(Task2_LSTM_word2vec, test_data_list_task2, word2vec_model, task2_dict)
print()

print("Task-2 LSTM GloVe")
test_model_t2(Task2_LSTM_GloVe, test_data_list_task2, GloVe_embeddings_dict, task2_dict)
print()

# print("Task-2 LSTM fasttext")
# # test_model_t2(Task2_LSTM_fasttext, test_data_list_task2, word2vec_model, task2_dict) # rename fasttext dict
# print()


print("Task-2 GRU Word2Vec")
test_model_t2(Task2_GRU_word2vec, test_data_list_task2, word2vec_model, task2_dict)
print()

print("Task-2 GRU GloVe")
test_model_t2(Task2_GRU_GloVe, test_data_list_task2, GloVe_embeddings_dict, task2_dict)
print()

# print("Task-2 GRU fasttext")
# # test_model_t2(Task2_GRU_fasttext, test_data_list_task2, word2vec_model, task2_dict) # rename fasttext dict
# print()


# Add BiLSTM CRF


Task-2 RNN Word2Vec
Test F1 Score: 0.3313179032476041
Test Accuracy: 0.9879800953181945

Task-2 RNN GloVe
Test F1 Score: 0.5387312564922245
Test Accuracy: 0.9916771797028315

Task-2 LSTM Word2Vec
Test F1 Score: 0.3313179032476041
Test Accuracy: 0.9879800953181945

Task-2 LSTM GloVe
Test F1 Score: 0.5916985066094422
Test Accuracy: 0.9921327446033081

Task-2 GRU Word2Vec
Test F1 Score: 0.3313179032476041
Test Accuracy: 0.9879800953181945

Task-2 GRU GloVe
Test F1 Score: 0.5873960406533144
Test Accuracy: 0.9920626576955425

