In [1]:
import pandas as pd
import numpy as np
import re
from nltk.tokenize import word_tokenize

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from tqdm import tqdm

UNKNOWN_TOKEN='UNK'
PAD_TOKEN='PAD'
START_TOKEN = '<START>'
END_TOKEN = '<END>'


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

## Data

In [3]:
train_data=pd.read_csv('/kaggle/input/assignment-3/train.csv')
test_data=pd.read_csv('/kaggle/input/assignment-3/test.csv')

train_data

Unnamed: 0,Class Index,Description
0,3,"Reuters - Short-sellers, Wall Street's dwindli..."
1,3,Reuters - Private investment firm Carlyle Grou...
2,3,Reuters - Soaring crude prices plus worries\ab...
3,3,Reuters - Authorities have halted oil export\f...
4,3,"AFP - Tearaway world oil prices, toppling reco..."
...,...,...
119995,1,KARACHI (Reuters) - Pakistani President Perve...
119996,2,Red Sox general manager Theo Epstein acknowled...
119997,2,The Miami Dolphins will put their courtship of...
119998,2,PITTSBURGH at NY GIANTS Time: 1:30 p.m. Line: ...


## Data Preprocessing

In [4]:
def preprocess_text(data,type='train'):
    sentences=[]
    vocab=set()
    vocab.add(PAD_TOKEN)
    vocab.add(UNKNOWN_TOKEN)
    total=0

    frequency=dict()
    for text in data:
        text = re.sub(r'[^\w\s\n]', ' ', str(text).lower())
        words = word_tokenize(text)
        words = [START_TOKEN] + words + [END_TOKEN]
        sentences.append(words)
        for word in words:
            frequency[word]=frequency.get(word,0)+1
            total+=1
    
    if type=='train':
        frequency_threshold=3
        for i in range(len(sentences)):
            for j in range(len(sentences[i])):
                if frequency[sentences[i][j]]<frequency_threshold:
                    sentences[i][j]=UNKNOWN_TOKEN

    for sentence in sentences:
        for word in sentence:
            vocab.add(word)
    vocab=list(vocab)
    vocab = sorted(vocab)
    return sentences,vocab

In [5]:
sentences_train,vocab = preprocess_text(train_data['Description'])
sentences_test,_ = preprocess_text(test_data['Description'],'test')

In [6]:
print(len(vocab))

32009


In [7]:
word2id = {}
id2word = {}
sorted_vocab = sorted(vocab)
for i, word in enumerate(sorted_vocab):
    word2id[word] = i
    id2word[i] = word

In [9]:
sentence_lengths = [len(sentence) for sentence in sentences_train]
sorted_lengths = sorted(sentence_lengths)
index_95th_percentile = int(np.percentile(range(len(sorted_lengths)), 95))
length_95th_percentile = sorted_lengths[index_95th_percentile]
length_sentence=length_95th_percentile

for i in range(len(sentences_train)):
    sentence=sentences_train[i]
    sentences_train[i]=[word2id.get(word,word2id[UNKNOWN_TOKEN]) for word in sentence]
for i in range(len(sentences_test)):
    sentence=sentences_test[i]
    sentences_test[i]=[word2id.get(word,word2id[UNKNOWN_TOKEN]) for word in sentence]

In [8]:
def load_glove_model(file_path):
    word_vectors = {}

    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            vector = [float(val) for val in values[1:]]
            word_vectors[word] = vector

    return word_vectors

glove_file_path = '/kaggle/input/gloveembeddings/glove.6B.100d.txt'
glove_dict = load_glove_model(glove_file_path)

In [10]:
def create_embedding_matrix(glove_dict):
    weights_matrix = torch.zeros((len(vocab), 100))
    for i, word in enumerate(vocab):
        weights_matrix[i] = torch.tensor(glove_dict.get(word,np.random.uniform(-1, 1, size=100)))
    return weights_matrix


In [11]:
embedding_matrix = create_embedding_matrix(glove_dict)
print(embedding_matrix.size())


torch.Size([32009, 100])


In [12]:
class ELMo(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, embedding_matrix):
        super(ELMo, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.embedding1 = nn.Embedding.from_pretrained(embedding_matrix)
        self.embedding2 = nn.Embedding.from_pretrained(embedding_matrix)
        self.lstm_forward1 = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.lstm_forward2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.lstm_backward1 = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.lstm_backward2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.linear_mode1 = nn.Linear(200, vocab_size)
        self.linear_mode2 = nn.Linear(200, vocab_size)

    def forward(self, input_data, mode):
        if mode == 1:
            forward_embed = self.embedding1(input_data)
            forward_lstm1, _ = self.lstm_forward1(forward_embed) 
            forward_lstm2, _ = self.lstm_forward2(forward_lstm1) 
            lstm_concat = torch.cat((forward_lstm1, forward_lstm2), dim=-1)
            output = self.linear_mode1(lstm_concat)
            return output
        
        elif mode == 2:
            backward_embed = self.embedding2(input_data)
            backward_lstm1, _ = self.lstm_backward1(backward_embed) 
            backward_lstm2, _ = self.lstm_backward2(backward_lstm1) 
            lstm_concat = torch.cat((backward_lstm1, backward_lstm2), dim=-1)
            output = self.linear_mode2(lstm_concat)
            return output


In [17]:
vocab_size = len(vocab)
embedding_dim = 100
hidden_dim = 100
batch_size=32

elmo = ELMo(vocab_size, embedding_dim, hidden_dim, embedding_matrix)
elmo.to(device)

ELMo(
  (embedding1): Embedding(32009, 100)
  (embedding2): Embedding(32009, 100)
  (lstm_forward1): LSTM(100, 100, batch_first=True)
  (lstm_forward2): LSTM(100, 100, batch_first=True)
  (lstm_backward1): LSTM(100, 100, batch_first=True)
  (lstm_backward2): LSTM(100, 100, batch_first=True)
  (linear_mode1): Linear(in_features=200, out_features=32009, bias=True)
  (linear_mode2): Linear(in_features=200, out_features=32009, bias=True)
)

In [18]:
X_train = []
for sentence in sentences_train:
    if len(sentence) < length_sentence:
        padding_needed = length_sentence - len(sentence)
        sentence.extend(padding_needed*[word2id[PAD_TOKEN]])
    X_train.append(torch.tensor(sentence[:length_sentence]))
y_train = pd.get_dummies(train_data['Class Index'], prefix='value', dtype=int).values

X_test = []
for sentence in sentences_test:
    if len(sentence) < length_sentence:
        padding_needed = length_sentence - len(sentence)
        sentence.extend(padding_needed*[word2id[PAD_TOKEN]])
    X_test.append(torch.tensor(sentence[:length_sentence]))
y_test = pd.get_dummies(test_data['Class Index'], prefix='value', dtype=int).values


X_train_tensor = torch.stack(X_train)
X_test_tensor = torch.stack(X_test)
y_train_tensor = torch.tensor(y_train,dtype=torch.float32)
y_test_tensor = torch.tensor(y_test,dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)


In [19]:
from tqdm import tqdm

num_epochs = 10
criterion = nn.CrossEntropyLoss(ignore_index=word2id[PAD_TOKEN])
optimizer = optim.Adam(elmo.parameters(), lr=0.001)  

for epoch in range(num_epochs):
    elmo.train()
    total_loss = 0.0
    total_tokens = 0
    for inputs in tqdm(train_loader):
        inputs = inputs[0]
        inputs = inputs.to(device)
        optimizer.zero_grad()
        input_seq = inputs[:, :length_sentence-1]
        target_seq = inputs[:, 1:]
        outputs = elmo(input_seq, mode=1)
        loss = criterion(outputs.permute(0, 2, 1), target_seq)  
        total_loss += loss.item()
        total_tokens += target_seq.numel()
        loss.backward()
        optimizer.step()
    avg_loss = total_loss / total_tokens
    print(f"Epoch {epoch+1}/{num_epochs} (Mode 1) - Train Loss: {avg_loss:.4f}")

for epoch in range(num_epochs):
    elmo.train()
    total_loss = 0.0
    total_tokens = 0
    for inputs in tqdm(train_loader):
        inputs = inputs[0]
        inputs = inputs.to(device)
        optimizer.zero_grad()
        inputs = torch.flip(inputs, dims=[1])
        input_seq = inputs[:, :length_sentence-1]
        target_seq = inputs[:, 1:]
        outputs = elmo(input_seq, mode=1)
        loss = criterion(outputs.permute(0, 2, 1), target_seq) 
        total_loss += loss.item()
        total_tokens += target_seq.numel()
        loss.backward()
        optimizer.step()
    avg_loss = total_loss / total_tokens
    print(f"Epoch {epoch+1}/{num_epochs} (Mode 2) - Train Loss: {avg_loss:.4f}")

100%|██████████| 3750/3750 [02:22<00:00, 26.26it/s]


Epoch 1/10 (Mode 1) - Train Loss: 0.0036


100%|██████████| 3750/3750 [02:22<00:00, 26.37it/s]


Epoch 2/10 (Mode 1) - Train Loss: 0.0031


100%|██████████| 3750/3750 [02:22<00:00, 26.40it/s]


Epoch 3/10 (Mode 1) - Train Loss: 0.0029


100%|██████████| 3750/3750 [02:22<00:00, 26.39it/s]


Epoch 4/10 (Mode 1) - Train Loss: 0.0028


100%|██████████| 3750/3750 [02:22<00:00, 26.40it/s]


Epoch 5/10 (Mode 1) - Train Loss: 0.0027


100%|██████████| 3750/3750 [02:21<00:00, 26.42it/s]


Epoch 6/10 (Mode 1) - Train Loss: 0.0027


100%|██████████| 3750/3750 [02:22<00:00, 26.39it/s]


Epoch 7/10 (Mode 1) - Train Loss: 0.0026


100%|██████████| 3750/3750 [02:21<00:00, 26.42it/s]


Epoch 8/10 (Mode 1) - Train Loss: 0.0026


100%|██████████| 3750/3750 [02:21<00:00, 26.43it/s]


Epoch 9/10 (Mode 1) - Train Loss: 0.0026


100%|██████████| 3750/3750 [02:21<00:00, 26.42it/s]


Epoch 10/10 (Mode 1) - Train Loss: 0.0025


100%|██████████| 3750/3750 [02:22<00:00, 26.40it/s]


Epoch 1/10 (Mode 2) - Train Loss: 0.0033


100%|██████████| 3750/3750 [02:22<00:00, 26.33it/s]


Epoch 2/10 (Mode 2) - Train Loss: 0.0029


100%|██████████| 3750/3750 [02:22<00:00, 26.31it/s]


Epoch 3/10 (Mode 2) - Train Loss: 0.0028


100%|██████████| 3750/3750 [02:22<00:00, 26.32it/s]


Epoch 4/10 (Mode 2) - Train Loss: 0.0027


100%|██████████| 3750/3750 [02:22<00:00, 26.31it/s]


Epoch 5/10 (Mode 2) - Train Loss: 0.0026


100%|██████████| 3750/3750 [02:22<00:00, 26.32it/s]


Epoch 6/10 (Mode 2) - Train Loss: 0.0026


100%|██████████| 3750/3750 [02:22<00:00, 26.39it/s]


Epoch 7/10 (Mode 2) - Train Loss: 0.0025


100%|██████████| 3750/3750 [02:22<00:00, 26.39it/s]


Epoch 8/10 (Mode 2) - Train Loss: 0.0025


100%|██████████| 3750/3750 [02:22<00:00, 26.40it/s]


Epoch 9/10 (Mode 2) - Train Loss: 0.0025


100%|██████████| 3750/3750 [02:22<00:00, 26.39it/s]

Epoch 10/10 (Mode 2) - Train Loss: 0.0024





In [20]:
elmo.eval()
total_loss = 0.0
total_tokens = 0 
with torch.no_grad():
    for inputs in tqdm(test_loader):
        inputs = inputs[0]
        inputs = inputs.to(device)
        input_seq = inputs[:, :length_sentence-1]
        target_seq = inputs[:, 1:]
        outputs = elmo(input_seq, mode=1)
        loss = criterion(outputs.permute(0, 2, 1), target_seq)  
        total_loss += loss.item()
        total_tokens += target_seq.numel()
    avg_loss = total_loss / total_tokens
print(f"Test Loss (Mode 1): {avg_loss:.4f}")

elmo.eval()
total_loss = 0.0
total_tokens = 0  
with torch.no_grad():
    for inputs in tqdm(test_loader):
        inputs = inputs[0]
        inputs = inputs.to(device)
        inputs = torch.flip(inputs, dims=[1])
        input_seq = inputs[:, :length_sentence-1]
        target_seq = inputs[:, 1:]
        outputs = elmo(input_seq, mode=2)
        loss = criterion(outputs.permute(0, 2, 1), target_seq) 
        total_loss += loss.item()
        total_tokens += target_seq.numel()
    avg_loss = total_loss / total_tokens
print(f"Test Loss (Mode 2): {avg_loss:.4f}")

100%|██████████| 238/238 [00:03<00:00, 65.07it/s]


Test Loss (Mode 1): 0.0057


100%|██████████| 238/238 [00:03<00:00, 65.03it/s]

Test Loss (Mode 2): 0.0062





In [21]:
model_path='/kaggle/working/bilstm.pth'
torch.save(elmo.state_dict(), model_path)

In [13]:
vocab_size = len(vocab)
embedding_dim = 100
hidden_dim = 100
batch_size=32

model_path = "/kaggle/input/bilstm2/bilstm (1).pth"
elmo = ELMo(vocab_size, embedding_dim, hidden_dim, embedding_matrix)
state_dict = torch.load(model_path)

elmo.load_state_dict(state_dict)


<All keys matched successfully>

In [25]:
X_train = []
for sentence in sentences_train:
    sentence_embedding = [embedding_matrix[word] for word in sentence]
    if len(sentence) < length_sentence:
        padding_needed = length_sentence - len(sentence)
        sentence_embedding.extend(padding_needed*[embedding_matrix[word2id[PAD_TOKEN]]])
    if sentence_embedding:
        X_train.append(torch.stack(sentence_embedding[:length_sentence]))
y_train = pd.get_dummies(train_data['Class Index'], prefix='value', dtype=int).values

X_test = []
for sentence in sentences_test:
    sentence_embedding = [embedding_matrix[word] for word in sentence]
    if len(sentence) < length_sentence:
        padding_needed = length_sentence - len(sentence)
        sentence_embedding.extend(padding_needed*[embedding_matrix[word2id[PAD_TOKEN]]])
    if sentence_embedding:
        X_test.append(torch.stack(sentence_embedding[:length_sentence]))
y_test = pd.get_dummies(test_data['Class Index'], prefix='value', dtype=int).values


X_train_tensor = torch.stack(X_train)
X_test_tensor = torch.stack(X_test)
y_train_tensor = torch.tensor(y_train,dtype=torch.float32)
y_test_tensor = torch.tensor(y_test,dtype=torch.float32)

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        output, _ = self.lstm(x)
        output = self.fc(output[:, -1, :]) 
        return output

input_size = 100  
hidden_size = 128
output_size = 4
model = LSTMModel(input_size, hidden_size, output_size).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=word2id[PAD_TOKEN])
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    total_samples = 0
    
    for batch_X, batch_y in tqdm(train_loader):
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_X)
        _, target_indices = batch_y.max(dim=1)
        loss = criterion(outputs, target_indices)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch_X.size(0)
        total_samples += batch_X.size(0)
    epoch_loss = total_loss / total_samples
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(torch.argmax(batch_y, dim=1).cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print("Train Set:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:", cm)
    print()

with torch.no_grad():
    X_test_tensor, y_test_tensor = X_test_tensor.to(device), y_test_tensor.to(device)
    outputs = model(X_test_tensor)
    _, predicted = torch.max(outputs, 1)
    y_true = torch.argmax(y_test_tensor, dim=1).cpu().numpy()
    y_pred = predicted.cpu().numpy()
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print("Test Set:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:", cm)


Epoch 1/10, Loss: 0.4964531057993571
Epoch 2/10, Loss: 0.26761747864931823
Epoch 3/10, Loss: 0.23608226016064485
Epoch 4/10, Loss: 0.210938997592777
Epoch 5/10, Loss: 0.18908926866576076
Epoch 6/10, Loss: 0.1695423967368901
Epoch 7/10, Loss: 0.1516266350803276
Epoch 8/10, Loss: 0.1350703932646662
Epoch 9/10, Loss: 0.11938067760529618
Epoch 10/10, Loss: 0.10560751794824998
Train Set:
Accuracy: 0.9702833333333334
Precision: 0.9702992419278945
Recall: 0.9702833333333334
F1 Score: 0.9702506269056209
Confusion Matrix: [[29281   157   358   204]
 [   50 29889    18    43]
 [  320    57 28892   731]
 [  483    32  1113 28372]]

Test Set:
Accuracy: 0.9092105263157895
Precision: 0.9092638823760439
Recall: 0.9092105263157895
F1 Score: 0.9092170828865068
Confusion Matrix: [[1737   35   73   55]
 [  24 1849   13   14]
 [  63   11 1669  157]
 [  62   11  172 1655]]


In [14]:
class ELMo_Embeddings(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, embedding_matrix):
        super(ELMo_Embeddings, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.embedding1 = nn.Embedding.from_pretrained(embedding_matrix)
        self.embedding2 = nn.Embedding.from_pretrained(embedding_matrix)
        self.lstm_forward1 = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.lstm_forward2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.lstm_backward1 = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.lstm_backward2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.linear_mode1 = nn.Linear(200, vocab_size)
        self.linear_mode2 = nn.Linear(200, vocab_size)

    def forward(self, input_data):
        forward_embed = self.embedding1(input_data)
        forward_lstm1, _ = self.lstm_forward1(forward_embed) 
        forward_lstm2, _ = self.lstm_forward2(forward_lstm1)

        input_data = torch.flip(input_data, dims=[1])
        backward_embed = self.embedding2(input_data)
        backward_lstm1, _ = self.lstm_backward1(backward_embed)
        backward_lstm2, _ = self.lstm_backward2(backward_lstm1)
        backward_lstm1 = torch.flip(backward_lstm1, dims=[1])
        backward_lstm2 = torch.flip(backward_lstm2, dims=[1])

        e1 = torch.cat((forward_embed, forward_embed), dim=-1)
        e2 = torch.cat((forward_lstm1, backward_lstm1), dim=-1)
        e3 = torch.cat((forward_lstm2, backward_lstm2), dim=-1)

        return e1,e2,e3

In [15]:
elmo

ELMo(
  (embedding1): Embedding(32009, 100)
  (embedding2): Embedding(32009, 100)
  (lstm_forward1): LSTM(100, 100, batch_first=True)
  (lstm_forward2): LSTM(100, 100, batch_first=True)
  (lstm_backward1): LSTM(100, 100, batch_first=True)
  (lstm_backward2): LSTM(100, 100, batch_first=True)
  (linear_mode1): Linear(in_features=200, out_features=32009, bias=True)
  (linear_mode2): Linear(in_features=200, out_features=32009, bias=True)
)

In [16]:
elmo_embed=ELMo_Embeddings(vocab_size, embedding_dim, hidden_dim, embedding_matrix)
elmo_embed.load_state_dict(elmo.state_dict())
elmo_embed.to(device)

ELMo_Embeddings(
  (embedding1): Embedding(32009, 100)
  (embedding2): Embedding(32009, 100)
  (lstm_forward1): LSTM(100, 100, batch_first=True)
  (lstm_forward2): LSTM(100, 100, batch_first=True)
  (lstm_backward1): LSTM(100, 100, batch_first=True)
  (lstm_backward2): LSTM(100, 100, batch_first=True)
  (linear_mode1): Linear(in_features=200, out_features=32009, bias=True)
  (linear_mode2): Linear(in_features=200, out_features=32009, bias=True)
)

In [17]:
X_train = []
for sentence in sentences_train:
    if len(sentence) < length_sentence:
        padding_needed = length_sentence - len(sentence)
        sentence.extend(padding_needed*[word2id[PAD_TOKEN]])
    X_train.append(torch.tensor(sentence[:length_sentence]))
y_train = pd.get_dummies(train_data['Class Index'], prefix='value', dtype=int).values

X_test = []
for sentence in sentences_test:
    if len(sentence) < length_sentence:
        padding_needed = length_sentence - len(sentence)
        sentence.extend(padding_needed*[word2id[PAD_TOKEN]])
    X_test.append(torch.tensor(sentence[:length_sentence]))
y_test = pd.get_dummies(test_data['Class Index'], prefix='value', dtype=int).values


X_train_tensor = torch.stack(X_train)
X_test_tensor = torch.stack(X_test)
y_train_tensor = torch.tensor(y_train,dtype=torch.float32)
y_test_tensor = torch.tensor(y_test,dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [18]:
class LSTMModel_Trainable(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel_Trainable, self).__init__()
        self.weights=nn.Parameter(torch.tensor([0.33,0.33,0.33]))
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, e1, e2, e3):
        weights_softmax = torch.nn.functional.softmax(self.weights, dim=0)
        x = e1 * weights_softmax[0] + e2 * weights_softmax[1] + e3 * weights_softmax[2]
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

In [19]:
input_size = 200  
hidden_size = 128
output_size = 4
model = LSTMModel_Trainable(input_size, hidden_size, output_size).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=word2id[PAD_TOKEN])
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    total_samples = 0
    
    for batch_X, batch_y in tqdm(train_loader):
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        optimizer.zero_grad()
        outputs = model(e1, e2, e3)
        _, target_indices = batch_y.max(dim=1)
        loss = criterion(outputs, target_indices)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch_X.size(0)
        total_samples += batch_X.size(0)
    epoch_loss = total_loss / total_samples
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for batch_X, batch_y in tqdm(train_loader):
        batch_X = batch_X.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        outputs = model(e1, e2, e3)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(torch.argmax(batch_y, dim=1).cpu().numpy())  
        y_pred.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print("Train Set:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:", cm)
    print()
    
model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for batch_X, batch_y in tqdm(test_loader):
        batch_X = batch_X.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        outputs = model(e1, e2, e3)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(torch.argmax(batch_y, dim=1).cpu().numpy())  
        y_pred.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print("Test Set:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:", cm)
    print()

100%|██████████| 3750/3750 [00:26<00:00, 142.77it/s]


Epoch 1/10, Loss: 0.4739084247479836


100%|██████████| 3750/3750 [00:25<00:00, 149.79it/s]


Epoch 2/10, Loss: 0.29628586613237856


100%|██████████| 3750/3750 [00:25<00:00, 149.62it/s]


Epoch 3/10, Loss: 0.257423589746654


100%|██████████| 3750/3750 [00:24<00:00, 150.09it/s]


Epoch 4/10, Loss: 0.23004856839577356


100%|██████████| 3750/3750 [00:24<00:00, 150.28it/s]


Epoch 5/10, Loss: 0.2076663779253761


100%|██████████| 3750/3750 [00:25<00:00, 149.44it/s]


Epoch 6/10, Loss: 0.18900374225589137


100%|██████████| 3750/3750 [00:25<00:00, 149.85it/s]


Epoch 7/10, Loss: 0.16953803968106707


100%|██████████| 3750/3750 [00:25<00:00, 149.41it/s]


Epoch 8/10, Loss: 0.1510261582493782


100%|██████████| 3750/3750 [00:24<00:00, 150.14it/s]


Epoch 9/10, Loss: 0.13423869293220342


100%|██████████| 3750/3750 [00:25<00:00, 149.95it/s]


Epoch 10/10, Loss: 0.1179233610022813


100%|██████████| 3750/3750 [00:10<00:00, 368.64it/s]


Train Set:
Accuracy: 0.968725
Precision: 0.9688810255311249
Recall: 0.968725
F1 Score: 0.9687664397217453
Confusion Matrix: [[28908   157   595   340]
 [   89 29830    40    41]
 [  128    45 28717  1110]
 [  261    20   927 28792]]



100%|██████████| 238/238 [00:00<00:00, 365.61it/s]

Test Set:
Accuracy: 0.9182894736842105
Precision: 0.9185391091489916
Recall: 0.9182894736842105
F1 Score: 0.9183672611504463
Confusion Matrix: [[1736   32   83   49]
 [  14 1852   20   14]
 [  46    8 1694  152]
 [  59   14  130 1697]]






In [32]:
class LSTMModel_Frozen(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel_Frozen, self).__init__()
        self.weights = nn.Parameter(torch.randn(3), requires_grad=False)
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, e1, e2, e3):
        weights_softmax = torch.nn.functional.softmax(self.weights, dim=0)
        x = e1 * weights_softmax[0] + e2 * weights_softmax[1] + e3 * weights_softmax[2]
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

In [33]:
input_size = 200  
hidden_size = 128
output_size = 4
model = LSTMModel_Frozen(input_size, hidden_size, output_size).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=word2id[PAD_TOKEN])
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    total_samples = 0
    
    for batch_X, batch_y in tqdm(train_loader):
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        optimizer.zero_grad()
        outputs = model(e1, e2, e3)
        _, target_indices = batch_y.max(dim=1)
        loss = criterion(outputs, target_indices)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch_X.size(0)
        total_samples += batch_X.size(0)
    epoch_loss = total_loss / total_samples
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for batch_X, batch_y in tqdm(train_loader):
        batch_X = batch_X.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        outputs = model(e1, e2, e3)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(torch.argmax(batch_y, dim=1).cpu().numpy())  
        y_pred.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print("Train Set:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:", cm)
    print()
    
model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for batch_X, batch_y in tqdm(test_loader):
        batch_X = batch_X.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        outputs = model(e1, e2, e3)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(torch.argmax(batch_y, dim=1).cpu().numpy())  
        y_pred.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print("Test Set:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:", cm)
    print()

100%|██████████| 3750/3750 [00:24<00:00, 152.84it/s]


Epoch 1/10, Loss: 0.48557809265752633


100%|██████████| 3750/3750 [00:24<00:00, 152.66it/s]


Epoch 2/10, Loss: 0.3207686462908983


100%|██████████| 3750/3750 [00:24<00:00, 151.88it/s]


Epoch 3/10, Loss: 0.28268957933982214


100%|██████████| 3750/3750 [00:24<00:00, 153.10it/s]


Epoch 4/10, Loss: 0.2565281331380208


100%|██████████| 3750/3750 [00:24<00:00, 151.74it/s]


Epoch 5/10, Loss: 0.23706546026170253


100%|██████████| 3750/3750 [00:24<00:00, 152.23it/s]


Epoch 6/10, Loss: 0.21902322891006867


100%|██████████| 3750/3750 [00:24<00:00, 152.38it/s]


Epoch 7/10, Loss: 0.20462748363912106


100%|██████████| 3750/3750 [00:24<00:00, 152.07it/s]


Epoch 8/10, Loss: 0.190780981417497


100%|██████████| 3750/3750 [00:24<00:00, 152.34it/s]


Epoch 9/10, Loss: 0.17691761648207902


100%|██████████| 3750/3750 [00:24<00:00, 152.05it/s]


Epoch 10/10, Loss: 0.16341493985429406


100%|██████████| 3750/3750 [00:10<00:00, 369.66it/s]


Train Set:
Accuracy: 0.947025
Precision: 0.9471167941478705
Recall: 0.947025
F1 Score: 0.9470310359340889
Confusion Matrix: [[28362   318   827   493]
 [   66 29771   121    42]
 [  434   108 27690  1768]
 [  483   122  1575 27820]]



100%|██████████| 238/238 [00:00<00:00, 372.70it/s]

Test Set:
Accuracy: 0.9140789473684211
Precision: 0.9142387814401838
Recall: 0.9140789473684211
F1 Score: 0.9140920125807337
Confusion Matrix: [[1739   38   73   50]
 [  12 1853   27    8]
 [  43   17 1677  163]
 [  54   19  149 1678]]






In [36]:
class LSTMModel_LearnableFunction(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel_LearnableFunction, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 512)  
        self.fc2 = nn.Linear(512, output_size)

    def forward(self, e1, e2, e3):
        concatenated_embedding = torch.cat((e1, e2, e3), dim=1)
        lstm_output, _ = self.lstm(concatenated_embedding)
        lstm_output_last = lstm_output[:, -1, :]
        output = torch.relu(self.fc1(lstm_output_last))
        output = self.fc2(output)
        return output


In [37]:
input_size = 200  
hidden_size = 128
output_size = 4
model = LSTMModel_LearnableFunction(input_size, hidden_size, output_size).to(device)

criterion = nn.CrossEntropyLoss(ignore_index=word2id[PAD_TOKEN])
optimizer = optim.Adam(model.parameters(), lr=0.001)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    total_samples = 0
    
    for batch_X, batch_y in tqdm(train_loader):
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        optimizer.zero_grad()
        outputs = model(e1, e2, e3)
        _, target_indices = batch_y.max(dim=1)
        loss = criterion(outputs, target_indices)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch_X.size(0)
        total_samples += batch_X.size(0)
    epoch_loss = total_loss / total_samples
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss}")

model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for batch_X, batch_y in tqdm(train_loader):
        batch_X = batch_X.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        outputs = model(e1, e2, e3)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(torch.argmax(batch_y, dim=1).cpu().numpy())  
        y_pred.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print("Train Set:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:", cm)
    print()
    
model.eval()
with torch.no_grad():
    y_true = []
    y_pred = []
    for batch_X, batch_y in tqdm(test_loader):
        batch_X = batch_X.to(device)
        e1, e2, e3 = elmo_embed(batch_X)
        e1, e2, e3 = e1.to(device), e2.to(device), e3.to(device)
        outputs = model(e1, e2, e3)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(torch.argmax(batch_y, dim=1).cpu().numpy())  
        y_pred.extend(predicted.cpu().numpy())
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    cm = confusion_matrix(y_true, y_pred)
    print("Test Set:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:", cm)
    print()

100%|██████████| 3750/3750 [00:31<00:00, 118.91it/s]


Epoch 1/10, Loss: 0.425501187240084


100%|██████████| 3750/3750 [00:30<00:00, 122.51it/s]


Epoch 2/10, Loss: 0.3329407417173187


100%|██████████| 3750/3750 [00:30<00:00, 122.58it/s]


Epoch 3/10, Loss: 0.3003496779024601


100%|██████████| 3750/3750 [00:30<00:00, 122.28it/s]


Epoch 4/10, Loss: 0.27615042241513726


100%|██████████| 3750/3750 [00:30<00:00, 122.72it/s]


Epoch 5/10, Loss: 0.2585706581488252


100%|██████████| 3750/3750 [00:30<00:00, 122.91it/s]


Epoch 6/10, Loss: 0.24218030960609516


100%|██████████| 3750/3750 [00:30<00:00, 122.87it/s]


Epoch 7/10, Loss: 0.22789891442507507


100%|██████████| 3750/3750 [00:30<00:00, 122.78it/s]


Epoch 8/10, Loss: 0.21410267429426313


100%|██████████| 3750/3750 [00:30<00:00, 122.71it/s]


Epoch 9/10, Loss: 0.1998341369693478


100%|██████████| 3750/3750 [00:30<00:00, 122.72it/s]


Epoch 10/10, Loss: 0.18788738073756298


100%|██████████| 3750/3750 [00:12<00:00, 300.02it/s]


Train Set:
Accuracy: 0.939475
Precision: 0.940556274108918
Recall: 0.939475
F1 Score: 0.9395294676040294
Confusion Matrix: [[27887   344   724  1045]
 [  165 29585    87   163]
 [  549   196 26771  2484]
 [  285   115  1106 28494]]



100%|██████████| 238/238 [00:00<00:00, 300.17it/s]


Test Set:
Accuracy: 0.9017105263157895
Precision: 0.9027609059432867
Recall: 0.9017105263157895
F1 Score: 0.9016840605747216
Confusion Matrix: [[1695   43   73   89]
 [  25 1835   15   25]
 [  60   31 1600  209]
 [  46   17  114 1723]]

