# Word2Vec Embedding

In [1]:
from gensim.models import KeyedVectors
import json
import numpy as np
import torch
from sklearn.metrics import f1_score
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim

In [2]:
label_map = {'B_GPE', 'B_PETITIONER', 'B_ORG', 'I_CASE_NUMBER', 'I_ORG', 'I_STATUTE', 'B_RESPONDENT', 'I_JUDGE', 'I_GPE', 'B_COURT', 'I_DATE', 'B_OTHER_PERSON', 'B_PRECEDENT', 'B_JUDGE', 'O', 'I_PETITIONER', 'I_OTHER_PERSON', 'B_STATUTE', 'I_RESPONDENT', 'B_WITNESS', 'B_CASE_NUMBER', 'I_COURT', 'B_DATE', 'I_WITNESS', 'I_PROVISION', 'I_PRECEDENT', 'B_PROVISION'}

In [3]:
word2vec_model_path = 'GoogleNews-vectors-negative300.bin'
word2vec_model = KeyedVectors.load_word2vec_format(word2vec_model_path, binary=True)


with open('NER_train.json', 'r') as file:
    dataset = json.load(file)

print(len(dataset))

max_seq_len = 315
word_embeddings = []
labels = []

for key, value in dataset.items():
    text = value['text']
    label_seq = value['labels']

    label_list = list(label_map)  
    label_indices = [label_list.index(label) for label in label_seq]
    label_indices_padded = label_indices[:max_seq_len] + [0] * (max_seq_len - len(label_indices))
    labels.append(label_indices_padded)

    vectors = [word2vec_model[word] if word in word2vec_model else np.zeros(300) for word in text] 
    vectors_padded = [np.pad(vec, (0, 300), constant_values=0)[:300] for vec in vectors]
    vectors_padded += [np.zeros(300)] * (max_seq_len - len(vectors_padded))
    word_embeddings.append(vectors_padded)

word_embeddings_array = np.array(word_embeddings)
print(word_embeddings_array.shape)
texts_tensor = torch.tensor(word_embeddings_array, dtype=torch.float32)
labels_tensor = torch.tensor(labels)

8019
(8019, 315, 300)


In [5]:
with open('NER_val.json', 'r') as file:
    valset = json.load(file)

print(len(valset))
max_seq_len = 315

word_embeddings = []
labels = []

for key, value in valset.items():
    text = value['text']
    label_seq = value['labels']

    label_list = list(label_map)  # Store index of each 27 classes created
    label_indices = [label_list.index(label) for label in label_seq]
    label_indices_padded = label_indices[:max_seq_len] + [0] * (max_seq_len - len(label_indices))
    labels.append(label_indices_padded)

    vectors = [word2vec_model[word] if word in word2vec_model else np.zeros(300) for word in text]
    vectors_padded = [np.pad(vec, (0, 300), constant_values=0)[:300] for vec in vectors]
    vectors_padded += [np.zeros(300)] * (max_seq_len - len(vectors_padded))
    word_embeddings.append(vectors_padded)

word_embeddings_array = np.array(word_embeddings)

test_texts_tensor = torch.tensor(word_embeddings_array, dtype=torch.float32)
test_labels_tensor = torch.tensor(labels)

1416


In [6]:
import wandb
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


True

In [7]:
wandb.init(
    project="nlp_ass2", 
    name=f"RNN_Word2Vec"
)

In [8]:
class RNNTagger(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNTagger, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out)
        return out

input_size = 300
hidden_size = 256
output_size = 100

model = RNNTagger(input_size, hidden_size, output_size)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 45
batch_size = 32 

texts_tensor = texts_tensor.to(device)
labels_tensor = labels_tensor.to(device)
val_texts_tensor = test_texts_tensor
val_labels_tensor =  test_labels_tensor
val_texts_tensor = val_texts_tensor.to(device)
val_labels_tensor = val_labels_tensor.to(device)

save_loss = 0

for epoch in range(num_epochs):
    
    model.train()  
    total_loss=0
    train_predictions = []
    train_labels = []
    for i in range(0, len(texts_tensor), batch_size):
        optimizer.zero_grad()
        batch_texts = texts_tensor[i:i+batch_size]
        batch_labels = labels_tensor[i:i+batch_size].view(-1)
        outputs = model(batch_texts)
        loss = criterion(outputs.view(-1, output_size), batch_labels)
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
        train_predictions.extend(torch.argmax(outputs, dim=2).flatten().cpu().tolist())
        train_labels.extend(batch_labels.cpu().tolist())
        
    train_f1 = f1_score(train_labels, train_predictions, average='macro')
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {total_loss/len(texts_tensor):.4f}, Training F1 Score: {train_f1:.4f}')
    
    # Validation
    model.eval()  
    total_val_loss = 0
    val_predictions = []
    val_labels = []
    
    with torch.no_grad():
        for i in range(0, len(val_texts_tensor), batch_size):
            batch_texts_val = val_texts_tensor[i:i+batch_size]
            batch_labels_val = val_labels_tensor[i:i+batch_size].view(-1)
            outputs_val = model(batch_texts_val)
            val_loss = criterion(outputs_val.view(-1, output_size), batch_labels_val)
            total_val_loss += val_loss.item()
            val_predictions.extend(torch.argmax(outputs_val, dim=2).flatten().cpu().tolist())
            val_labels.extend(batch_labels_val.cpu().tolist())
    
    val_f1 = f1_score(val_labels, val_predictions, average='macro')
    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {total_val_loss/len(val_texts_tensor):.4f}, Validation F1 Score: {val_f1:.4f}')
    if(val_f1>save_loss):
        save_loss = val_f1
        torch.save(model.state_dict(),'t1_rnn_word2vec.pt')
    log_metric = {"Epoch": epoch+1, "Training Loss": total_loss/len(texts_tensor), "Training F1 Score": train_f1, "Validation Loss": total_val_loss/len(val_texts_tensor), "Validation F1 Score": val_f1}
    wandb.log(log_metric)
print("Finished Training")
    

Epoch [1/45], Training Loss: 0.0103, Training F1 Score: 0.0257
Epoch [1/45], Validation Loss: 0.0041, Validation F1 Score: 0.0748
Epoch [2/45], Training Loss: 0.0037, Training F1 Score: 0.1278
Epoch [2/45], Validation Loss: 0.0033, Validation F1 Score: 0.1940
Epoch [3/45], Training Loss: 0.0051, Training F1 Score: 0.2070
Epoch [3/45], Validation Loss: 0.0051, Validation F1 Score: 0.1028
Epoch [4/45], Training Loss: 0.0031, Training F1 Score: 0.2429
Epoch [4/45], Validation Loss: 0.0028, Validation F1 Score: 0.2960
Epoch [5/45], Training Loss: 0.0027, Training F1 Score: 0.3357
Epoch [5/45], Validation Loss: 0.0027, Validation F1 Score: 0.3481
Epoch [6/45], Training Loss: 0.0024, Training F1 Score: 0.3729
Epoch [6/45], Validation Loss: 0.0023, Validation F1 Score: 0.3879
Epoch [7/45], Training Loss: 0.0024, Training F1 Score: 0.3890
Epoch [7/45], Validation Loss: 0.0027, Validation F1 Score: 0.3346
Epoch [8/45], Training Loss: 0.0023, Training F1 Score: 0.4001
Epoch [8/45], Validation Lo

In [12]:
with open('NER_test.json', 'r') as file:
    valset = json.load(file)

print(len(valset))
max_seq_len = 315

word_embeddings = []
labels = []

for key, value in valset.items():
    text = value['text']
    label_seq = value['labels']

    label_list = list(label_map)  # Store index of each 27 classes created
    label_indices = [label_list.index(label) for label in label_seq]
    label_indices_padded = label_indices[:max_seq_len] + [0] * (max_seq_len - len(label_indices))
    labels.append(label_indices_padded)

    vectors = [word2vec_model[word] if word in word2vec_model else np.zeros(300) for word in text]
    vectors_padded = [np.pad(vec, (0, 300), constant_values=0)[:300] for vec in vectors]
    vectors_padded += [np.zeros(300)] * (max_seq_len - len(vectors_padded))
    word_embeddings.append(vectors_padded)

word_embeddings_array = np.array(word_embeddings)

test_texts_tensor = torch.tensor(word_embeddings_array, dtype=torch.float32)
test_labels_tensor = torch.tensor(labels)

with torch.no_grad():
    model = RNNTagger(input_size, hidden_size, output_size)
    model = model.to(device)
    model.load_state_dict(torch.load('t1_rnn_word2vec.pt'))
    test_outputs = model(test_texts_tensor.to(device)) 
    argmax_indices = torch.argmax(test_outputs, dim=2)
    reshaped_tensor = argmax_indices.view(949, 315)
    fl_out = reshaped_tensor.flatten()
    fl_label = test_labels_tensor.flatten().to(device) 
    f1 = f1_score(fl_out.cpu(), fl_label.cpu(), average='macro') 
    print("F1 Score on Test Set:", f1)

949
F1 Score on Test Set: 0.5389197786182608


In [11]:
wandb.init(
    project="nlp_ass2", 
    name=f"LSTM_Word2Vec"
)

0,1
Epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Training F1 Score,▁▂▃▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇█▆▆▇▇▇▆▇▆▇▇▇▇█▇▇▇██▇█
Training Loss,█▃▄▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▂▁
Validation F1 Score,▁▃▁▄▅▆▅▆▆▆▇▇▆▇▇▇▇███▃▇▇▇▇█▄▇▆▇▇█▇▆▇███▄█
Validation Loss,▅▄▇▃▃▂▃▂▂▂▁▁▂▁▁▁▁▁▁▁▄▂▂▁▁▁█▂▂▁▂▁▂▃▂▁▁▁▄▁

0,1
Epoch,45.0
Training F1 Score,0.54157
Training Loss,0.00171
Validation F1 Score,0.51585
Validation Loss,0.00179


In [13]:
class LSTMTagger(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMTagger, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out)
        return out

input_size = 300
hidden_size = 256
output_size = 100

model = LSTMTagger(input_size, hidden_size, output_size) # Move model to GPU if available

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 45
batch_size = 32 

texts_tensor = texts_tensor.to(device)
labels_tensor = labels_tensor.to(device)
val_texts_tensor = test_texts_tensor
val_labels_tensor =  test_labels_tensor
val_texts_tensor = val_texts_tensor.to(device)
val_labels_tensor = val_labels_tensor.to(device)

save_loss = 0

for epoch in range(num_epochs):
    
    model.train()  
    total_loss=0
    train_predictions = []
    train_labels = []
    for i in range(0, len(texts_tensor), batch_size):
        optimizer.zero_grad()
        batch_texts = texts_tensor[i:i+batch_size]
        batch_labels = labels_tensor[i:i+batch_size].view(-1)
        outputs = model(batch_texts)
        loss = criterion(outputs.view(-1, output_size), batch_labels)
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
        train_predictions.extend(torch.argmax(outputs, dim=2).flatten().cpu().tolist())
        train_labels.extend(batch_labels.cpu().tolist())
        
    train_f1 = f1_score(train_labels, train_predictions, average='macro')
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {total_loss/len(texts_tensor):.4f}, Training F1 Score: {train_f1:.4f}')
    
    # Validation
    model.eval()  
    total_val_loss = 0
    val_predictions = []
    val_labels = []
    
    with torch.no_grad():
        for i in range(0, len(val_texts_tensor), batch_size):
            batch_texts_val = val_texts_tensor[i:i+batch_size]
            batch_labels_val = val_labels_tensor[i:i+batch_size].view(-1)
            outputs_val = model(batch_texts_val)
            val_loss = criterion(outputs_val.view(-1, output_size), batch_labels_val)
            total_val_loss += val_loss.item()
            val_predictions.extend(torch.argmax(outputs_val, dim=2).flatten().cpu().tolist())
            val_labels.extend(batch_labels_val.cpu().tolist())
    
    val_f1 = f1_score(val_labels, val_predictions, average='macro')
    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {total_val_loss/len(val_texts_tensor):.4f}, Validation F1 Score: {val_f1:.4f}')
    if(val_f1>save_loss):
        save_loss = val_f1
        torch.save(model.state_dict(),'t1_lstm_word2vec.pt')
    log_metric = {"Epoch": epoch+1, "Training Loss": total_loss/len(texts_tensor), "Training F1 Score": train_f1, "Validation Loss": total_val_loss/len(val_texts_tensor), "Validation F1 Score": val_f1}
    wandb.log(log_metric)
print("Finished Training")
    

Epoch [1/45], Training Loss: 0.0114, Training F1 Score: 0.0547
Epoch [1/45], Validation Loss: 0.0040, Validation F1 Score: 0.1223
Epoch [2/45], Training Loss: 0.0036, Training F1 Score: 0.1500
Epoch [2/45], Validation Loss: 0.0034, Validation F1 Score: 0.2138
Epoch [3/45], Training Loss: 0.0027, Training F1 Score: 0.2766
Epoch [3/45], Validation Loss: 0.0027, Validation F1 Score: 0.3101
Epoch [4/45], Training Loss: 0.0023, Training F1 Score: 0.3454
Epoch [4/45], Validation Loss: 0.0024, Validation F1 Score: 0.3732
Epoch [5/45], Training Loss: 0.0021, Training F1 Score: 0.3851
Epoch [5/45], Validation Loss: 0.0024, Validation F1 Score: 0.3926
Epoch [6/45], Training Loss: 0.0020, Training F1 Score: 0.4078
Epoch [6/45], Validation Loss: 0.0022, Validation F1 Score: 0.4174
Epoch [7/45], Training Loss: 0.0019, Training F1 Score: 0.4299
Epoch [7/45], Validation Loss: 0.0023, Validation F1 Score: 0.4294
Epoch [8/45], Training Loss: 0.0018, Training F1 Score: 0.4475
Epoch [8/45], Validation Lo

In [14]:
with open('NER_test.json', 'r') as file:
    valset = json.load(file)

print(len(valset))
max_seq_len = 315

word_embeddings = []
labels = []

for key, value in valset.items():
    text = value['text']
    label_seq = value['labels']

    label_list = list(label_map)  # Store index of each 27 classes created
    label_indices = [label_list.index(label) for label in label_seq]
    label_indices_padded = label_indices[:max_seq_len] + [0] * (max_seq_len - len(label_indices))
    labels.append(label_indices_padded)

    vectors = [word2vec_model[word] if word in word2vec_model else np.zeros(300) for word in text]
    vectors_padded = [np.pad(vec, (0, 300), constant_values=0)[:300] for vec in vectors]
    vectors_padded += [np.zeros(300)] * (max_seq_len - len(vectors_padded))
    word_embeddings.append(vectors_padded)

word_embeddings_array = np.array(word_embeddings)

test_texts_tensor = torch.tensor(word_embeddings_array, dtype=torch.float32)
test_labels_tensor = torch.tensor(labels)

with torch.no_grad():
    model = LSTMTagger(input_size, hidden_size, output_size)
    model = model.to(device)
    model.load_state_dict(torch.load('t1_lstm_word2vec.pt'))
    test_outputs = model(test_texts_tensor.to(device)) 
    argmax_indices = torch.argmax(test_outputs, dim=2)
    reshaped_tensor = argmax_indices.view(949, 315)
    fl_out = reshaped_tensor.flatten()
    fl_label = test_labels_tensor.flatten().to(device) 
    f1 = f1_score(fl_out.cpu(), fl_label.cpu(), average='macro') 
    print("F1 Score on Test Set:", f1)

949
F1 Score on Test Set: 0.5710304601979326


In [15]:
wandb.init(
    project="nlp_ass2", 
    name=f"GRU_Word2Vec"
)

0,1
Epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Training F1 Score,▁▂▃▄▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇███████
Training Loss,█▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation F1 Score,▁▂▄▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇█▆██████████████████
Validation Loss,█▆▃▃▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂

0,1
Epoch,45.0
Training F1 Score,0.73893
Training Loss,0.00086
Validation F1 Score,0.55483
Validation Loss,0.00233


In [16]:
class GRUTagger(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUTagger, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.gru1 = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.5) 

    def forward(self, x):
        out, _ = self.gru(x)
        out = torch.relu(out)
        out, _ = self.gru1(out)
        out = torch.relu(out)
        out = self.dropout(out)  
        out = self.fc1(out)
        return out
    
input_size = 300
hidden_size = 256
output_size = 27

model = GRUTagger(input_size, hidden_size, output_size)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 45
batch_size = 32 

texts_tensor = texts_tensor.to(device)
labels_tensor = labels_tensor.to(device)
val_texts_tensor = test_texts_tensor
val_labels_tensor =  test_labels_tensor
val_texts_tensor = val_texts_tensor.to(device)
val_labels_tensor = val_labels_tensor.to(device)

save_loss = 0

for epoch in range(num_epochs):
    
    model.train()  
    total_loss=0
    train_predictions = []
    train_labels = []
    for i in range(0, len(texts_tensor), batch_size):
        optimizer.zero_grad()
        batch_texts = texts_tensor[i:i+batch_size]
        batch_labels = labels_tensor[i:i+batch_size].view(-1)
        outputs = model(batch_texts)
        loss = criterion(outputs.view(-1, output_size), batch_labels)
        loss.backward()
        optimizer.step()
        total_loss+=loss.item()
        train_predictions.extend(torch.argmax(outputs, dim=2).flatten().cpu().tolist())
        train_labels.extend(batch_labels.cpu().tolist())
        
    train_f1 = f1_score(train_labels, train_predictions, average='macro')
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {total_loss/len(texts_tensor):.4f}, Training F1 Score: {train_f1:.4f}')
    
    # Validation
    model.eval()  
    total_val_loss = 0
    val_predictions = []
    val_labels = []
    
    with torch.no_grad():
        for i in range(0, len(val_texts_tensor), batch_size):
            batch_texts_val = val_texts_tensor[i:i+batch_size]
            batch_labels_val = val_labels_tensor[i:i+batch_size].view(-1)
            outputs_val = model(batch_texts_val)
            val_loss = criterion(outputs_val.view(-1, output_size), batch_labels_val)
            total_val_loss += val_loss.item()
            val_predictions.extend(torch.argmax(outputs_val, dim=2).flatten().cpu().tolist())
            val_labels.extend(batch_labels_val.cpu().tolist())
    
    val_f1 = f1_score(val_labels, val_predictions, average='macro')
    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Loss: {total_val_loss/len(val_texts_tensor):.4f}, Validation F1 Score: {val_f1:.4f}')
    if(val_f1>save_loss):
        save_loss = val_f1
        torch.save(model.state_dict(),'t1_gru_word2vec.pt')
    log_metric = {"Epoch": epoch+1, "Training Loss": total_loss/len(texts_tensor), "Training F1 Score": train_f1, "Validation Loss": total_val_loss/len(val_texts_tensor), "Validation F1 Score": val_f1}
    wandb.log(log_metric)
print("Finished Training")
    

Epoch [1/45], Training Loss: 0.0097, Training F1 Score: 0.0662
Epoch [1/45], Validation Loss: 0.0046, Validation F1 Score: 0.0689
Epoch [2/45], Training Loss: 0.0038, Training F1 Score: 0.0779
Epoch [2/45], Validation Loss: 0.0038, Validation F1 Score: 0.0892
Epoch [3/45], Training Loss: 0.0033, Training F1 Score: 0.1034
Epoch [3/45], Validation Loss: 0.0033, Validation F1 Score: 0.1069
Epoch [4/45], Training Loss: 0.0028, Training F1 Score: 0.1902
Epoch [4/45], Validation Loss: 0.0028, Validation F1 Score: 0.2408
Epoch [5/45], Training Loss: 0.0025, Training F1 Score: 0.2856
Epoch [5/45], Validation Loss: 0.0024, Validation F1 Score: 0.3574
Epoch [6/45], Training Loss: 0.0022, Training F1 Score: 0.3493
Epoch [6/45], Validation Loss: 0.0022, Validation F1 Score: 0.3913
Epoch [7/45], Training Loss: 0.0020, Training F1 Score: 0.3898
Epoch [7/45], Validation Loss: 0.0021, Validation F1 Score: 0.4196
Epoch [8/45], Training Loss: 0.0019, Training F1 Score: 0.4218
Epoch [8/45], Validation Lo

In [17]:
with open('NER_test.json', 'r') as file:
    valset = json.load(file)

print(len(valset))
max_seq_len = 315

word_embeddings = []
labels = []

for key, value in valset.items():
    text = value['text']
    label_seq = value['labels']

    label_list = list(label_map)  # Store index of each 27 classes created
    label_indices = [label_list.index(label) for label in label_seq]
    label_indices_padded = label_indices[:max_seq_len] + [0] * (max_seq_len - len(label_indices))
    labels.append(label_indices_padded)

    vectors = [word2vec_model[word] if word in word2vec_model else np.zeros(300) for word in text]
    vectors_padded = [np.pad(vec, (0, 300), constant_values=0)[:300] for vec in vectors]
    vectors_padded += [np.zeros(300)] * (max_seq_len - len(vectors_padded))
    word_embeddings.append(vectors_padded)

word_embeddings_array = np.array(word_embeddings)

test_texts_tensor = torch.tensor(word_embeddings_array, dtype=torch.float32)
test_labels_tensor = torch.tensor(labels)

with torch.no_grad():
    model = GRUTagger(input_size, hidden_size, output_size)
    model = model.to(device)
    model.load_state_dict(torch.load('t1_gru_word2vec.pt'))
    test_outputs = model(test_texts_tensor.to(device)) 
    argmax_indices = torch.argmax(test_outputs, dim=2)
    reshaped_tensor = argmax_indices.view(949, 315)
    fl_out = reshaped_tensor.flatten()
    fl_label = test_labels_tensor.flatten().to(device) 
    f1 = f1_score(fl_out.cpu(), fl_label.cpu(), average='macro') 
    print("F1 Score on Test Set:", f1)

949
F1 Score on Test Set: 0.5393807393170915
