## Sentiment analysis models (Deep Learning)

### Importing all the required libraries

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import f1_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Loading the data from csv file

In [2]:
data = pd.read_csv("tweet_sentiment.csv")
data["cleaned_text"] = data["cleaned_text"].fillna("")
data['label'] = data['label'].replace({-1: 0, 0: 1, 1: 2})

### Converting the predictors to TF-IDF vectors

In [3]:
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
tfidf_vectors = tfidf_vectorizer.fit_transform(data['cleaned_text'])

In [4]:
#bow_vectorizer = CountVectorizer(max_features=5000)
#bow_vectors = bow_vectorizer.fit_transform(data['cleaned_text'])

#print("BoW shape:", bow_vectors.shape)

In [5]:
X_tfidf = tfidf_vectors.toarray()
#X_bow = bow_vectors.toarray()

y = data['label']

### Splitting the data into test, train and validation sets

In [6]:
X_trainval_tfidf, X_test_tfidf, y_trainval_tfidf, y_test_tfidf = train_test_split(X_tfidf, y, test_size=0.2, stratify=y, random_state=2025)
X_train_tfidf, X_val_tfidf, y_train_tfidf, y_val_tfidf = train_test_split(X_trainval_tfidf, y_trainval_tfidf, test_size=0.2, stratify=y_trainval_tfidf, random_state=42)

In [7]:
#X_trainval_bow, X_test_bow, y_trainval_bow, y_test_bow = train_test_split(X_bow, y, test_size=0.2, random_state = 2025)
#X_train_bow, X_val_bow, y_train_bow, y_val_bow = train_test_split(X_trainval_bow, y_trainval_bow, test_size=0.2, random_state = 2025)

### Creating a simple feed forward neural network

In [8]:
class FFNN(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, output_dim=3):
        super(FFNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return self.softmax(x)

### Creating a funtion to feed data to the neural network

In [7]:
class SentimentDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y.values, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

### Creating a function that creates dataasets for training, validation and testing

In [8]:
def collate_fn(batch):
    inputs = torch.stack([item[0] for item in batch])
    labels = torch.tensor([item[1] for item in batch], dtype=torch.long)
    return inputs, labels

def create_dataloader(X, y, batch_size, shuffle=False):
    dataset = SentimentDataset(X, y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)

#batch_size = 32

#train_dataset_tfidf = SentimentDataset(X_train_tfidf, y_train_tfidf)
#val_dataset_tfidf = SentimentDataset(X_val_tfidf, y_val_tfidf)
#test_dataset_tfidf = SentimentDataset(X_test_tfidf, y_test_tfidf)

#train_loader_tfidf = DataLoader(train_dataset_tfidf, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
#val_loader_tfidf = DataLoader(val_dataset_tfidf, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
#test_loader_tfidf = DataLoader(test_dataset_tfidf, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

### Hyperparameters

In [9]:
learning_rates = [0.001, 0.0005, 0.01]
hidden_dims = [64, 128, 256]
batch_sizes = [32, 64]

### Creating a function to train the simple neural network

In [14]:
def train_model(model, train_loader, val_loader, lr, epochs = 100):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    epoch_losses = []
    epoch_f1_scores = []
    
    best_model = None
    best_f1 = 0

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        avg_loss = running_loss / len(train_loader)
        epoch_losses.append(avg_loss)

        model.eval()
        val_preds = []
        val_labels = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                val_preds.extend(predicted.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())

        val_f1 = f1_score(val_labels, val_preds, average='weighted')
        epoch_f1_scores.append(val_f1)

        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Validation F1 Score: {val_f1:.4f}")

        if val_f1 > best_f1:
            best_f1 = val_f1
            best_model = model.state_dict()

    return best_model, best_f1


### Creating the function to evaluate the model

In [10]:
def evaluate_model(model, test_loader):
    model.eval()
    test_preds = []
    test_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            test_preds.extend(predicted.cpu().numpy())
            test_labels.extend(labels.cpu().numpy())

    f1 = f1_score(test_labels, test_preds, average='weighted')
    return f1


### Training the model using different hyperparameters and finalizing the best model

In [16]:
best_model = None
best_f1_score = 0
best_params = {}

for lr in learning_rates:
    for hidden_dim in hidden_dims:
        for batch_size in batch_sizes:
            print(f"Training with lr={lr}, hidden_dim={hidden_dim}, batch_size={batch_size}")
            
            train_loader = create_dataloader(X_train_tfidf, y_train_tfidf, batch_size, shuffle=True)
            val_loader = create_dataloader(X_val_tfidf, y_val_tfidf, batch_size, shuffle=False)
            test_loader = create_dataloader(X_test_tfidf, y_test_tfidf, batch_size, shuffle=False)

            model = FFNN(input_dim=X_train_tfidf.shape[1], hidden_dim=hidden_dim)

            best_model, val_f1 = train_model(model, train_loader, val_loader, lr)

            model.load_state_dict(best_model)
            test_f1 = evaluate_model(model, test_loader)

            print(f"Test F1 Score: {test_f1}")

            if test_f1 > best_f1_score:
                best_f1_score = test_f1
                best_params = {
                    "learning_rate": lr,
                    "hidden_dim": hidden_dim,
                    "batch_size": batch_size
                }

Training with lr=0.001, hidden_dim=64, batch_size=32
Epoch 1/100, Loss: 1.0939, Validation F1 Score: 0.4835
Epoch 2/100, Loss: 1.0374, Validation F1 Score: 0.5954
Epoch 3/100, Loss: 0.9080, Validation F1 Score: 0.6299
Epoch 4/100, Loss: 0.7962, Validation F1 Score: 0.6307
Epoch 5/100, Loss: 0.7287, Validation F1 Score: 0.6270
Epoch 6/100, Loss: 0.6878, Validation F1 Score: 0.6166
Epoch 7/100, Loss: 0.6625, Validation F1 Score: 0.6206
Epoch 8/100, Loss: 0.6461, Validation F1 Score: 0.6129
Epoch 9/100, Loss: 0.6337, Validation F1 Score: 0.6147
Epoch 10/100, Loss: 0.6257, Validation F1 Score: 0.6089
Epoch 11/100, Loss: 0.6198, Validation F1 Score: 0.6100
Epoch 12/100, Loss: 0.6165, Validation F1 Score: 0.6080
Epoch 13/100, Loss: 0.6120, Validation F1 Score: 0.6161
Epoch 14/100, Loss: 0.6091, Validation F1 Score: 0.6102
Epoch 15/100, Loss: 0.6073, Validation F1 Score: 0.6116
Epoch 16/100, Loss: 0.6049, Validation F1 Score: 0.6140
Epoch 17/100, Loss: 0.6034, Validation F1 Score: 0.6112
Epoc

Epoch 46/100, Loss: 0.5956, Validation F1 Score: 0.6087
Epoch 47/100, Loss: 0.5956, Validation F1 Score: 0.6129
Epoch 48/100, Loss: 0.5956, Validation F1 Score: 0.6115
Epoch 49/100, Loss: 0.5955, Validation F1 Score: 0.6141
Epoch 50/100, Loss: 0.5953, Validation F1 Score: 0.6154
Epoch 51/100, Loss: 0.5949, Validation F1 Score: 0.6154
Epoch 52/100, Loss: 0.5951, Validation F1 Score: 0.6141
Epoch 53/100, Loss: 0.5952, Validation F1 Score: 0.6129
Epoch 54/100, Loss: 0.5948, Validation F1 Score: 0.6141
Epoch 55/100, Loss: 0.5949, Validation F1 Score: 0.6129
Epoch 56/100, Loss: 0.5948, Validation F1 Score: 0.6141
Epoch 57/100, Loss: 0.5948, Validation F1 Score: 0.6116
Epoch 58/100, Loss: 0.5950, Validation F1 Score: 0.6116
Epoch 59/100, Loss: 0.5944, Validation F1 Score: 0.6129
Epoch 60/100, Loss: 0.5945, Validation F1 Score: 0.6099
Epoch 61/100, Loss: 0.5951, Validation F1 Score: 0.6129
Epoch 62/100, Loss: 0.5948, Validation F1 Score: 0.6115
Epoch 63/100, Loss: 0.5942, Validation F1 Score:

Epoch 91/100, Loss: 0.5953, Validation F1 Score: 0.5992
Epoch 92/100, Loss: 0.5947, Validation F1 Score: 0.6003
Epoch 93/100, Loss: 0.5942, Validation F1 Score: 0.6042
Epoch 94/100, Loss: 0.5947, Validation F1 Score: 0.6015
Epoch 95/100, Loss: 0.5942, Validation F1 Score: 0.6004
Epoch 96/100, Loss: 0.5947, Validation F1 Score: 0.6017
Epoch 97/100, Loss: 0.5947, Validation F1 Score: 0.5978
Epoch 98/100, Loss: 0.5942, Validation F1 Score: 0.5992
Epoch 99/100, Loss: 0.5942, Validation F1 Score: 0.5978
Epoch 100/100, Loss: 0.5942, Validation F1 Score: 0.6003
Test F1 Score: 0.6290323221115959
Training with lr=0.001, hidden_dim=128, batch_size=64
Epoch 1/100, Loss: 1.0955, Validation F1 Score: 0.3653
Epoch 2/100, Loss: 1.0561, Validation F1 Score: 0.5504
Epoch 3/100, Loss: 0.9528, Validation F1 Score: 0.6153
Epoch 4/100, Loss: 0.8340, Validation F1 Score: 0.6259
Epoch 5/100, Loss: 0.7527, Validation F1 Score: 0.6346
Epoch 6/100, Loss: 0.7048, Validation F1 Score: 0.6214
Epoch 7/100, Loss: 0.

Epoch 35/100, Loss: 0.5948, Validation F1 Score: 0.6097
Epoch 36/100, Loss: 0.5953, Validation F1 Score: 0.6071
Epoch 37/100, Loss: 0.5947, Validation F1 Score: 0.6083
Epoch 38/100, Loss: 0.5952, Validation F1 Score: 0.6083
Epoch 39/100, Loss: 0.5952, Validation F1 Score: 0.6070
Epoch 40/100, Loss: 0.5947, Validation F1 Score: 0.6083
Epoch 41/100, Loss: 0.5947, Validation F1 Score: 0.6083
Epoch 42/100, Loss: 0.5952, Validation F1 Score: 0.6098
Epoch 43/100, Loss: 0.5952, Validation F1 Score: 0.6086
Epoch 44/100, Loss: 0.5946, Validation F1 Score: 0.6086
Epoch 45/100, Loss: 0.5952, Validation F1 Score: 0.6086
Epoch 46/100, Loss: 0.5957, Validation F1 Score: 0.6072
Epoch 47/100, Loss: 0.5946, Validation F1 Score: 0.6071
Epoch 48/100, Loss: 0.5946, Validation F1 Score: 0.6086
Epoch 49/100, Loss: 0.5946, Validation F1 Score: 0.6073
Epoch 50/100, Loss: 0.5951, Validation F1 Score: 0.6086
Epoch 51/100, Loss: 0.5956, Validation F1 Score: 0.6086
Epoch 52/100, Loss: 0.5946, Validation F1 Score:

Epoch 80/100, Loss: 0.5940, Validation F1 Score: 0.6103
Epoch 81/100, Loss: 0.5947, Validation F1 Score: 0.6053
Epoch 82/100, Loss: 0.5941, Validation F1 Score: 0.6057
Epoch 83/100, Loss: 0.5943, Validation F1 Score: 0.6057
Epoch 84/100, Loss: 0.5942, Validation F1 Score: 0.6079
Epoch 85/100, Loss: 0.5940, Validation F1 Score: 0.6087
Epoch 86/100, Loss: 0.5942, Validation F1 Score: 0.6104
Epoch 87/100, Loss: 0.5941, Validation F1 Score: 0.6106
Epoch 88/100, Loss: 0.5943, Validation F1 Score: 0.6055
Epoch 89/100, Loss: 0.5939, Validation F1 Score: 0.6072
Epoch 90/100, Loss: 0.5941, Validation F1 Score: 0.6093
Epoch 91/100, Loss: 0.5942, Validation F1 Score: 0.6104
Epoch 92/100, Loss: 0.5942, Validation F1 Score: 0.6079
Epoch 93/100, Loss: 0.5944, Validation F1 Score: 0.6069
Epoch 94/100, Loss: 0.5942, Validation F1 Score: 0.6055
Epoch 95/100, Loss: 0.5943, Validation F1 Score: 0.6082
Epoch 96/100, Loss: 0.5945, Validation F1 Score: 0.6090
Epoch 97/100, Loss: 0.5940, Validation F1 Score:

Epoch 24/100, Loss: 0.6381, Validation F1 Score: 0.6131
Epoch 25/100, Loss: 0.6346, Validation F1 Score: 0.6130
Epoch 26/100, Loss: 0.6317, Validation F1 Score: 0.6120
Epoch 27/100, Loss: 0.6290, Validation F1 Score: 0.6132
Epoch 28/100, Loss: 0.6270, Validation F1 Score: 0.6119
Epoch 29/100, Loss: 0.6244, Validation F1 Score: 0.6134
Epoch 30/100, Loss: 0.6223, Validation F1 Score: 0.6148
Epoch 31/100, Loss: 0.6206, Validation F1 Score: 0.6135
Epoch 32/100, Loss: 0.6187, Validation F1 Score: 0.6149
Epoch 33/100, Loss: 0.6172, Validation F1 Score: 0.6136
Epoch 34/100, Loss: 0.6156, Validation F1 Score: 0.6110
Epoch 35/100, Loss: 0.6143, Validation F1 Score: 0.6087
Epoch 36/100, Loss: 0.6131, Validation F1 Score: 0.6110
Epoch 37/100, Loss: 0.6118, Validation F1 Score: 0.6099
Epoch 38/100, Loss: 0.6109, Validation F1 Score: 0.6110
Epoch 39/100, Loss: 0.6098, Validation F1 Score: 0.6099
Epoch 40/100, Loss: 0.6091, Validation F1 Score: 0.6099
Epoch 41/100, Loss: 0.6082, Validation F1 Score:

Epoch 69/100, Loss: 0.5949, Validation F1 Score: 0.6095
Epoch 70/100, Loss: 0.5944, Validation F1 Score: 0.6107
Epoch 71/100, Loss: 0.5944, Validation F1 Score: 0.6083
Epoch 72/100, Loss: 0.5938, Validation F1 Score: 0.6095
Epoch 73/100, Loss: 0.5943, Validation F1 Score: 0.6107
Epoch 74/100, Loss: 0.5938, Validation F1 Score: 0.6107
Epoch 75/100, Loss: 0.5948, Validation F1 Score: 0.6107
Epoch 76/100, Loss: 0.5942, Validation F1 Score: 0.6097
Epoch 77/100, Loss: 0.5938, Validation F1 Score: 0.6084
Epoch 78/100, Loss: 0.5938, Validation F1 Score: 0.6095
Epoch 79/100, Loss: 0.5937, Validation F1 Score: 0.6132
Epoch 80/100, Loss: 0.5937, Validation F1 Score: 0.6045
Epoch 81/100, Loss: 0.5937, Validation F1 Score: 0.6096
Epoch 82/100, Loss: 0.5948, Validation F1 Score: 0.6083
Epoch 83/100, Loss: 0.5937, Validation F1 Score: 0.6096
Epoch 84/100, Loss: 0.5937, Validation F1 Score: 0.6060
Epoch 85/100, Loss: 0.5936, Validation F1 Score: 0.6084
Epoch 86/100, Loss: 0.5942, Validation F1 Score:

Epoch 13/100, Loss: 0.6124, Validation F1 Score: 0.6119
Epoch 14/100, Loss: 0.6100, Validation F1 Score: 0.6090
Epoch 15/100, Loss: 0.6075, Validation F1 Score: 0.6160
Epoch 16/100, Loss: 0.6063, Validation F1 Score: 0.6148
Epoch 17/100, Loss: 0.6044, Validation F1 Score: 0.6112
Epoch 18/100, Loss: 0.6026, Validation F1 Score: 0.6091
Epoch 19/100, Loss: 0.6019, Validation F1 Score: 0.6124
Epoch 20/100, Loss: 0.6007, Validation F1 Score: 0.6111
Epoch 21/100, Loss: 0.6008, Validation F1 Score: 0.6070
Epoch 22/100, Loss: 0.5993, Validation F1 Score: 0.6113
Epoch 23/100, Loss: 0.5992, Validation F1 Score: 0.6071
Epoch 24/100, Loss: 0.5982, Validation F1 Score: 0.6097
Epoch 25/100, Loss: 0.5978, Validation F1 Score: 0.6074
Epoch 26/100, Loss: 0.5976, Validation F1 Score: 0.6072
Epoch 27/100, Loss: 0.5972, Validation F1 Score: 0.6113
Epoch 28/100, Loss: 0.5969, Validation F1 Score: 0.6101
Epoch 29/100, Loss: 0.5967, Validation F1 Score: 0.6068
Epoch 30/100, Loss: 0.5966, Validation F1 Score:

Epoch 58/100, Loss: 0.5949, Validation F1 Score: 0.6071
Epoch 59/100, Loss: 0.5950, Validation F1 Score: 0.6044
Epoch 60/100, Loss: 0.5946, Validation F1 Score: 0.6071
Epoch 61/100, Loss: 0.5947, Validation F1 Score: 0.6056
Epoch 62/100, Loss: 0.5946, Validation F1 Score: 0.6055
Epoch 63/100, Loss: 0.5947, Validation F1 Score: 0.6070
Epoch 64/100, Loss: 0.5947, Validation F1 Score: 0.6055
Epoch 65/100, Loss: 0.5948, Validation F1 Score: 0.6042
Epoch 66/100, Loss: 0.5947, Validation F1 Score: 0.6055
Epoch 67/100, Loss: 0.5946, Validation F1 Score: 0.6055
Epoch 68/100, Loss: 0.5943, Validation F1 Score: 0.6042
Epoch 69/100, Loss: 0.5949, Validation F1 Score: 0.6068
Epoch 70/100, Loss: 0.5944, Validation F1 Score: 0.6042
Epoch 71/100, Loss: 0.5947, Validation F1 Score: 0.6068
Epoch 72/100, Loss: 0.5948, Validation F1 Score: 0.6084
Epoch 73/100, Loss: 0.5944, Validation F1 Score: 0.6055
Epoch 74/100, Loss: 0.5943, Validation F1 Score: 0.6070
Epoch 75/100, Loss: 0.5941, Validation F1 Score:

Epoch 2/100, Loss: 0.7625, Validation F1 Score: 0.6305
Epoch 3/100, Loss: 0.6465, Validation F1 Score: 0.6215
Epoch 4/100, Loss: 0.6199, Validation F1 Score: 0.6162
Epoch 5/100, Loss: 0.6114, Validation F1 Score: 0.6176
Epoch 6/100, Loss: 0.6070, Validation F1 Score: 0.6179
Epoch 7/100, Loss: 0.6048, Validation F1 Score: 0.6183
Epoch 8/100, Loss: 0.6035, Validation F1 Score: 0.6194
Epoch 9/100, Loss: 0.6022, Validation F1 Score: 0.6073
Epoch 10/100, Loss: 0.6026, Validation F1 Score: 0.6079
Epoch 11/100, Loss: 0.6015, Validation F1 Score: 0.6108
Epoch 12/100, Loss: 0.6007, Validation F1 Score: 0.6129
Epoch 13/100, Loss: 0.6003, Validation F1 Score: 0.6133
Epoch 14/100, Loss: 0.6000, Validation F1 Score: 0.6102
Epoch 15/100, Loss: 0.5991, Validation F1 Score: 0.6114
Epoch 16/100, Loss: 0.5988, Validation F1 Score: 0.6100
Epoch 17/100, Loss: 0.5990, Validation F1 Score: 0.6114
Epoch 18/100, Loss: 0.5990, Validation F1 Score: 0.6128
Epoch 19/100, Loss: 0.5988, Validation F1 Score: 0.6115


Epoch 48/100, Loss: 0.6015, Validation F1 Score: 0.6160
Epoch 49/100, Loss: 0.6015, Validation F1 Score: 0.6215
Epoch 50/100, Loss: 0.6009, Validation F1 Score: 0.6239
Epoch 51/100, Loss: 0.6009, Validation F1 Score: 0.6240
Epoch 52/100, Loss: 0.6014, Validation F1 Score: 0.6226
Epoch 53/100, Loss: 0.6009, Validation F1 Score: 0.6251
Epoch 54/100, Loss: 0.6019, Validation F1 Score: 0.6263
Epoch 55/100, Loss: 0.6019, Validation F1 Score: 0.6251
Epoch 56/100, Loss: 0.6009, Validation F1 Score: 0.6212
Epoch 57/100, Loss: 0.6019, Validation F1 Score: 0.6138
Epoch 58/100, Loss: 0.6014, Validation F1 Score: 0.6143
Epoch 59/100, Loss: 0.6009, Validation F1 Score: 0.6154
Epoch 60/100, Loss: 0.6009, Validation F1 Score: 0.6165
Epoch 61/100, Loss: 0.6013, Validation F1 Score: 0.5821
Epoch 62/100, Loss: 0.6010, Validation F1 Score: 0.6163
Epoch 63/100, Loss: 0.6010, Validation F1 Score: 0.6150
Epoch 64/100, Loss: 0.6024, Validation F1 Score: 0.6030
Epoch 65/100, Loss: 0.6022, Validation F1 Score:

Epoch 93/100, Loss: 0.5978, Validation F1 Score: 0.6083
Epoch 94/100, Loss: 0.5978, Validation F1 Score: 0.6083
Epoch 95/100, Loss: 0.5976, Validation F1 Score: 0.6064
Epoch 96/100, Loss: 0.5975, Validation F1 Score: 0.6093
Epoch 97/100, Loss: 0.5979, Validation F1 Score: 0.6063
Epoch 98/100, Loss: 0.5978, Validation F1 Score: 0.6076
Epoch 99/100, Loss: 0.5979, Validation F1 Score: 0.6062
Epoch 100/100, Loss: 0.5975, Validation F1 Score: 0.6133
Test F1 Score: 0.6102944798345317
Training with lr=0.01, hidden_dim=256, batch_size=32
Epoch 1/100, Loss: 0.9974, Validation F1 Score: 0.6436
Epoch 2/100, Loss: 0.7335, Validation F1 Score: 0.6294
Epoch 3/100, Loss: 0.6490, Validation F1 Score: 0.6125
Epoch 4/100, Loss: 0.6285, Validation F1 Score: 0.6169
Epoch 5/100, Loss: 0.6211, Validation F1 Score: 0.6102
Epoch 6/100, Loss: 0.6171, Validation F1 Score: 0.6107
Epoch 7/100, Loss: 0.6153, Validation F1 Score: 0.6037
Epoch 8/100, Loss: 0.6131, Validation F1 Score: 0.6085
Epoch 9/100, Loss: 0.612

Epoch 37/100, Loss: 0.5988, Validation F1 Score: 0.6054
Epoch 38/100, Loss: 0.5989, Validation F1 Score: 0.6067
Epoch 39/100, Loss: 0.5986, Validation F1 Score: 0.6022
Epoch 40/100, Loss: 0.5988, Validation F1 Score: 0.6049
Epoch 41/100, Loss: 0.5988, Validation F1 Score: 0.6034
Epoch 42/100, Loss: 0.5991, Validation F1 Score: 0.6020
Epoch 43/100, Loss: 0.5992, Validation F1 Score: 0.6020
Epoch 44/100, Loss: 0.5994, Validation F1 Score: 0.6035
Epoch 45/100, Loss: 0.5991, Validation F1 Score: 0.6035
Epoch 46/100, Loss: 0.5988, Validation F1 Score: 0.6034
Epoch 47/100, Loss: 0.5991, Validation F1 Score: 0.6020
Epoch 48/100, Loss: 0.5988, Validation F1 Score: 0.6020
Epoch 49/100, Loss: 0.5990, Validation F1 Score: 0.6071
Epoch 50/100, Loss: 0.5998, Validation F1 Score: 0.5992
Epoch 51/100, Loss: 0.5996, Validation F1 Score: 0.6106
Epoch 52/100, Loss: 0.6002, Validation F1 Score: 0.5913
Epoch 53/100, Loss: 0.6000, Validation F1 Score: 0.6023
Epoch 54/100, Loss: 0.6003, Validation F1 Score:

In [17]:
print(f"Best F1 Score: {best_f1_score}")
print(f"Best Parameters: {best_params}")

Best F1 Score: 0.646367424749827
Best Parameters: {'learning_rate': 0.001, 'hidden_dim': 64, 'batch_size': 32}


### Creating a simple LSTM model

In [11]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, output_dim=3, embedding_dim=100):
        super(LSTMModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, embedding_dim)  
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.fc1(x)
        x, (h_n, c_n) = self.lstm(x.unsqueeze(1))  
        output = self.fc2(h_n[-1])  
        return self.softmax(output)

### Creating a train function to train the LSTM

In [12]:
input_dim = X_train_tfidf.shape[1]

def train_model_for_all_params_lstm(train_loader, val_loader, learning_rates, hidden_dims, batch_sizes, epochs = 100):
    all_results = []

    for lr in learning_rates:
        for hidden_dim in hidden_dims:
            for batch_size in batch_sizes:
                
                model = LSTMModel(input_dim=input_dim, hidden_dim=hidden_dim)

                train_loader = DataLoader(SentimentDataset(X_train_tfidf, y_train_tfidf), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
                val_loader = DataLoader(SentimentDataset(X_val_tfidf, y_val_tfidf), batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

                criterion = nn.CrossEntropyLoss()
                optimizer = optim.Adam(model.parameters(), lr=lr)

                epoch_losses = []
                epoch_f1_scores = []

                best_f1 = 0
                best_model = None

                for epoch in range(epochs):
                    model.train()
                    running_loss = 0.0

                    for inputs, labels in train_loader:
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()

                        running_loss += loss.item()

                    avg_loss = running_loss / len(train_loader)
                    epoch_losses.append(avg_loss)

                    model.eval()
                    val_preds = []
                    val_labels = []

                    with torch.no_grad():
                        for inputs, labels in val_loader:
                            outputs = model(inputs)
                            _, predicted = torch.max(outputs, 1)
                            val_preds.extend(predicted.cpu().numpy())
                            val_labels.extend(labels.cpu().numpy())

                    val_f1 = f1_score(val_labels, val_preds, average='weighted')
                    epoch_f1_scores.append(val_f1)

                    if val_f1 > best_f1:
                        best_f1 = val_f1
                        best_model = model.state_dict()

                avg_epoch_loss = sum(epoch_losses) / epochs
                avg_epoch_f1 = sum(epoch_f1_scores) / epochs

                all_results.append({
                    'Learning Rate': lr,
                    'Hidden Dim': hidden_dim,
                    'Batch Size': batch_size,
                    'Avg Loss': avg_epoch_loss,
                    'Avg F1 Score': avg_epoch_f1
                })

    results_df = pd.DataFrame(all_results)

    return results_df


### Using the train function and finding the model with best results

In [13]:
results_df = train_model_for_all_params_lstm(None, None, learning_rates, hidden_dims, batch_sizes, epochs=100)

In [14]:
print(results_df)

    Learning Rate  Hidden Dim  Batch Size  Avg Loss  Avg F1 Score
0          0.0010          64          32  0.608667      0.610813
1          0.0010          64          64  0.610352      0.620125
2          0.0010         128          32  0.613213      0.614315
3          0.0010         128          64  0.610342      0.616562
4          0.0010         256          32  0.612140      0.608159
5          0.0010         256          64  0.610235      0.612604
6          0.0005          64          32  0.611709      0.615441
7          0.0005          64          64  0.622290      0.614778
8          0.0005         128          32  0.612374      0.613162
9          0.0005         128          64  0.615942      0.608974
10         0.0005         256          32  0.609614      0.617609
11         0.0005         256          64  0.613627      0.612843
12         0.0100          64          32  0.658906      0.586981
13         0.0100          64          64  0.655069      0.593282
14        