In [2]:
import numpy as np 
import pandas as pd 
import os

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')

print(os.listdir("./input"))

['test.tsv', 'test.tsv.zip', 'train.tsv', 'train.tsv.zip']


In [3]:
#ntlk libraries for disctionaries and word_tokenization while preprocessing
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
from bs4 import BeautifulSoup
import re 

#tqdm for progress bar visuals during training
from tqdm import tqdm

import torch
import torch.nn.functional as F #for one-hot encoding of labels
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split

#Libraries for final tokenization of data into numerical tensors for training
from keras.utils import to_categorical
import random
from sklearn.model_selection import train_test_split
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer

#Training utilities
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, f1_score

#For dataset exctraction
import pandas as pd




In [4]:
#dataset for training and testing exctraction from base directory
train= pd.read_csv("./input/train.tsv", sep="\t")
test = pd.read_csv("./input/test.tsv", sep="\t")

In [5]:
test.head() #Check test data

Unnamed: 0,PhraseId,SentenceId,Phrase
0,156061,8545,An intermittently pleasing but mostly routine ...
1,156062,8545,An intermittently pleasing but mostly routine ...
2,156063,8545,An
3,156064,8545,intermittently pleasing but mostly routine effort
4,156065,8545,intermittently pleasing but mostly routine


In [6]:
train.head() #Check train data

Unnamed: 0,PhraseId,SentenceId,Phrase,Sentiment
0,1,1,A series of escapades demonstrating the adage ...,1
1,2,1,A series of escapades demonstrating the adage ...,2
2,3,1,A series,2
3,4,1,A,2
4,5,1,series,2


In [7]:
def clean_sentences(df):
    reviews = []

    for sent in tqdm(df['Phrase']):
        if isinstance(sent, str):
            review_text = BeautifulSoup(sent, 'html.parser').get_text()#Remove html content
            review_text = re.sub("[^a-zA-Z]", " ", review_text)#Remove non-alphabetic characters
            words = word_tokenize(review_text.lower())#Word tokenize the sentences
            lemma_words = [lemmatizer.lemmatize(i) for i in words]#Lemmatize each word to its lemma

            reviews.append(lemma_words)
        else:
            #Handle non-string values
            reviews.append([])

    return reviews #return list of preprocessed data

In [8]:
import nltk
import ssl

# Download the 'punkt' package
try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context
    
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [9]:
#clean sentences and check their length
train_sentences = clean_sentences(train)
test_sentences = clean_sentences(test)
print(len(train_sentences))
print(len(test_sentences))

  review_text = BeautifulSoup(sent, 'html.parser').get_text()#Remove html content
100%|████████████████████████████████████████████████████████████████████████| 156060/156060 [00:25<00:00, 6208.55it/s]
100%|██████████████████████████████████████████████████████████████████████████| 66292/66292 [00:09<00:00, 6839.71it/s]

156060
66292





In [10]:
target = torch.tensor(train.Sentiment.values) 
#Calculate the number of classes
num_classes = torch.max(target) + 1
#Convert to one-hot encoding
y_target = F.one_hot(target, num_classes=num_classes)
#Convert to float tensor as it is used for optimizer
y_target = y_target.to(torch.float)


In [11]:
X_train,X_val,y_train,y_val=train_test_split(train_sentences,y_target,test_size=0.20,stratify=y_target, random_state = 40)
#Split dataset into training and validation data
#Validation data is needed for KFold validation and early stopping techniques


In [12]:
#It is needed for initializing tokenizer of keras and subsequent padding
unique_words = set()
len_max = 0

for sent in tqdm(X_train):
    
    unique_words.update(sent)
    
    if(len_max<len(sent)):
        len_max = len(sent)
        
#Length of the list of unique_words gives the no of unique words
print(len(list(unique_words)))
print(len_max)

100%|██████████████████████████████████████████████████████████████████████| 124848/124848 [00:00<00:00, 594166.10it/s]

13730
48





In [13]:
#Tokenize and then convert to equal-sized data
tokenizer = Tokenizer(num_words=len(list(unique_words)))
tokenizer.fit_on_texts(list(X_train))

X_val = tokenizer.texts_to_sequences(X_val)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(test_sentences)

X_train = sequence.pad_sequences(X_train, maxlen=len_max)
X_val = sequence.pad_sequences(X_val, maxlen=len_max)
X_test = sequence.pad_sequences(X_test, maxlen=len_max)

print(X_train.shape,X_val.shape,X_test.shape)

(124848, 48) (31212, 48) (66292, 48)


In [14]:
#Convert to tensors inputs for pytorch nerual network
X_test = torch.tensor(X_test)
X_train = torch.tensor(X_train)
X_val = torch.tensor(X_val)

In [15]:
#Check shapes
print(X_train.shape,X_val.shape, X_test.shape)

torch.Size([124848, 48]) torch.Size([31212, 48]) torch.Size([66292, 48])


In [16]:
#One of the layers = embedding needed for dataset of words training
#Dropout layer is reqularization technique to drop excessive outputs

#Simple LSTM model with two layers
class SimpleLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim1,output_dim, dropout):
        super(SimpleLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim1, dropout=dropout, batch_first=True, bidirectional=False)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(hidden_dim1, output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out1, _ = self.lstm1(embedded)
        fc1_out = self.fc1(lstm_out1[:, -1, :])
        fc1_out = self.dropout(fc1_out)
        output = self.softmax(fc1_out)
        return output
    
#Complex LSTM model with 4 layers
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim1, hidden_dim2, output_dim, dropout):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim1, dropout=dropout, batch_first=True, bidirectional=False)
        self.lstm2 = nn.LSTM(hidden_dim1, hidden_dim2, dropout=dropout, batch_first=True, bidirectional=False)
        self.fc1 = nn.Linear(hidden_dim2, 100)
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(100, output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out1, _ = self.lstm1(embedded)
        lstm_out2, _ = self.lstm2(lstm_out1)
        lstm_out2 = lstm_out2[:, -1, :]
        fc1_out = self.fc1(lstm_out2)
        fc1_out = self.dropout(fc1_out)
        output = self.fc2(fc1_out)
        output = self.softmax(output)
        return output
    
#Complex LSTM model with 3 layers, 1 lstm and 2 linear layers
class ModerateLSTMModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim1, output_dim, dropout):
        super(ModerateLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim1, dropout=dropout, batch_first=True, bidirectional=False)
        self.fc1 = nn.Linear(hidden_dim1, 100)
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(100, output_dim)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out1, _ = self.lstm1(embedded)
        lstm_out1 = lstm_out1[:, -1, :]
        fc1_out = self.fc1(lstm_out1)
        fc1_out = self.dropout(fc1_out)
        output = self.fc2(fc1_out)
        output = self.softmax(output)
        return output

# Convert data to PyTorch DataLoader
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [17]:
#From input data create loaders
def create_data_loader(X, y):
    train_dataset = TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) #Batch size 32
    return train_loader

***KFOLD VALIDATION FOR DIFFERENT LEARNING RATES***

FOR LSTM

In [18]:
def kfold_lstm_lr(a_model):
    #use different learning rates for comparison
    learning_rates = [0.0005, 0.0003, 0.0001]
    device = "cpu"
    num_epochs = 8
    num_folds = 5
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    all_train_losses = []  
    all_val_losses = []    
    all_train_f1_scores = []  
    all_val_f1_scores = []    
    early_stopping_patience = 2  

    for lr in learning_rates:
        print(f"Testing Learning Rate: {lr}")

        val_accuracies = []
        for fold, (train_indices, val_indices) in enumerate(kf.split(X_train)):

            model = a_model
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=lr)
            
            print(f"Fold {fold + 1}/{num_folds}")

            #Split data by specific folds
            X_train_fold, y_train_fold = X_train[train_indices], y_train[train_indices]
            X_val_fold, y_val_fold = X_train[val_indices], y_train[val_indices]

            early_stopping_counter = 0
            
            train_loader = create_data_loader(X_train_fold, y_train_fold)
            val_loader = create_data_loader(X_val_fold, y_val_fold)

            model.to(device)

            train_losses = []  
            val_losses = []    

            val_f1 = []
            train_f1 = []

            best_val_accuracy = 0.0

            for epoch in range(num_epochs):
                model.train()

                progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}')
                train_f1_scores = []  
                val_f1_scores = []  
                
                #Train batches
                for batch_idx, (inputs, labels) in progress_bar:
                    inputs, labels = inputs.to(device), labels.argmax(dim=1).to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)

                    loss = criterion(outputs, labels)

                    _, predicted = torch.max(outputs, 1)
                    f1_batch = f1_score(labels.cpu(), predicted.cpu(), average='weighted')
                    train_f1_scores.append(f1_batch)

                    loss.backward()
                    optimizer.step()

                    progress_bar.set_postfix(loss=loss.item(), f1=f1_batch)

                train_f1.append(sum(train_f1_scores) / len(train_f1_scores))

                model.eval()
                with torch.no_grad():
                    val_loss = 0.0
                    correct = 0
                    total = 0

                    val_progress_bar = tqdm(enumerate(val_loader), total=len(val_loader), desc=f'Fold {fold + 1}, Validation Epoch {epoch + 1}/{num_epochs}')

                    #Validate batches
                    for val_batch_idx, (val_inputs, val_labels) in val_progress_bar:
                        val_inputs, val_labels = val_inputs.to(device), val_labels.argmax(dim=1).to(device)
                        val_outputs = model(val_inputs)
                        val_loss += criterion(val_outputs, val_labels).item()

                        _, predicted_val = torch.max(val_outputs, 1)
                        f1_batch_val = f1_score(val_labels.cpu(), predicted_val.cpu(), average='weighted')
                        val_f1_scores.append(f1_batch_val)

                        _, predicted = torch.max(val_outputs, 1)
                        total += val_labels.size(0)
                        correct += (predicted == val_labels).sum().item()

                    accuracy = correct / total
                    val_accuracies.append(accuracy)
                    avg_val_f1 = sum(val_f1_scores) / len(val_f1_scores)
                    
                    print(f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, '
                        f'Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {accuracy:.4f}, '
                        f'Validation F1 Score: {avg_val_f1:.4f}')

                    #Store metrics
                    train_losses.append(loss.item())
                    val_losses.append(val_loss / len(val_loader))
                    val_f1.append(avg_val_f1)

                    # Early stopping
                    if accuracy > best_val_accuracy:
                        best_val_accuracy = accuracy
                        early_stopping_counter = 0
                    else:
                        early_stopping_counter += 1


                if early_stopping_counter >= early_stopping_patience:
                    print("Early stopping as validation accuracy is not increasing.")
                    break

            all_train_losses.append(train_losses)
            all_val_losses.append(val_losses)
            all_train_f1_scores.append(train_f1)
            all_val_f1_scores.append(val_f1)

            print(f"Learning Rate: {lr}, Best Validation Accuracy: {max(val_accuracies):.4f}")

            epochs_range = range(1, num_epochs + 1)

        # Plotting the losses for all folds
        plt.figure(figsize=(10, 5))
        for fold in range(num_folds):
            plt.plot(epochs_range, all_train_losses[fold], label=f'Fold {fold + 1} Training Loss', linestyle='--')
            plt.plot(epochs_range, all_val_losses[fold], label=f'Fold {fold + 1} Validation Loss')

        plt.title('Training and Validation Loss Over Epochs for all Folds')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()

       # Plotting the F1 scores for all folds
        plt.figure(figsize=(10, 5))
        for fold in range(num_folds):
            plt.plot(epochs_range, all_train_f1_scores[fold], label=f'Fold {fold + 1} Training F1 Score', linestyle='--')
            plt.plot(epochs_range, all_val_f1_scores[fold], label=f'Fold {fold + 1} Average Validation F1 Score')  # Change label

        plt.title('Training and Validation F1 Score Over Epochs for all Folds')
        plt.xlabel('Epoch')
        plt.ylabel('F1 Score')
        plt.legend()
        plt.show()


In [19]:
# kfold_lstm_lr(LSTMModel(vocab_size=len(unique_words), embedding_dim=300, hidden_dim1=128, hidden_dim2=64, output_dim=num_classes, dropout=0.5))
# kfold_lstm_lr(SimpleLSTMModel(vocab_size=len(unique_words), embedding_dim=300, hidden_dim1=128, output_dim=num_classes, dropout=0.5))
# kfold_lstm_lr(ModerateLSTMModel(vocab_size=len(unique_words), embedding_dim=300, hidden_dim1=128, output_dim=num_classes, dropout=0.5))

In [20]:
# Function specifically for testing different number of epochs (by default [8, 12, 16, 20])
def kfold_lstm_epochs(a_model, lr=0.0005, epochs=[8, 12, 16, 20]):
    #use different learning rates for comparison
    device = "cpu"
    num_folds = 5
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

    all_train_losses = []  
    all_val_losses = []    
    all_train_f1_scores = []  
    all_val_f1_scores = []    
    early_stopping_patience = 2  

    for num_epochs in epochs:
        print(f"Testing Epochs: {num_epochs} With Learning Rate: {lr}")

        val_accuracies = []
        for fold, (train_indices, val_indices) in enumerate(kf.split(X_train)):

            model = a_model
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=lr)
            
            print(f"Fold {fold + 1}/{num_folds}")

            #Split data by specific folds
            X_train_fold, y_train_fold = X_train[train_indices], y_train[train_indices]
            X_val_fold, y_val_fold = X_train[val_indices], y_train[val_indices]

            early_stopping_counter = 0
            
            train_loader = create_data_loader(X_train_fold, y_train_fold)
            val_loader = create_data_loader(X_val_fold, y_val_fold)

            model.to(device)

            train_losses = []  
            val_losses = []    

            val_f1 = []
            train_f1 = []

            best_val_accuracy = 0.0

            for epoch in range(num_epochs):
                model.train()

                progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}')
                train_f1_scores = []  
                val_f1_scores = []  
                
                #Train batches
                for batch_idx, (inputs, labels) in progress_bar:
                    inputs, labels = inputs.to(device), labels.argmax(dim=1).to(device)
                    optimizer.zero_grad()
                    outputs = model(inputs)

                    loss = criterion(outputs, labels)

                    _, predicted = torch.max(outputs, 1)
                    f1_batch = f1_score(labels.cpu(), predicted.cpu(), average='weighted')
                    train_f1_scores.append(f1_batch)

                    loss.backward()
                    optimizer.step()

                    progress_bar.set_postfix(loss=loss.item(), f1=f1_batch)

                train_f1.append(sum(train_f1_scores) / len(train_f1_scores))

                model.eval()
                with torch.no_grad():
                    val_loss = 0.0
                    correct = 0
                    total = 0

                    val_progress_bar = tqdm(enumerate(val_loader), total=len(val_loader), desc=f'Fold {fold + 1}, Validation Epoch {epoch + 1}/{num_epochs}')

                    #Validate batches
                    for val_batch_idx, (val_inputs, val_labels) in val_progress_bar:
                        val_inputs, val_labels = val_inputs.to(device), val_labels.argmax(dim=1).to(device)
                        val_outputs = model(val_inputs)
                        val_loss += criterion(val_outputs, val_labels).item()

                        _, predicted_val = torch.max(val_outputs, 1)
                        f1_batch_val = f1_score(val_labels.cpu(), predicted_val.cpu(), average='weighted')
                        val_f1_scores.append(f1_batch_val)

                        _, predicted = torch.max(val_outputs, 1)
                        total += val_labels.size(0)
                        correct += (predicted == val_labels).sum().item()

                    accuracy = correct / total
                    val_accuracies.append(accuracy)
                    avg_val_f1 = sum(val_f1_scores) / len(val_f1_scores)
                    
                    print(f'Fold {fold + 1}, Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}, '
                        f'Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {accuracy:.4f}, '
                        f'Validation F1 Score: {avg_val_f1:.4f}')

                    #Store metrics
                    train_losses.append(loss.item())
                    val_losses.append(val_loss / len(val_loader))
                    val_f1.append(avg_val_f1)

                    # Early stopping
                    if accuracy > best_val_accuracy:
                        best_val_accuracy = accuracy
                        early_stopping_counter = 0
                    else:
                        early_stopping_counter += 1


                if early_stopping_counter >= early_stopping_patience:
                    print("Early stopping as validation accuracy is not increasing.")
                    break

            all_train_losses.append(train_losses)
            all_val_losses.append(val_losses)
            all_train_f1_scores.append(train_f1)
            all_val_f1_scores.append(val_f1)

            print(f"Epochs: {num_epochs}, Learning Rate: {lr}, Best Validation Accuracy: {max(val_accuracies):.4f}")

            epochs_range = range(1, num_epochs + 1)

        # Plotting the losses for all folds
        plt.figure(figsize=(10, 5))
        for fold in range(num_folds):
            plt.plot(epochs_range, all_train_losses[fold], label=f'Fold {fold + 1} Training Loss', linestyle='--')
            plt.plot(epochs_range, all_val_losses[fold], label=f'Fold {fold + 1} Validation Loss')

        plt.title('Training and Validation Loss Over Epochs for all Folds')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()

       # Plotting the F1 scores for all folds
        plt.figure(figsize=(10, 5))
        for fold in range(num_folds):
            plt.plot(epochs_range, all_train_f1_scores[fold], label=f'Fold {fold + 1} Training F1 Score', linestyle='--')
            plt.plot(epochs_range, all_val_f1_scores[fold], label=f'Fold {fold + 1} Average Validation F1 Score')  # Change label

        plt.title('Training and Validation F1 Score Over Epochs for all Folds')
        plt.xlabel('Epoch')
        plt.ylabel('F1 Score')
        plt.legend()
        plt.show()


In [21]:
kfold_lstm_epochs(LSTMModel(vocab_size=len(unique_words), embedding_dim=300, hidden_dim1=128, hidden_dim2=64, output_dim=num_classes, dropout=0.5), lr=0.0005, epochs=[8, 12, 16, 20])
kfold_lstm_lr(SimpleLSTMModel(vocab_size=len(unique_words), embedding_dim=300, hidden_dim1=128, output_dim=num_classes, dropout=0.5), lr=0.0005, epochs=[32, 32, 32])
kfold_lstm_lr(ModerateLSTMModel(vocab_size=len(unique_words), embedding_dim=300, hidden_dim1=128, output_dim=num_classes, dropout=0.5), lr=0.0005, epochs=[64, 64, 64])



Testing Epochs: 8 With Learning Rate: 0.0005
Fold 1/5


Fold 1, Epoch 1/8: 100%|██████████████████████████████████████| 3902/3902 [04:17<00:00, 15.16it/s, f1=0.326, loss=1.48]
Fold 1, Validation Epoch 1/8: 100%|████████████████████████████████████████████████| 3902/3902 [00:40<00:00, 95.89it/s]


Fold 1, Epoch 1/8, Loss: 1.4836, Validation Loss: 1.2912, Validation Accuracy: 0.6097, Validation F1 Score: 0.5578


Fold 1, Epoch 2/8:  75%|█████████████████████████████▎         | 2931/3902 [22:45<07:32,  2.15it/s, f1=0.54, loss=1.31]


KeyboardInterrupt: 

In [25]:
def train_model(model, train_loader, val_loader, X_test_tensor):
    # Model, Loss, and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0005)

    # Training
    num_epochs = 64
    device = torch.device("cpu")

    model.to(device)

    val_accuracies = []
    train_losses = []  # Track training losses
    val_losses = []    # Track validation losses

    patience = 5

    best_val_accuracy = 0.0
    counter = 0  # Counter for early stopping

    tmp_epoch = 0

    for epoch in range(num_epochs):
        model.train()

        progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f'Epoch {epoch+1}/{num_epochs}')

        #Train batches
        for batch_idx, (inputs, labels) in progress_bar:
            inputs, labels = inputs.to(device), labels.argmax(dim=1).to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            progress_bar.set_postfix(loss=loss.item())

        model.eval()
        with torch.no_grad():
            val_loss = 0.0
            correct = 0
            total = 0

            val_progress_bar = tqdm(enumerate(val_loader), total=len(val_loader), desc=f'Validation Epoch {epoch+1}/{num_epochs}')

            y_true = []
            y_pred = []
            #Validate batches
            for val_batch_idx, (val_inputs, val_labels) in val_progress_bar:
                val_inputs, val_labels = val_inputs.to(device), val_labels.argmax(dim=1).to(device)

                val_outputs = model(val_inputs)
                _, predicted = torch.max(val_outputs, 1)

                batch_loss = criterion(val_outputs, val_labels)
                val_loss += batch_loss.item()

                y_true.extend(val_labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())

            accuracy = accuracy_score(y_true, y_pred)
            f1 = f1_score(y_true, y_pred, average='weighted')

            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}')


            train_losses.append(loss.item())
            val_losses.append(val_loss / len(val_loader))
            val_accuracies.append(accuracy)

            # Check for early stopping
            if accuracy > best_val_accuracy:
                best_val_accuracy = accuracy
                counter = 0
            else:
                counter += 1

            tmp_epoch = epoch + 1
            
            if counter >= patience:
                print(f"Early stopping as validation accuracy has not improved for {patience} consecutive epochs.")
                break


    epochs_range = range(1, tmp_epoch + 1)
    
    #Plot losses
    plt.figure(figsize=(10, 5))
    plt.plot(epochs_range, train_losses, label='Training Loss')
    plt.plot(epochs_range, val_losses, label='Validation Loss')
    plt.title('Training and Validation Metrics Over Epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Metrics')
    plt.legend()
    plt.show()

    #Plot accuray
    plt.figure(figsize=(10, 5))
    plt.plot(epochs_range, val_accuracies, label='Validation Accuracy')
    plt.title('Accuracy over epochs')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

    

    model.eval()

    #Generate predictions
    with torch.no_grad():
        test_outputs = model(X_test_tensor)

    #Convert the model outputs to predicted labels
    _, predicted_labels = torch.max(test_outputs, 1)

    #Create a DataFrame for submission
    submission_df = pd.DataFrame({
        'PhraseId': test['PhraseId'],
        'Sentiment': predicted_labels.cpu().numpy()
    })

    #Save the submission DataFrame to a CSV file
    submission_df.to_csv('submission3.2.csv', index=False)

In [26]:
#Final model
model = ModerateLSTMModel(vocab_size=len(unique_words), embedding_dim=300, hidden_dim1=128, output_dim=num_classes, dropout=0.5)
train_model(model, train_loader, val_loader, X_test)

Epoch 1/64:   5%|▍         | 184/3902 [00:04<01:38, 37.80it/s, loss=1.31]


KeyboardInterrupt: 