# Project 4: Sentiment Analysis of Text

This project involves building and evaluating deep learning models (RNNs or Transformers) for sentiment classification of text, such as movie reviews or product feedback.

## Preliminaries

In [1]:
!pip install wandb --quiet

In [None]:
# Core Python & Data Handling
import os
import pandas as pd
from collections import defaultdict

# PyTorch Core
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence

# Optimizer
import torch.optim as optim

# For Reproducibility (optional but recommended)
import random
import numpy as np

# For summary of model


# for progress bars
from tqdm import tqdm

import gc
from sklearn.metrics import f1_score

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Device:  cpu


# Download datasets

In [5]:
import zipfile
from kaggle.api.kaggle_api_extended import KaggleApi

ModuleNotFoundError: No module named 'kaggle'

In [4]:
# Setup
os.environ['KAGGLE_CONFIG_DIR'] = os.getcwd()
api = KaggleApi()
api.authenticate()

NameError: name 'KaggleApi' is not defined

In [6]:
def show_data(path: str, label: str, text: str, delimiter= ","):
    df = pd.read_csv(path,delimiter=delimiter)

    df = df.rename(columns={text: "text", label: "label"})

    if not df['label'].isin([0, 1, -1]).all():
        df['label'] = df['label'].map({'positive': 1, 'negative': 0})

    num_labels = df['label'].nunique()
    print(f"Number of distinct labels: {num_labels}")
    print(df.head())

    print(f"Number of rows: {df.shape[0]}")

    return df

## IMDb

In [7]:
dataset = 'lakshmi25npathi/imdb-dataset-of-50k-movie-reviews'
api.dataset_download_files(dataset, path= 'IMDB', unzip=True)

NameError: name 'api' is not defined

In [7]:
path = os.path.join("IMDB", "IMDB Dataset.csv")
imdb = show_data(path, "sentiment", "review")

Number of distinct labels: 2
                                                text  label
0  One of the other reviewers has mentioned that ...      1
1  A wonderful little production. <br /><br />The...      1
2  I thought this was a wonderful way to spend ti...      1
3  Basically there's a family where a little boy ...      0
4  Petter Mattei's "Love in the Time of Money" is...      1
Number of rows: 50000


## SST-2

In [28]:
dataset = 'jkhanbk1/sst2-dataset'
api.dataset_download_files(dataset, path='.', unzip=True)

Dataset URL: https://www.kaggle.com/datasets/jkhanbk1/sst2-dataset


In [8]:
path = os.path.join("Finalv SST-2 dataset CSV format", "test.csv")
sst_2 = show_data(path, "label", "sentence")

Number of distinct labels: 2
   label                                               text
0      0        No movement, no yuks, not much of anything.
1      0  A gob of drivel so sickly sweet, even the eage...
2      0  Gangs of New York is an unapologetic mess, who...
3      0  We never really feel involved with the story, ...
4      1              This is one of Polanski's best films.
Number of rows: 1821


## SemEval

In [30]:
dataset = 'azzouza2018/semevaldatadets'
api.dataset_download_files(dataset, path='semEval', unzip=True)

Dataset URL: https://www.kaggle.com/datasets/azzouza2018/semevaldatadets


In [9]:
path = os.path.join("semEval", "semeval-2013-dev.csv")
sem_eval= show_data(path, "label", "text", delimiter='\t')

Number of distinct labels: 3
   label                                               text
0      0  Watching Devil Inside for the 1st time tonight...
1      0  @CMPunk Devil Inside , The exorcisism of Emily...
2      0  Off to do my vlog. Watching Devil Inside and J...
3      1  @raykipo take Silver at the Hib cup. Great day...
4      0  @hollyhippo I'm going to blockbuster tomorrow ...
Number of rows: 1650


In [10]:
frames = [imdb, sst_2, sem_eval]
df = pd.concat(frames)
print(df.shape[0])
df.info()

53471
<class 'pandas.core.frame.DataFrame'>
Index: 53471 entries, 0 to 1649
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    53471 non-null  object
 1   label   53471 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.2+ MB


# Text Preprocessing

Tokenize

In [11]:
import re
import nltk
from collections import Counter

nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\garik\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\garik\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\garik\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\garik\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\garik\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\garik\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nl

True

### Defining Stop Words

In [12]:
stop_words = set(nltk.corpus.stopwords.words('english'))

In [13]:
stop_words.discard('no')
stop_words.discard('not')
stop_words.discard('nor')

print(stop_words)

{"haven't", 'had', 'below', 'does', 'as', 'their', 'what', 'm', "should've", 'or', 'will', 'he', 'herself', 'most', 'needn', 'from', 'aren', 'under', 'other', 'how', 'wasn', 'over', 'an', 'themselves', 'didn', 'until', 'll', 'this', 'each', 'having', 'ma', "mightn't", 'those', "shan't", "she'd", 'the', "don't", 're', 'by', 'because', 'has', 'y', 'now', "it'd", 'weren', 'too', 'me', 'was', 'between', 'out', 'whom', "shouldn't", 'more', 'only', 'your', 'off', 'then', 'so', 'few', 'd', 'but', 'where', 'i', 'myself', "she'll", "he's", "that'll", "weren't", "she's", 'who', "i'll", 'mightn', 'very', "we'll", 'just', 'it', "you'll", 'ours', 'did', 'her', "you've", 'while', 'mustn', "doesn't", 'o', "won't", 'wouldn', 'at', 'above', 'on', 'a', 'is', 'have', 'his', 'our', 'isn', "wasn't", 'further', "they've", 'why', "you're", 'him', 'through', 'himself', "couldn't", 'hasn', 'all', 'can', "i've", 'shan', 'itself', 'once', 'same', "they'd", "we'd", "i'm", 'of', 'when', 'haven', 'ourselves', 'they

In [None]:
class PreprocessDataset(Dataset):

    def __init__(self, df, transform=None):
        self.df = df
        self.MAX_VOCAB_SIZE = 10000
        self.MAX_SEQ_LEN = 300
        self.vocab = None

    def get_tokens(self):

        # Lowercase
        self.df['text_processed'] = self.df.apply(lambda row: row['text'].lower(), axis=1)

        # Remove anything that is NOT a word char or whitespace
        self.df['text_processed'] = self.df.apply(lambda row: re.sub(r'[^\w\s]', '', row['text_processed']), axis=1)
        
        # Tokenizer
        self.df['tokens'] = self.df.apply(lambda row: nltk.word_tokenize(row['text_processed']), axis=1)

    # МОЖНО ВЫБРАТЬ ЛИНГВИСТИЧЕСКУЮ МОДЕЛЬ
    def get_terms(self):

        #Remove stop words
        self.df['terms'] = self.df.apply(lambda row: [t for t in row['tokens'] if t not in stop_words], axis=1)

        #Linguistic modules
        stemmer = nltk.stem.PorterStemmer()
        self.df['terms'] = self.df.apply(lambda row: [stemmer.stem(token) for token in row['terms']], axis=1)
        # lemmatizer = nltk.stem.WordNetLemmatizer()

        # self.df['terms'] = self.df.apply(lambda row: [lemmatizer.lemmatize(t, PreprocessDataset.get_wordnet_pos(t)) for t in row['terms']], axis=1)

    def tokens_to_indices(self, tokens):
        return [self.vocab.get(token, self.vocab['<UNK>']) for token in tokens]

    def get_indexing(self):
        
        counter = Counter()
        for tokens in self.df['terms']:
            counter.update(tokens)
            
        self.vocab = {word: i+2 for i, (word, _) in enumerate(counter.most_common())}
        self.vocab['<PAD>'] = 0
        self.vocab['<UNK>'] = 1

        self.df['input_ids'] = self.df['terms'].apply(lambda x: self.tokens_to_indices(x))

    def pad(self, seq):
        if len(seq) < self.MAX_SEQ_LEN:
            return seq + [self.vocab['<PAD>']] * (self.MAX_SEQ_LEN - len(seq))
        else:
            return seq[:self.MAX_SEQ_LEN]

    def add_padding(self):
        
        self.df['input_ids'] = self.df['input_ids'].apply(self.pad)
        

    @staticmethod
    def get_wordnet_pos(word):
        """Map POS tag to first character for lemmatizer"""
        from nltk import pos_tag
        tag = pos_tag([word])[0][1][0].upper()
        return {
            'J': nltk.corpus.wordnet.ADJ,
            'N': nltk.corpus.wordnet.NOUN,
            'V': nltk.corpus.wordnet.VERB,
            'R': nltk.corpus.wordnet.ADV
        }.get(tag, nltk.corpus.wordnet.NOUN)

    def save_vocab(self, filepath):
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.vocab, f, ensure_ascii=False, indent=4)

    def load_vocab(self, filepath):
        with open(filepath, 'r', encoding='utf-8') as f:
            self.vocab = json.load(f)

In [16]:
df_dataset = PreprocessDataset(df)

Tokens

In [17]:
df_dataset.get_tokens()
print(df_dataset.df['tokens'].head())

0    [one, of, the, other, reviewers, has, mentione...
1    [a, wonderful, little, production, br, br, the...
2    [i, thought, this, was, a, wonderful, way, to,...
3    [basically, theres, a, family, where, a, littl...
4    [petter, matteis, love, in, the, time, of, mon...
Name: tokens, dtype: object


Terms

In [18]:
df_dataset.get_terms()

In [19]:
print(df_dataset.df['terms'].head())

0    [one, review, mention, watch, 1, oz, episod, y...
1    [wonder, littl, product, br, br, film, techniq...
2    [thought, wonder, way, spend, time, hot, summe...
3    [basic, there, famili, littl, boy, jake, think...
4    [petter, mattei, love, time, money, visual, st...
Name: terms, dtype: object


Create vocabulary and Convert Text to Sequences of Indices

In [20]:
df_dataset.get_indexing()
print(df_dataset.df['input_ids'].head())

0    [6, 241, 364, 14, 429, 2885, 199, 416, 1621, 1...
1    [112, 56, 230, 2, 2, 4, 1632, 13396, 58922, 11...
2    [104, 112, 37, 658, 8, 824, 1335, 2092, 440, 6...
3    [393, 144, 146, 56, 238, 2829, 32, 144, 562, 3...
4    [58924, 8616, 31, 8, 228, 517, 1090, 4, 14, 33...
Name: input_ids, dtype: object


Add pad sequence

In [21]:
df_dataset.add_padding()
print(df_dataset.df['input_ids'].head())

0    [6, 241, 364, 14, 429, 2885, 199, 416, 1621, 1...
1    [112, 56, 230, 2, 2, 4, 1632, 13396, 58922, 11...
2    [104, 112, 37, 658, 8, 824, 1335, 2092, 440, 6...
3    [393, 144, 146, 56, 238, 2829, 32, 144, 562, 3...
4    [58924, 8616, 31, 8, 228, 517, 1090, 4, 14, 33...
Name: input_ids, dtype: object


In [30]:
dataset.save_vocab('vocab.json')

NameError: name 'dataset' is not defined

Save preprocessed Dataset

In [24]:
if os.path.exists('preprocessed_dataset.csv'):
    print("File exists and will be overwritten.")
    
df_dataset.df.to_csv('preprocessed_dataset.csv', index=False)

File exists and will be overwritten.


# Load Dataset

In [25]:
import ast

In [26]:
df = pd.read_csv('preprocessed_dataset.csv')

In [27]:
df['input_ids'] = df['input_ids'].apply(ast.literal_eval)

In [32]:
def remap_label(label):
    if label == -1:
        return 0
    elif label == 0:
        return 1
    elif label == 1:
        return 2

In [33]:
df['label'] = df['label'].apply(remap_label)

# Data Loader

In [34]:
from sklearn.model_selection import train_test_split

In [35]:
class SentimentDataset(Dataset):
    def __init__(self, X, y):
        self.X = X  # Keep as a list of lists
        self.y = y  # Also keep as list or array

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        input_tensor = torch.tensor(self.X[idx], dtype=torch.long)
        label_tensor = torch.tensor(self.y[idx], dtype=torch.long)
        return input_tensor, label_tensor

## Configs

In [36]:
config = {
    "batch_size": 32, # Number of samples per training step, how many samples my model sees at once befor update weights
    "lr": 1e-3,
    "epochs": 10
}

In [37]:
X = df['input_ids'].tolist()
y = df['label'].tolist()

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print(f"Train size: {len(X_train)}")
print(f"Validation size: {len(X_val)}")
print(f"Test size: {len(X_test)}")

Train size: 42776
Validation size: 5347
Test size: 5348


In [38]:
train_dataset = SentimentDataset(X_train, y_train)
val_dataset = SentimentDataset(X_val, y_val)
test_dataset = SentimentDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True, num_workers = 0, pin_memory  = False)
val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], num_workers = 0)
test_loader = DataLoader(test_dataset, batch_size=config["batch_size"])

In [39]:
print("Number of classes    : ", len(set(y)))
print("No. of train samples : ", len(train_dataset))
print("Input shape example  : ", train_dataset[0][0].shape)
print("Batch size           : ", config['batch_size'])
print("Train batches        : ", len(train_loader))
print("Val batches          : ", len(val_loader))
print("Test batches         : ", len(test_loader))

Number of classes    :  3
No. of train samples :  42776
Input shape example  :  torch.Size([300])
Batch size           :  32
Train batches        :  1337
Val batches          :  168
Test batches         :  168


# LSTM Model

In [40]:
# Bidirectional model because sometimes, the meaning of a word depends on what comes after it (not just before). 
#For example: in "not good", the word "not" completely flips the sentiment of "good"


class SentimentLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim=100, hidden_dim=128, output_dim=2, 
                 n_layers=2, bidirectional=True, dropout=0.5, pad_idx=0):
        super(SentimentLSTM, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        self.lstm = nn.LSTM(embedding_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            batch_first=True,
                            dropout=dropout if n_layers > 1 else 0)

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)

    def forward(self, input_ids):
        # input_ids: [B, T]
        embedded = self.dropout(self.embedding(input_ids))       # [B, T, E]
        lstm_out, (hidden, _) = self.lstm(embedded)              # hidden: [n_layers*2, B, H]

        # Concatenate final forward and backward hidden states
        if self.lstm.bidirectional:
            hidden = torch.cat((hidden[-2], hidden[-1]), dim=1)  # [B, H*2]
        else:
            hidden = hidden[-1]                                  # [B, H]
        
        out = self.fc(self.dropout(hidden))                      # [B, output_dim]
        return out

In [41]:
model = SentimentLSTM(
    vocab_size=len(df_dataset.vocab), 
    embedding_dim=100,
    hidden_dim=128,
    output_dim= df['label'].nunique(), 
    n_layers=2,
    bidirectional=True,
    dropout=0.5,
    pad_idx=df_dataset.vocab['<PAD>']
).to(device)

In [None]:
summary(model, input_size=(df_dataset.MAX_SEQ_LEN,))

# Setup everything for training

In [42]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
# Smooth cosine-shaped LR decay, good when you train for a fixed number of epochs.
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config["epochs"])
# You’re doing validation each epoch.
# You want to reduce LR only when validation loss plateaus.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)

  from .autonotebook import tqdm as notebook_tqdm


In [38]:
print(train_dataset[0])

(tensor([    26,     14,      3,    153,    263,      6,     48,   1196,    130,
          5403,    177,      6,    771,    188,    172,     25,     22,   1145,
          1661,    173,      5,      3,   1021,    287,    163,   4394,    803,
            24,     61,   4208,    119,    965,    245,    183,   1403,   4729,
           361,   7144,     28,   1958,   1273,    113,   4997,   4967,    173,
           444,   9157,     91,    863,    105,     67,    287, 126872,    385,
           414,     88,    371,   1619,   5042,      3,  40010,     15,     30,
            30,      9,     37,     30,    198,     37,   1336,   5297,      4,
           327,      3,     70,    273,    416,    493,    246,   1375,   1281,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,      0,      0,      0,      0,      0,
             0,      0,      0,      0,

# Training

In [43]:
def train(model, dataloader, optimizer, criterion, device, scaler=None, scheduler=None, config=None):
    model.train()
    batch_bar = tqdm(total=len(dataloader), dynamic_ncols=True, leave=False, position=0, desc='Train')

    num_correct = 0
    total_loss = 0

    for i, (inputs, labels) in enumerate(dataloader):
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)

        # Mixed precision context (optional)
        with torch.cuda.amp.autocast(enabled=(scaler is not None)):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        if scaler:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        preds = torch.argmax(outputs, dim=1)
        num_correct += (preds == labels).sum().item()
        total_loss += loss.item()

        batch_bar.set_postfix(
            loss=f"{total_loss/(i+1):.4f}",
            acc=f"{100 * num_correct / ((i+1)*inputs.size(0)):.2f}%",
            lr=f"{optimizer.param_groups[0]['lr']:.6f}"
        )
        batch_bar.update()

        # Scheduler step if NOT ReduceLROnPlateau
        if scheduler is not None and not isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step()

    batch_bar.close()

    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * num_correct / (len(dataloader.dataset))
    return accuracy, avg_loss

# Evaluation

In [44]:
def validate(model, dataloader, criterion, device, config=None):
    model.eval()
    batch_bar = tqdm(total=len(dataloader), dynamic_ncols=True, leave=False, position=0, desc='Val')

    num_correct = 0
    total_loss = 0

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            preds = torch.argmax(outputs, dim=1)
            num_correct += (preds == labels).sum().item()
            total_loss += loss.item()

            batch_bar.set_postfix(
                loss=f"{total_loss/(i+1):.4f}",
                acc=f"{100 * num_correct / ((i+1)*inputs.size(0)):.2f}%"
            )
            batch_bar.update()

    batch_bar.close()

    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * num_correct / (len(dataloader.dataset))
    return accuracy, avg_loss

In [None]:
def train_full_loop(model, train_loader, val_loader, optimizer, criterion, device, config, scheduler=None, scaler=None, patience=5):
    best_val_acc = 0.0
    epochs_no_improve = 0
    train_losses = []
    val_losses = []
    for epoch in range(config['epochs']):
        print(f"\nEpoch {epoch + 1}/{config['epochs']}")
        train_acc, train_loss = train(model, train_loader, optimizer, criterion, device, scaler, scheduler, config)
        val_acc, val_loss = validate(model, val_loader, criterion, device, config)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val   Loss: {val_loss:.4f}, Val   Acc: {val_acc:.2f}%")
        print(f"Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")

        # ReduceLROnPlateau scheduler step
        if scheduler is not None and isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(val_loss)

        # Early stopping logic
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_no_improve = 0
            torch.save(model.state_dict(), "best_model.pth")
            print("Best model saved as best_model.pth.")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {patience} epochs with no improvement.")
                break

        # Clean up to avoid CUDA OOM
        gc.collect()
        torch.cuda.empty_cache()

    print("\nTraining complete.")
    print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

In [46]:

for inputs, labels in train_loader:
    print(labels)
    break


tensor([1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1,
        1, 2, 1, 1, 2, 2, 1, 2])


In [47]:
scaler = torch.cuda.amp.GradScaler(device)

train_full_loop(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    config=config,
    scheduler=scheduler,
    scaler=scaler,
    patience=3
)




Epoch 1/10


                                                                                                

Train Loss: 0.6643, Train Acc: 63.37%
Val   Loss: 0.5230, Val   Acc: 77.00%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 2/10


                                                                                                 

Train Loss: 0.4956, Train Acc: 77.57%
Val   Loss: 0.3939, Val   Acc: 83.00%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 3/10


                                                                                                 

Train Loss: 0.3925, Train Acc: 83.13%
Val   Loss: 0.3372, Val   Acc: 85.56%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 4/10


                                                                                                 

Train Loss: 0.3384, Train Acc: 85.94%
Val   Loss: 0.3127, Val   Acc: 86.85%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 5/10


                                                                                                 

Train Loss: 0.3003, Train Acc: 87.63%
Val   Loss: 0.3314, Val   Acc: 87.13%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 6/10


                                                                                                 

Train Loss: 0.2719, Train Acc: 89.08%
Val   Loss: 0.2998, Val   Acc: 87.97%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 7/10


                                                                                                 

Train Loss: 0.2459, Train Acc: 90.23%
Val   Loss: 0.2924, Val   Acc: 87.82%
Learning Rate: 0.001000

Epoch 8/10


                                                                                                 

Train Loss: 0.2228, Train Acc: 91.28%
Val   Loss: 0.3157, Val   Acc: 88.03%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 9/10


                                                                                                 

Train Loss: 0.2062, Train Acc: 91.78%
Val   Loss: 0.3201, Val   Acc: 88.16%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 10/10


                                                                                                 

Train Loss: 0.1876, Train Acc: 92.76%
Val   Loss: 0.3179, Val   Acc: 88.31%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Training complete.
Best Validation Accuracy: 88.31%


# Matplot

In [None]:
import matplotlib.pyplot as plt
def plot_metrics(cfg, train_losses, val_losses, val_accuracies, val_f1s, test_loss, test_accuracy, test_f1):
    """
    Plots and saves the training/validation loss, accuracy, and F1 score over epochs.
    Optionally, plots test loss, accuracy, and F1 as horizontal lines.
    """
    epochs = range(1, cfg['epochs'] + 1)
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, label="Train Loss")
    plt.plot(epochs, val_losses, label="Validation Loss")
    plt.axhline(y=test_loss, color='r', linestyle='--', label='Test Loss')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Loss over Epochs")
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(epochs, val_accuracies, label="Validation Accuracy")
    plt.plot(epochs, val_f1s, label="Validation F1")
    plt.axhline(y=test_accuracy, color='g', linestyle='--', label='Test Accuracy')
    plt.axhline(y=test_f1, color='m', linestyle='--', label='Test F1')
    plt.xlabel("Epoch")
    plt.ylabel("Score")
    plt.title("Validation Accuracy and F1 over Epochs")
    plt.legend()
    plt.tight_layout()
    plt.savefig("performance_graph.png")
    plt.show()

# Testing

In [None]:
def test(model, dataloader, criterion, device):
    model.eval()
    batch_bar = tqdm(total=len(dataloader), dynamic_ncols=True, leave=False, position=0, desc='Test')

    total_loss = 0
    num_correct = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            num_correct += (preds == labels).sum().item()
            total_loss += loss.item()

            batch_bar.set_postfix(
                loss=f"{total_loss/(i+1):.4f}",
                acc=f"{100 * num_correct / ((i+1)*inputs.size(0)):.2f}%"
            )
            batch_bar.update()

    batch_bar.close()
    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * num_correct / len(dataloader.dataset)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    print(f"\nTest Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.2f}%, Test F1: {f1:.4f}")
    return accuracy, avg_loss, f1, all_preds, all_labels

In [None]:
model.load_state_dict(torch.load("best_model.pth"))

# Then call the test function
test_accuracy, test_loss, predictions, ground_truth = test(
    model=model,
    dataloader=test_loader,
    criterion=criterion,
    device=device
    
)

                                                                                 


Test Loss: 0.3264, Test Accuracy: 88.48%




In [None]:
plot_metrics(
    cfg=config,
    train_losses=

)