# Project 4: Sentiment Analysis of Text

This project involves building and evaluating deep learning models (RNNs or Transformers) for sentiment classification of text, such as movie reviews or product feedback.

## Preliminaries

In [69]:
import os
import pandas as pd

# PyTorch Core
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence

# Optimizer
import torch.optim as optim

# for progress bars
from tqdm import tqdm

import gc
import numpy as np

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Device:  cpu


## Download datasets

In [3]:
import zipfile
from kaggle.api.kaggle_api_extended import KaggleApi

In [4]:
def download_dataset(dataset: str, path: str):
    os.environ['KAGGLE_CONFIG_DIR'] = os.getcwd()
    api = KaggleApi()
    api.authenticate()
    api.dataset_download_files(dataset, path, unzip=True)

In [25]:
def remap_label(label, name):
    if label == -1 or label == 'negative':
        return 0
    elif label == 1 or label == 'positive':
        return 2
    elif label == 0 and name != "SST-2":
        return 1
    # negative for SST-2
    else:
        return 0

In [26]:
def show_data(path: str, label: str, text: str, delimiter= ",", name=""):
    df = pd.read_csv(path,delimiter=delimiter)

    df = df.rename(columns={text: "text", label: "label"})

    df['label'] = df['label'].apply(lambda x: remap_label(x, name))

    num_labels = df['label'].nunique()
    print(f"Number of distinct labels: {num_labels}")
    print(df.head())

    print(f"Number of rows: {df.shape[0]}")

    return df

### IMDb

In [27]:
download_dataset('lakshmi25npathi/imdb-dataset-of-50k-movie-reviews', 'IMDB')

Dataset URL: https://www.kaggle.com/datasets/lakshmi25npathi/imdb-dataset-of-50k-movie-reviews


In [28]:
path = os.path.join("IMDB", "IMDB Dataset.csv")
imdb = show_data(path, "sentiment", "review", name="IMDB")

Number of distinct labels: 2
                                                text  label
0  One of the other reviewers has mentioned that ...      2
1  A wonderful little production. <br /><br />The...      2
2  I thought this was a wonderful way to spend ti...      2
3  Basically there's a family where a little boy ...      0
4  Petter Mattei's "Love in the Time of Money" is...      2
Number of rows: 50000


### SST-2

In [29]:
download_dataset('jkhanbk1/sst2-dataset', '.')

Dataset URL: https://www.kaggle.com/datasets/jkhanbk1/sst2-dataset


In [34]:
base_path = "Finalv SST-2 dataset CSV format"
splits = ["train.csv", "val.csv", "test.csv"]

frames = []
for split in splits:
    path = os.path.join(base_path, split)
    print(f"split: {splits}")
    df = show_data(path, label="label", text="sentence",name="SST-2")
    frames.append(df)

sst_2 = pd.concat(frames, ignore_index=True)

split: ['train.csv', 'val.csv', 'test.csv']
Number of distinct labels: 2
   label                                               text
0      2  A stirring, funny and finally transporting re-...
1      0  Apparently reassembled from the cutting-room f...
2      0  They presume their audience won't sit still fo...
3      2  This is a visually stunning rumination on love...
4      2  Jonathan Parker's Bartleby should have been th...
Number of rows: 6920
split: ['train.csv', 'val.csv', 'test.csv']
Number of distinct labels: 2
   label                                               text
0      0                        One long string of cliches.
1      0  If you've ever entertained the notion of doing...
2      0  K-19 exploits our substantial collective fear ...
3      0  It's played in the most straight-faced fashion...
4      2  There is a fabric of complex ideas here, and f...
Number of rows: 872
split: ['train.csv', 'val.csv', 'test.csv']
Number of distinct labels: 2
   label            

### SemEval

In [35]:
download_dataset('azzouza2018/semevaldatadets', 'semEval')

Dataset URL: https://www.kaggle.com/datasets/azzouza2018/semevaldatadets


In [36]:
base_path = "semEval"
splits = ["semeval-2013-dev.csv", "semeval-2013-test.csv", "semeval-2013-train.csv"]

frames = []
for split in splits:
    path = os.path.join(base_path, split)
    print(f"split: {splits}")
    df = show_data(path, "label", "text", delimiter='\t', name = "semEval")
    frames.append(df)

sem_eval = pd.concat(frames, ignore_index=True)

split: ['semeval-2013-dev.csv', 'semeval-2013-test.csv', 'semeval-2013-train.csv']
Number of distinct labels: 3
   label                                               text
0      1  Watching Devil Inside for the 1st time tonight...
1      1  @CMPunk Devil Inside , The exorcisism of Emily...
2      1  Off to do my vlog. Watching Devil Inside and J...
3      2  @raykipo take Silver at the Hib cup. Great day...
4      1  @hollyhippo I'm going to blockbuster tomorrow ...
Number of rows: 1650
split: ['semeval-2013-dev.csv', 'semeval-2013-test.csv', 'semeval-2013-train.csv']
Number of distinct labels: 3
   label                                               text
0      2  I just hope Ian Bennett is first out of the se...
1      2  @JonHeymanCBS What is Selig's next brilliant i...
2      1  When I looked at the full moon last night I im...
3      2  Watched a movie yesterday #The70's on #OVTV an...
4      2  I liked a @YouTube video http://t.co/dSPcjWDB ...
Number of rows: 3545
split: ['semev

## Concatenate Datasets and Inspect Combined DataFrame

In [37]:
frames = [imdb, sst_2, sem_eval]
df = pd.concat(frames)
print(df.shape[0])
df.info()

74424
<class 'pandas.core.frame.DataFrame'>
Int64Index: 74424 entries, 0 to 14810
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    74424 non-null  object
 1   label   74424 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.7+ MB


## Text Preprocessing

Tokenize

In [38]:
import re
import nltk
from collections import Counter

nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Aksinia\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Aksinia\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Aksinia\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Aksinia\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Aksinia\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Aksinia\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-

True

### Defining Stop Words

In [52]:
import json

In [39]:
stop_words = set(nltk.corpus.stopwords.words('english'))

In [40]:
stop_words.discard('no')
stop_words.discard('not')
stop_words.discard('nor')

print(stop_words)

{'hers', "she's", 'd', 'is', 'only', "we'll", 'again', "should've", 'shouldn', 'ours', 'into', 'the', 'shan', 'me', 'does', "we're", 'wouldn', 'it', "mightn't", 'themselves', 'now', "i'll", 've', 'after', 'who', 'once', "you're", "doesn't", 'by', 'needn', "they're", 'couldn', 'few', "he's", "aren't", 'yours', 'mightn', "he'd", 'how', 'through', 'them', 'between', 'himself', 'haven', 'herself', 'hadn', "it'd", 'ourselves', 'some', 'll', 'each', "it'll", 'there', "hasn't", 'her', 'same', 'if', 'than', "he'll", "we'd", "they'll", 'off', 'i', "shan't", 'doesn', 'ain', 'be', 't', 'y', 'over', "they've", 'being', 'our', "shouldn't", 'wasn', 'what', 'up', 'against', "they'd", "you'd", 'can', 'she', "hadn't", 'to', 'mustn', 'has', 'should', 'doing', 'which', 'am', 'from', "i've", 'don', 'your', 'didn', 'weren', 'under', 'in', 'its', 'both', 'm', "i'd", 'have', 'own', 'an', 'at', 'but', 'just', "won't", 'yourselves', 'other', 'until', 'o', "haven't", 'because', 'more', 'during', 'further', 's',

In [73]:
class PreprocessDataset(Dataset):

    def __init__(self, df, transform=None):
        self.df = df
        self.MAX_VOCAB_SIZE = 10000
        self.MAX_SEQ_LEN = 300
        self.vocab = None

    def get_tokens(self):

        # Lowercase
        self.df['text_processed'] = self.df.apply(lambda row: row['text'].lower(), axis=1)

        # Remove anything that is NOT a word char or whitespace
        self.df['text_processed'] = self.df.apply(lambda row: re.sub(r'[^\w\s]', '', row['text_processed']), axis=1)
        
        # Tokenizer
        self.df['tokens'] = self.df.apply(lambda row: nltk.word_tokenize(row['text_processed']), axis=1)

    def get_terms(self):

        #Remove stop words
        self.df['terms'] = self.df.apply(lambda row: [t for t in row['tokens'] if t not in stop_words], axis=1)

        #Linguistic modules
        stemmer = nltk.stem.PorterStemmer()
        self.df['terms'] = self.df.apply(lambda row: [stemmer.stem(token) for token in row['terms']], axis=1)
        # lemmatizer = nltk.stem.WordNetLemmatizer()

        # self.df['terms'] = self.df.apply(lambda row: [lemmatizer.lemmatize(t, PreprocessDataset.get_wordnet_pos(t)) for t in row['terms']], axis=1)

    def tokens_to_indices(self, tokens):
        return [self.vocab.get(token, self.vocab['<UNK>']) for token in tokens]

    def get_indexing(self):
        
        counter = Counter()
        for tokens in self.df['terms']:
            counter.update(tokens)
            
        self.vocab = {word: i+2 for i, (word, _) in enumerate(counter.most_common())}
        self.vocab['<PAD>'] = 0
        self.vocab['<UNK>'] = 1

        self.df['input_ids'] = self.df['terms'].apply(lambda x: self.tokens_to_indices(x))

    def pad(self, seq):
        if len(seq) < self.MAX_SEQ_LEN:
            return seq + [self.vocab['<PAD>']] * (self.MAX_SEQ_LEN - len(seq))
        else:
            return seq[:self.MAX_SEQ_LEN]

    def add_padding(self):
        
        self.df['input_ids'] = self.df['input_ids'].apply(self.pad)
        

    @staticmethod
    def get_wordnet_pos(word):
        """Map POS tag to first character for lemmatizer"""
        from nltk import pos_tag
        tag = pos_tag([word])[0][1][0].upper()
        return {
            'J': nltk.corpus.wordnet.ADJ,
            'N': nltk.corpus.wordnet.NOUN,
            'V': nltk.corpus.wordnet.VERB,
            'R': nltk.corpus.wordnet.ADV
        }.get(tag, nltk.corpus.wordnet.NOUN)

    def save_vocab(self, filepath):
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.vocab, f, ensure_ascii=False, indent=4)

    def load_vocab(self, filepath):
        with open(filepath, 'r', encoding='utf-8') as f:
            self.vocab = json.load(f)

In [74]:
preprocessor = PreprocessDataset(df)

### Tokens

Each token is a candidate for an index entry

In [76]:
preprocessor.get_tokens()
print(preprocessor.df['tokens'].head())

0    [one, of, the, other, reviewers, has, mentione...
1    [a, wonderful, little, production, br, br, the...
2    [i, thought, this, was, a, wonderful, way, to,...
3    [basically, theres, a, family, where, a, littl...
4    [petter, matteis, love, in, the, time, of, mon...
Name: tokens, dtype: object


### Terms

Normalized word types (excluding stop words and after applying the linguistic module)

In [77]:
preprocessor.get_terms()

In [78]:
print(preprocessor.df['terms'].head())

0    [one, review, mention, watch, 1, oz, episod, y...
1    [wonder, littl, product, br, br, film, techniq...
2    [thought, wonder, way, spend, time, hot, summe...
3    [basic, there, famili, littl, boy, jake, think...
4    [petter, mattei, love, time, money, visual, st...
Name: terms, dtype: object


### Indexing

Create vocabulary and assign unique integer IDs to each normalized term

In [79]:
preprocessor.get_indexing()
print(preprocessor.df['input_ids'].head())

0    [6, 249, 370, 13, 404, 2981, 199, 421, 1637, 1...
1    [114, 55, 233, 2, 2, 4, 1666, 12728, 61868, 10...
2    [108, 114, 37, 654, 8, 798, 1296, 1705, 442, 6...
3    [403, 143, 145, 55, 238, 2869, 32, 143, 575, 3...
4    [37858, 8398, 28, 8, 232, 510, 1124, 4, 13, 33...
Name: input_ids, dtype: object


### Padding

In [80]:
preprocessor.add_padding()
print(preprocessor.df['input_ids'].head())

0    [6, 249, 370, 13, 404, 2981, 199, 421, 1637, 1...
1    [114, 55, 233, 2, 2, 4, 1666, 12728, 61868, 10...
2    [108, 114, 37, 654, 8, 798, 1296, 1705, 442, 6...
3    [403, 143, 145, 55, 238, 2869, 32, 143, 575, 3...
4    [37858, 8398, 28, 8, 232, 510, 1124, 4, 13, 33...
Name: input_ids, dtype: object


In [81]:
preprocessor.save_vocab('vocab.json')

### Save preprocessed Dataset

In [82]:
if os.path.exists('preprocessed_dataset.csv'):
    print("File exists and will be overwritten.")
    
preprocessor.df.to_csv('preprocessed_dataset.csv', index=False)

File exists and will be overwritten.


### Load Dataset

In [83]:
import ast

In [84]:
df = pd.read_csv('preprocessed_dataset.csv')

In [85]:
df['input_ids'] = df['input_ids'].apply(ast.literal_eval)

## Load embedding

In [91]:
def build_embedding_matrix(embedding_path, vocab, embedding_dim=300):
        embedding_matrix = np.random.uniform(-0.25, 0.25, (len(vocab), embedding_dim)).astype(np.float32)
        found = 0
    
        with open(embedding_path, 'r', encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split()
                word = parts[0]
                vector = np.array(parts[1:], dtype=np.float32)
    
                if word in vocab:
                    idx = vocab[word]
                    embedding_matrix[idx] = vector
                    found += 1
    
        print(f"Found {found}/{len(vocab)} words in the embedding file.")
        return embedding_matrix

In [97]:
embedding_matrix = build_embedding_matrix("glove.6B.300d.txt", preprocessor.vocab, embedding_dim=300)

Found 41311/161743 words in the embedding file.


## Data Loader

In [58]:
from sklearn.model_selection import train_test_split

In [98]:
class SentimentDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        input_tensor = torch.tensor(self.X[idx], dtype=torch.long)
        label_tensor = torch.tensor(self.y[idx], dtype=torch.long)
        return input_tensor, label_tensor

## Configs

In [99]:
config = {
    "batch_size": 32,
    "lr": 1e-3,
    "epochs": 10
}

In [100]:
X = df['input_ids'].tolist()
y = df['label'].tolist()

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42)

print(f"Train size: {len(X_train)}")
print(f"Validation size: {len(X_val)}")
print(f"Test size: {len(X_test)}")

Train size: 59539
Validation size: 7442
Test size: 7443


In [101]:
train_dataset = SentimentDataset(X_train, y_train)
val_dataset = SentimentDataset(X_val, y_val)
test_dataset = SentimentDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True, num_workers = 0, pin_memory  = False)
val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], num_workers = 0)
test_loader = DataLoader(test_dataset, batch_size=config["batch_size"])

In [102]:
print("Number of classes    : ", len(set(y)))
print("No. of train samples : ", len(train_dataset))
print("Input shape example  : ", train_dataset[0][0].shape)
print("Batch size           : ", config['batch_size'])
print("Train batches        : ", len(train_loader))
print("Val batches          : ", len(val_loader))
print("Test batches         : ", len(test_loader))

Number of classes    :  3
No. of train samples :  59539
Input shape example  :  torch.Size([300])
Batch size           :  32
Train batches        :  1861
Val batches          :  233
Test batches         :  233


# LSTM Model

# TODO: experiemnt with embedding_matrix and  vocab_size learned from scratch

In [103]:
# Bidirectional model because sometimes, the meaning of a word depends on what comes after it (not just before). 
#For example: in "not good", the word "not" completely flips the sentiment of "good"


class SentimentLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim=100, hidden_dim=128, output_dim=2, 
                 n_layers=2, bidirectional=True, dropout=0.5, pad_idx=0):
        super(SentimentLSTM, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        self.lstm = nn.LSTM(embedding_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            batch_first=True,
                            dropout=dropout if n_layers > 1 else 0)

        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)

    def forward(self, input_ids):
        # input_ids: [B, T]
        embedded = self.dropout(self.embedding(input_ids))       # [B, T, E]
        lstm_out, (hidden, _) = self.lstm(embedded)              # hidden: [n_layers*2, B, H]

        # Concatenate final forward and backward hidden states
        if self.lstm.bidirectional:
            hidden = torch.cat((hidden[-2], hidden[-1]), dim=1)  # [B, H*2]
        else:
            hidden = hidden[-1]                                  # [B, H]
        
        out = self.fc(self.dropout(hidden))                      # [B, output_dim]
        return out

In [104]:
model = SentimentLSTM(
    vocab_size=len(preprocessor.vocab), 
    embedding_dim=100,
    hidden_dim=128,
    output_dim= df['label'].nunique(), 
    n_layers=2,
    bidirectional=True,
    dropout=0.5,
    pad_idx=preprocessor.vocab['<PAD>']
).to(device)

## Setup everything for training

In [105]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
# Smooth cosine-shaped LR decay, good when you train for a fixed number of epochs.
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=config["epochs"])
# You’re doing validation each epoch.
# You want to reduce LR only when validation loss plateaus.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)

In [106]:
print(train_dataset[0])

(tensor([  80, 6541,  146, 1124,  708,   99,   20,   27,  251, 1419,  552,  618,
          75,   65, 1799, 1295,  300,   27,  310,   85,   27,  209,  399,    3,
          48,   55, 1621, 2125, 1614, 1888,  483, 1322,   87,  628,  855,   27,
          65,  786,  133,   26,  214,  131, 2803,  103,   71,   37,   29,   69,
         105,  101,  827,   27,  388,  123, 1339, 9409,  106, 1047,   16,  507,
          16, 5789,   63,   80, 4698, 1197,   11,   93,  433,    3,  890,  111,
         211,  554,  458,   14,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,  

# Training

In [107]:
def train(model, dataloader, optimizer, criterion, device, scaler=None, scheduler=None, config=None):
    model.train()
    batch_bar = tqdm(total=len(dataloader), dynamic_ncols=True, leave=False, position=0, desc='Train')

    num_correct = 0
    total_loss = 0

    for i, (inputs, labels) in enumerate(dataloader):
        optimizer.zero_grad()
        inputs, labels = inputs.to(device), labels.to(device)

        # Mixed precision context (optional)
        with torch.cuda.amp.autocast(enabled=(scaler is not None)):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        if scaler:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        preds = torch.argmax(outputs, dim=1)
        num_correct += (preds == labels).sum().item()
        total_loss += loss.item()

        batch_bar.set_postfix(
            loss=f"{total_loss/(i+1):.4f}",
            acc=f"{100 * num_correct / ((i+1)*inputs.size(0)):.2f}%",
            lr=f"{optimizer.param_groups[0]['lr']:.6f}"
        )
        batch_bar.update()

        # Scheduler step if NOT ReduceLROnPlateau
        if scheduler is not None and not isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step()

    batch_bar.close()

    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * num_correct / (len(dataloader.dataset))
    return accuracy, avg_loss

# Evaluation

In [108]:
def validate(model, dataloader, criterion, device, config=None):
    model.eval()
    batch_bar = tqdm(total=len(dataloader), dynamic_ncols=True, leave=False, position=0, desc='Val')

    num_correct = 0
    total_loss = 0

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            preds = torch.argmax(outputs, dim=1)
            num_correct += (preds == labels).sum().item()
            total_loss += loss.item()

            batch_bar.set_postfix(
                loss=f"{total_loss/(i+1):.4f}",
                acc=f"{100 * num_correct / ((i+1)*inputs.size(0)):.2f}%"
            )
            batch_bar.update()

    batch_bar.close()

    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * num_correct / (len(dataloader.dataset))
    return accuracy, avg_loss

In [109]:
def train_full_loop(model, train_loader, val_loader, optimizer, criterion, device, config, scheduler=None, scaler=None, patience=5):
    best_val_acc = 0.0
    epochs_no_improve = 0

    for epoch in range(config['epochs']):
        print(f"\nEpoch {epoch + 1}/{config['epochs']}")
        train_acc, train_loss = train(model, train_loader, optimizer, criterion, device, scaler, scheduler, config)
        val_acc, val_loss = validate(model, val_loader, criterion, device, config)

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
        print(f"Val   Loss: {val_loss:.4f}, Val   Acc: {val_acc:.2f}%")
        print(f"Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")

        # ReduceLROnPlateau scheduler step
        if scheduler is not None and isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(val_loss)

        # Early stopping logic
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_no_improve = 0
            torch.save(model.state_dict(), "best_model.pth")
            print("Best model saved as best_model.pth.")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {patience} epochs with no improvement.")
                break

        # Clean up to avoid CUDA OOM
        gc.collect()
        torch.cuda.empty_cache()

    print("\nTraining complete.")
    print(f"Best Validation Accuracy: {best_val_acc:.2f}%")

In [110]:
for inputs, labels in train_loader:
    print(labels)
    break


tensor([0, 1, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0,
        2, 1, 0, 0, 0, 2, 0, 2])


In [111]:
scaler = torch.cuda.amp.GradScaler(device)

train_full_loop(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
    config=config,
    scheduler=scheduler,
    scaler=scaler,
    patience=2
)

  scaler = torch.cuda.amp.GradScaler(device)



Epoch 1/10


  with torch.cuda.amp.autocast(enabled=(scaler is not None)):
                                                                                                                       

Train Loss: 0.7967, Train Acc: 56.02%
Val   Loss: 0.5984, Val   Acc: 71.69%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 2/10


                                                                                                                       

Train Loss: 0.6492, Train Acc: 69.98%
Val   Loss: 0.5430, Val   Acc: 75.34%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 3/10


                                                                                                                       

Train Loss: 0.5110, Train Acc: 77.33%
Val   Loss: 0.4669, Val   Acc: 80.13%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 4/10


                                                                                                                       

Train Loss: 0.4479, Train Acc: 80.62%
Val   Loss: 0.4271, Val   Acc: 81.89%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 5/10


                                                                                                                       

Train Loss: 0.4010, Train Acc: 83.24%
Val   Loss: 0.4092, Val   Acc: 82.60%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 6/10


                                                                                                                       

Train Loss: 0.3712, Train Acc: 84.75%
Val   Loss: 0.4161, Val   Acc: 82.93%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 7/10


                                                                                                                       

Train Loss: 0.3443, Train Acc: 85.90%
Val   Loss: 0.4188, Val   Acc: 83.24%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 8/10


                                                                                                                       

Train Loss: 0.3213, Train Acc: 87.04%
Val   Loss: 0.4350, Val   Acc: 83.54%
Learning Rate: 0.001000
Best model saved as best_model.pth.

Epoch 9/10


                                                                                                                       

Train Loss: 0.2883, Train Acc: 88.43%
Val   Loss: 0.4242, Val   Acc: 83.79%
Learning Rate: 0.000500
Best model saved as best_model.pth.

Epoch 10/10


                                                                                                                       

Train Loss: 0.2750, Train Acc: 89.05%
Val   Loss: 0.4239, Val   Acc: 83.89%
Learning Rate: 0.000500
Best model saved as best_model.pth.

Training complete.
Best Validation Accuracy: 83.89%


# Testing

In [112]:
def test(model, dataloader, criterion, device):
    model.eval()
    batch_bar = tqdm(total=len(dataloader), dynamic_ncols=True, leave=False, position=0, desc='Test')

    total_loss = 0
    num_correct = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloader):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            num_correct += (preds == labels).sum().item()
            total_loss += loss.item()

            batch_bar.set_postfix(
                loss=f"{total_loss/(i+1):.4f}",
                acc=f"{100 * num_correct / ((i+1)*inputs.size(0)):.2f}%"
            )
            batch_bar.update()

    batch_bar.close()
    avg_loss = total_loss / len(dataloader)
    accuracy = 100 * num_correct / len(dataloader.dataset)

    print(f"\nTest Loss: {avg_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
    return accuracy, avg_loss, all_preds, all_labels

In [113]:
model.load_state_dict(torch.load("best_model.pth"))

# Then call the test function
test_accuracy, test_loss, predictions, ground_truth = test(
    model=model,
    dataloader=test_loader,
    criterion=criterion,
    device=device
)

                                                                                                                       


Test Loss: 0.4056, Test Accuracy: 84.52%


