# Homework 3

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import re
import numpy as np
import pandas as pd
import contractions
from tqdm import tqdm
import nltk
import gensim.downloader as gs
from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.linear_model import Perceptron
from sklearn.svm import LinearSVC
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR


nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package wordnet to /home/adityaan/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/adityaan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/adityaan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/adityaan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

# Utility functions

In [3]:
CLEAN_HTML = re.compile('<.*?>')            # Regex to match HTML tags
CLEAN_URL = re.compile('(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})')
CLEAN_SPACES = re.compile('\s+')            # Regex to match multiple spaces
CLEAN_NON_ALPHA = re.compile('[^a-zA-Z]')   # Regex to match non-alphabetic characters


class TensorDataset(Dataset):
    def __init__(self, X, y):
        super().__init__()
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        if self.y[index] == 'class_1':
            # label = torch.Tensor([1, 0, 0])
            label = 0
        elif self.y[index] == 'class_2':
            # label = torch.Tensor([0, 1, 0])
            label = 1
        elif self.y[index] == 'class_3':
            # label = torch.Tensor([0, 0, 1])
            label = 2
        return torch.from_numpy(self.X[index]).type(torch.float32), label


def fit_model_cv(pipeline, parameter_grid, training_data, target_labels):
    cv = GridSearchCV(
        estimator=pipeline,
        param_grid=parameter_grid,
    )
    cv.fit(training_data, target_labels)
    return cv


def get_model_metrics(model, testing_data, testing_labels):
    y_pred = model.predict(testing_data)
    accuracy = accuracy_score(testing_labels, y_pred)
    return accuracy


def train_model(
    model, 
    optimizer,
    criterion,
    train_dataloader, 
    val_dataloader,
    num_epochs=30,
    lr_scheduler=None
    ):    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    model = model.to(device)
    
    # Training loop
    for epoch in range(num_epochs):
        metrics = {
            'train_acc': 0,
            'train_loss': 0.0,
            'val_acc': 0,
            'val_loss': 0.0
        }

        for i, (X, y) in enumerate(tqdm(train_dataloader)):
            model.train()
            # Zero optim gradients
            optimizer.zero_grad()

            # Move to GPU
            X = X.to(device)
            y = y.to(device)
            
            # Forward pass
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            
            # print(f"{torch.argmax(outputs, axis=1) = }")
            # print(f"{y = }")
            
            # Calculate the accuracy
            metrics['train_acc'] += (torch.argmax(outputs, axis=1) == y).float().sum()
            
            # Calculate the loss
            metrics['train_loss'] += loss
        
        metrics['train_acc'] /= (len(train_dataloader) * train_dataloader.batch_size)
        metrics['train_loss'] /= (len(train_dataloader) * train_dataloader.batch_size)

        if lr_scheduler is not None:
            lr_scheduler.step()

        for i, (X, y) in enumerate(tqdm(val_dataloader)):
            model.eval()
            
            # Move to GPU
            X = X.to(device)
            y = y.to(device)
            
            # Forward pass
            outputs = model(X)
            loss = criterion(outputs, y)
            
            # Calculate the accuracy
            metrics['val_acc'] += (torch.argmax(outputs, axis=1) == y).float().sum()
            
            # Calculate the loss
            metrics['val_loss'] += loss

            
        metrics['val_acc'] /= (len(val_dataloader) * val_dataloader.batch_size)
        metrics['val_loss'] /= (len(val_dataloader) * val_dataloader.batch_size)
        
        print(f"Epoch: {epoch + 1}/{num_epochs}")
        print("Mode\tLoss\tAcc")
        print(f"Train\t{metrics['train_loss']:.2f}\t{metrics['train_acc']:.2f}")
        print(f"Valid\t{metrics['val_loss']:.2f}\t{metrics['val_acc']:.2f}")
        
    return model, metrics


def bin_column(column, bins, labels):
    """
    :param column: pd.Series
    :param bins: list
    :param labels: list
    :return: pd.Series
    """

    bins.insert(0, -float('inf'))

    # Use pd.IntervalIndex to create bins to split the data
    bins = pd.IntervalIndex.from_breaks(bins)

    print(bins)

    x = pd.cut(column, bins=bins, include_lowest=True)
    x = x.cat.rename_categories(labels)

    return x


def prepare_data(dataframe):
    # Convert ratings to numeric
    # Ignore ratings that are not numerals
    dataframe['star_rating_numeric'] = pd.to_numeric(dataframe.star_rating, errors='coerce')

    # Drop NaN
    dataframe.dropna(inplace=True)
    
    # Consider reviews that have 50 or more characters
    dataframe = dataframe[dataframe.review_body.apply(lambda x: len(x)) > 150]

    # Bin ratings into 3 classes
    # 1 and 2   class_1
    # 3         class_2
    # 4 and 5   class_3

    dataframe['target'] = bin_column(dataframe.star_rating_numeric,
                                     [2, 3, 5],
                                     labels=['class_1', 'class_2', 'class_3'])

    # In the interest of computational simplicity,
    # keep only 20000 instances of each class

    tiny_df = pd.DataFrame(
        columns=['star_rating', 'review_body', 'star_rating_numeric'])

    for cls in dataframe.target.unique():
        tiny_df = pd.concat([
            tiny_df,
            dataframe[dataframe.target == cls].sample(20000, random_state=42)
        ])

    return tiny_df


def clean_text(text):
    # Convert to lower case
    text = text.lower()

    # Remove HTML and URL tags from text
    text = re.sub(CLEAN_HTML, ' ', text)
    text = re.sub(CLEAN_URL, ' ', text)

    # Perform contractions on the text
    text = contractions.fix(text)

    # Remove non-alphabetic characters
    text = re.sub(CLEAN_NON_ALPHA, ' ', text)

    # Remove additional spaces
    text = re.sub(CLEAN_SPACES, ' ', text)

    return text


def get_sentence_embedding(sentence,
                           wv_model, 
                           return_type='average', 
                           num_words=10,
                           flatten=False):
    # Split the sentence on space to get individual words
    words = sentence.split(' ')

    if return_type == 'average':
        sentence_encoding = np.zeros((wv_model.vector_size))
        count = 0
        for word in words:
            try:
                sentence_encoding += wv_model[word]
                count += 1
            except KeyError:
                pass
        if count != 0:
            return sentence_encoding / count
        else:
            return np.zeros((wv_model.vector_size))
            # return 'NA'

    elif return_type == 'truncate':
        sentence_encoding = np.zeros((wv_model.vector_size, num_words))
        count = 0
        idx = 0

        # Consider only first 'n' words
        while idx < min(num_words, len(words)) and count < len(words):
            try:
                sentence_encoding[:, idx] = wv_model[words[count]]
                idx += 1
            except KeyError:
                pass
            count += 1
        
        if flatten:
            # Reshaping to a column vector in advance instead of flattening later
            return sentence_encoding.reshape(-1)
        else:
            return sentence_encoding.T
        
    else:
        raise NotImplementedError('')

# Models

In [4]:
class FNN(nn.Module):
    def __init__(self, input_size, output_size) -> None:
        super().__init__()
        self.input_layer = nn.Linear(input_size, 100)
        self.hidden_layer = nn.Linear(100, 10)
        self.output_layer = nn.Linear(10, output_size)
        
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.input_layer(x)
        x = self.relu(x)
        
        x = self.hidden_layer(x)
        x = self.relu(x)
        
        x = self.output_layer(x)
        return x


class RNNModel(nn.Module):
    def __init__(self, 
                 input_size, 
                 hidden_size,
                 num_layers,
                 output_size,
                 rnn_layer_type='rnn'):
        super().__init__()

        self.rnn_hidden_size = hidden_size
        self.rnn_num_layers = num_layers
        self.rnn_layer_type = rnn_layer_type.lower()

        if rnn_layer_type.lower() == 'rnn':
            self.rnn = nn.RNN(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True
            )

        elif rnn_layer_type.lower() == 'lstm':
            self.rnn = nn.LSTM(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True
            )

        elif rnn_layer_type.lower() == 'gru':
            self.rnn = nn.GRU(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True
            )
        
        else:
            raise NotImplementedError('Only rnn, lstm or gru supported for rnn_layer_type')

        self.hidden_layer = nn.Linear(hidden_size, 10)
        self.out_layer = nn.Linear(10, output_size)
        
        self.relu = nn.ReLU()
    
    def forward(self, x):
        if self.rnn_layer_type == 'rnn':
            _, h = self.rnn(x)
        elif self.rnn_layer_type == 'lstm':
            _, (h, _) = self.rnn(x)
        elif self.rnn_layer_type == 'gru':
            _, h = self.rnn(x)
        
        h = h[-1, :, :]
        x = self.hidden_layer(h)
        x = self.relu(x)
        x = self.out_layer(x)
        return x

# Part 1 - Dataset Generation

In [5]:
def read_data(filepath):
    data_og = pd.read_csv(filepath,
                          delimiter='\t',
                          usecols=['star_rating', 'review_body'],
                          on_bad_lines='skip',)
    
    # Prepare the dataset
    # Select reviews with more than 150 characters
    # Select 60000 reviews from each class
    tiny_df = prepare_data(data_og.copy())
    
    # Clean data
    tqdm.pandas()
    print(f"Average length of review before cleaning: {tiny_df.review_body.apply(len).mean()}")
    tiny_df['review_body_pp'] = tiny_df.review_body.progress_apply(clean_text)
    print(f"Average length of review before cleaning: {tiny_df.review_body_pp.apply(len).mean()}")
    
    return tiny_df

In [6]:
tiny_df = read_data('amazon_reviews_us_Beauty_v1_00.tsv')
tiny_df.head(10)

IntervalIndex([(-inf, 2.0], (2.0, 3.0], (3.0, 5.0]], dtype='interval[float64, right]')
Average length of review before cleaning: 437.1336166666667


100%|██████████| 60000/60000 [00:05<00:00, 10287.54it/s]


Average length of review before cleaning: 421.1295


Unnamed: 0,star_rating,review_body,star_rating_numeric,target,review_body_pp
4577737,4,"First off, the scent.<br /> It's excellent. Th...",4.0,class_3,first off the scent it is excellent the aroma ...
4912614,4,This is my first nose trimmer and I like this ...,4.0,class_3,this is my first nose trimmer and i like this ...
3270775,4,"After trying so many different acne products, ...",4.0,class_3,after trying so many different acne products i...
4243609,5,I had noticed that my temporal hairlines were ...,5.0,class_3,i had noticed that my temporal hairlines were ...
4309975,5,i got my plates a few days ago and tried out t...,5.0,class_3,i got my plates a few days ago and tried out t...
2496393,5,I have been using this for about two weeks and...,5.0,class_3,i have been using this for about two weeks and...
70813,5,"I have tried LOTS of lip balm, use it 20 times...",5.0,class_3,i have tried lots of lip balm use it times a d...
3055470,5,I tried other products but nothing works as we...,5.0,class_3,i tried other products but nothing works as we...
131857,5,"I have tried other stem cell treatments, costi...",5.0,class_3,i have tried other stem cell treatments costin...
1743912,5,I used this gel for the first time last night ...,5.0,class_3,i used this gel for the first time last night ...


# Part 2 - Word Embedding

In [7]:
# TODO: Compare vectors
# TODO: Train own word2vec model
word_vec = gs.load('word2vec-google-news-300')

## Part (a)

In [8]:
# Test 1 - King - Man + Woman = Queen
sims = word_vec.most_similar(
    positive=['king', 'woman'],
    negative=['man'])

print("Test 1 - King - Man + Woman = Queen")
print([word for word, _ in sims])

# Test 2 - Plane - Air + Water = Boat
sims = word_vec.most_similar(
    positive=['plane', 'water'],
    negative=['air'])

print("Test 2 - Plane - Air + Water = Boat")
print([word for word, _ in sims])

# Test 3 - Outstanding ~ Excellent
sims = word_vec.most_similar(positive=['outstanding'])

print("Test 3 - Outstanding ~ Excellent")
print([word for word, _ in sims])

# Test 4 - Cat - Kitten + Puppy = Dog
sims = word_vec.most_similar(
    positive=['cat', 'puppy'],
    negative=['kitten'])

print("Test 4 - Cat - Kitten + Puppy = Dog")
print([word for word, _ in sims])

# Test 5 - France - Paris + Tokyo = Japan
sims = word_vec.most_similar(
    positive=['france', 'tokyo'],
    negative=['paris'])

print("Test 5 - France - Paris + Tokyo = Japan")
print([word for word, _ in sims])

Test 1 - King - Man + Woman = Queen
['queen', 'monarch', 'princess', 'crown_prince', 'prince', 'kings', 'Queen_Consort', 'queens', 'sultan', 'monarchy']
Test 2 - Plane - Air + Water = Boat
['boat', 'engine_Cessna', 'Piper_Cherokee', 'radar_deflectors', 'pontoon_boat', 'airplane', 'plane_crashed', 'sailboat', 'canoe', 'desalinator']
Test 3 - Outstanding ~ Excellent
['oustanding', 'Outstanding', 'exceptional', 'anchorman_Jason_Lezak', 'outsanding', 'Stock_HEI', 'excellent', 'Synplicity_FPGA_implementation', 'exemplary', 'W3_Awards_honors']
Test 4 - Cat - Kitten + Puppy = Dog
['dog', 'pet', 'dogs', 'cats', 'pup', 'pooch', 'beagle', 'golden_retriever', 'puppies', 'dachshund']
Test 5 - France - Paris + Tokyo = Japan
['japan', 'hong_kong', 'japanese', 'seoul', 'germany', 'america', 'europe', 'latin_america', 'massachusetts', 'chinese']


## Part (b)

In [9]:
my_word_vec = Word2Vec(
    sentences=[i.split() for i in tiny_df.review_body_pp.to_list()],
    window=13,
    vector_size=300,
    min_count=1
)


# Test 1 - King - Man + Woman = Queen
sims = my_word_vec.wv.most_similar(
    positive=['king', 'woman'],
    negative=['man'])

print("Test 1 - King - Man + Woman = Queen")
print([word for word, _ in sims])

# Test 2 - Plane - Air + Water = Boat/Ship
sims = my_word_vec.wv.most_similar(
    positive=['plane', 'water'],
    negative=['air'])

print("Test 2 - Plane - Air + Water = Boat")
print([word for word, _ in sims])

# Test 3 - Outstanding ~ Excellent
sims = my_word_vec.wv.most_similar(positive=['outstanding'])

print("Test 3 - Outstanding ~ Excellent")
print([word for word, _ in sims])

# Test 4 - Cat - Kitten + Puppy = Dog
sims = my_word_vec.wv.most_similar(
    positive=['cat', 'puppy'],
    negative=['kitten'])

print("Test 4 - Cat - Kitten + Puppy = Dog")
print([word for word, _ in sims])

# Test 5 - France - Paris + Tokyo = Japan
sims = my_word_vec.wv.most_similar(
    positive=['france', 'tokyo'],
    negative=['paris'])

print("Test 5 - France - Paris + Tokyo = Japan")
print([word for word, _ in sims])

Test 1 - King - Man + Woman = Queen
['suppositories', 'simplicity', 'cytochrome', 'ej', 'sable', 'grasping', 'gluconic', 'nouveau', 'clostebol', 'promag']
Test 2 - Plane - Air + Water = Boat
['kitchen', 'cup', 'refilling', 'tub', 'faucet', 'fridge', 'tablet', 'keyboard', 'cabinet', 'brine']
Test 3 - Outstanding ~ Excellent
['excellent', 'exceptional', 'expedient', 'amway', 'inferior', 'incredible', 'acceptable', 'avid', 'authorized', 'authentic']
Test 4 - Cat - Kitten + Puppy = Dog
['tie', 'pinned', 'ears', 'nest', 'crying', 'creeping', 'spine', 'shirt', 'hairline', 'rolls']
Test 5 - France - Paris + Tokyo = Japan
['flowers', 'china', 'luxe', 'headbands', 'wif', 'incense', 'bows', 'scents', 'japan', 'germany']


**Comparison of the above models**

* As it can be seen from the outputs shown above, the pre-trained word2vec model encodes word similarities much better than the model trained on the Amazon reviews dataset
* One of the reasons for this could be the size of the dataset. The amazon reviews is significantly smaller as compared to the google news dataset

In [11]:
def save_embeddings():
    
    # Generate average embedding
    X_average_features = np.vstack(tiny_df.review_body_pp.apply(
        get_sentence_embedding, args=(word_vec, 'average')).to_numpy())
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X_average_features, 
                                                        tiny_df.target.to_numpy(),
                                                        test_size=0.2,
                                                        random_state=42)
    
    # Save above arrays to disk
    with open('average_embedding.npz', 'wb') as file:
        np.savez(
            file=file,
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test
        )
        
    print("Saved average embeddings")

    # Generate truncated features with length 10
    X_truncated_features = np.stack(tiny_df.review_body_pp.apply(
        get_sentence_embedding, args=(word_vec, 'truncate', 10, True)).to_numpy())

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X_truncated_features, 
                                                        tiny_df.target.to_numpy(),
                                                        test_size=0.2,
                                                        random_state=42)

    # Save above arrays to disk
    with open('truncated_embedding_10.npz', 'wb') as file:
        np.savez(
            file=file,
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test
        )
        
    print("Saved truncated embeddings - 10")
    
    # Generate truncated features with length 20
    X_truncated_features = np.stack(tiny_df.review_body_pp.apply(
        get_sentence_embedding, args=(word_vec, 'truncate', 20, False)).to_numpy())

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X_truncated_features, 
                                                        tiny_df.target.to_numpy(),
                                                        test_size=0.2,
                                                        random_state=42)

    # Save above arrays to disk
    with open('truncated_embedding_20.npz', 'wb') as file:
        np.savez(
            file=file,
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test
        )
    
    print("Saved truncated embeddings - 20")
    
    return

In [12]:
save_embeddings()

Saved average embeddings
Saved truncated embeddings - 10
Saved truncated embeddings - 20


In [13]:
# Delete the word2vec object and dataframe to save memory
try:
    del word_vec
    del my_word_vec
    del tiny_df
except:
    pass

**Note:** If you run into memory issues, please restart the kernel and run the below code after you run the imports, utility functions and models section of this notebook.

The following steps have been performed at this point
* Dataset preprocessing - reading, cleaning and selecting 60k reviews
* Feature extraction - 3 sets of features (average, truncated w/ length 10, truncated w/ length 20) have been extracted and saved to `.npz` files

# Part 3 - Simple models

* Perceptron
* Support Vector Machine

In [15]:
def part_3():
    # Read data from npz files
    data = np.load('./average_embedding.npz', allow_pickle=True)
    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']
    
    # Train a perceptron model
    perceptron = Perceptron(eta0=1.5)
    perceptron.fit(X_train, y_train)
    accuracy = get_model_metrics(perceptron, X_test, y_test)
    print(f"Accuracy using Perceptron model: {accuracy}")
    
    # Train SVM model
    svc = LinearSVC(penalty='l2', max_iter=1000)
    svc.fit(X_train, y_train)
    accuracy = get_model_metrics(svc, X_test, y_test)
    print(f"Accuracy using SVM model: {accuracy}")


part_3()

Accuracy using Perceptron model: 0.63475
Accuracy using SVM model: 0.6550833333333334


**Accuracies of Perceptron and SVM trained using TFIDF/Word2Vec Features**


|            | TFIDF(%) | Word2Vec(%) |
|:----------:|:--------:|:-----------:|
| Perceptron |   69.77  |    63.47    |
|     SVM    |   73.4   |    65.50    |

* Although we expect the model trained on Word2Vec features to work better as compared to the one trained on TFIDF features, we can see that the performance of TF-IDF models is better in this case
* This could be attributed to the fact that the TF-IDF features are trained on just the amazon reviews dataset whereas the pretrained Word2Vec model was trained on a much larger dataset. Hence, TF-IDF was a better option since it was better suited for the task at hand

# Part 4 - Feedforward Neural Networks

In [16]:
def part_4_a():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    vector_size = 300
    num_classes = 3
    
    model = FNN(
        input_size=vector_size, 
        output_size=num_classes
    )
    
    data = np.load('./average_embedding.npz', allow_pickle=True)
    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']
    
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_test, y_test)
    
    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=64,
        shuffle=True,
    )
    
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=64,
        shuffle=False,
    )
    
    # Define optimizer and criterion
    optim = Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    
    model, metrics = train_model(
        model=model,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        optimizer=optim,
        criterion=criterion,
        num_epochs=10
    )
    
    # torch.save(model.state_dict(), './saved_models/4a.pth')
    
    print(f"Accuracy of FNN model trained on average vectors: {metrics['val_acc']}")
    
    return model


_ = part_4_a()

100%|██████████| 750/750 [00:01<00:00, 382.55it/s]
100%|██████████| 188/188 [00:00<00:00, 1026.21it/s]


Epoch: 1/10
Mode	Loss	Acc
Train	0.02	0.47
Valid	0.01	0.62


100%|██████████| 750/750 [00:01<00:00, 467.81it/s]
100%|██████████| 188/188 [00:00<00:00, 1047.23it/s]


Epoch: 2/10
Mode	Loss	Acc
Train	0.01	0.63
Valid	0.01	0.64


100%|██████████| 750/750 [00:01<00:00, 466.15it/s]
100%|██████████| 188/188 [00:00<00:00, 1066.45it/s]


Epoch: 3/10
Mode	Loss	Acc
Train	0.01	0.64
Valid	0.01	0.64


100%|██████████| 750/750 [00:01<00:00, 466.67it/s]
100%|██████████| 188/188 [00:00<00:00, 1064.52it/s]


Epoch: 4/10
Mode	Loss	Acc
Train	0.01	0.64
Valid	0.01	0.64


100%|██████████| 750/750 [00:01<00:00, 465.49it/s]
100%|██████████| 188/188 [00:00<00:00, 1084.80it/s]


Epoch: 5/10
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.65


100%|██████████| 750/750 [00:01<00:00, 463.41it/s]
100%|██████████| 188/188 [00:00<00:00, 1066.50it/s]


Epoch: 6/10
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.65


100%|██████████| 750/750 [00:01<00:00, 463.58it/s]
100%|██████████| 188/188 [00:00<00:00, 1069.51it/s]


Epoch: 7/10
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.64


100%|██████████| 750/750 [00:01<00:00, 467.24it/s]
100%|██████████| 188/188 [00:00<00:00, 1061.07it/s]


Epoch: 8/10
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.65


100%|██████████| 750/750 [00:01<00:00, 466.63it/s]
100%|██████████| 188/188 [00:00<00:00, 1034.58it/s]


Epoch: 9/10
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.65


100%|██████████| 750/750 [00:01<00:00, 473.43it/s]
100%|██████████| 188/188 [00:00<00:00, 1055.67it/s]

Epoch: 10/10
Mode	Loss	Acc
Train	0.01	0.66
Valid	0.01	0.65
Accuracy of FNN model trained on average vectors: 0.6545877456665039





In [18]:
def part_4_b():
    vector_size = 3000
    num_classes = 3
    
    model = FNN(
        input_size=vector_size, 
        output_size=num_classes
    )
    
    data = np.load('./truncated_embedding_10.npz', allow_pickle=True)
    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']
    
    print(X_train.shape)
    
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_test, y_test)
    
    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=64,
        shuffle=True,
    )
    
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=64,
        shuffle=False,
    )
    
    # Define optimizer and criterion
    optim = Adam(model.parameters(), lr=0.001)
    lr_scheduler = StepLR(optim, 1, 0.05)
    criterion = nn.CrossEntropyLoss()
    
    model, metrics = train_model(
        model=model,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        optimizer=optim,
        criterion=criterion,
        num_epochs=10,
        lr_scheduler=lr_scheduler,
    )
    
    print(f"Accuracy of FNN model trained on truncated vectors: {metrics['val_acc']}")
    
    # torch.save(model.state_dict(), './saved_models/4b.pth')
    return model


_ = part_4_b()

(48000, 3000)


100%|██████████| 750/750 [00:02<00:00, 358.57it/s]
100%|██████████| 188/188 [00:00<00:00, 659.77it/s]


Epoch: 1/10
Mode	Loss	Acc
Train	0.02	0.49
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 364.73it/s]
100%|██████████| 188/188 [00:00<00:00, 658.10it/s]


Epoch: 2/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 363.63it/s]
100%|██████████| 188/188 [00:00<00:00, 656.58it/s]


Epoch: 3/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 365.75it/s]
100%|██████████| 188/188 [00:00<00:00, 643.22it/s]


Epoch: 4/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 361.29it/s]
100%|██████████| 188/188 [00:00<00:00, 655.77it/s]


Epoch: 5/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 364.96it/s]
100%|██████████| 188/188 [00:00<00:00, 663.16it/s]


Epoch: 6/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 369.40it/s]
100%|██████████| 188/188 [00:00<00:00, 639.76it/s]


Epoch: 7/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 361.84it/s]
100%|██████████| 188/188 [00:00<00:00, 648.44it/s]


Epoch: 8/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 364.54it/s]
100%|██████████| 188/188 [00:00<00:00, 644.91it/s]


Epoch: 9/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52


100%|██████████| 750/750 [00:02<00:00, 368.70it/s]
100%|██████████| 188/188 [00:00<00:00, 648.84it/s]


Epoch: 10/10
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.02	0.52
Accuracy of FNN model trained on truncated vectors: 0.5221908092498779


**Accuracies of FNN models trained on Word2Vec features**

|     | Average Features(%) | First 10 Words(%) |
|:---:|:-------------------:|:-----------------:|
| FNN |        65.45        |       52.21       |

* It can be seen that the performance of the model trained on features generated by the average of all words in the sentence performed better
* This could be due to the reason that we are considering all the words in the sentence to calculate the average whereas we are considering only the fist 10 words in the later case
* It was also observed that the second model was overfitting to the training data and hence resulted in poor performance on the testing data. To mitigate for the overfitting problem, I used a learning rate scheduler for the second FNN

* As compared with the simple models, we can see that FNN outperforms the perceptron model and achieves similar performance to the SVM model
* Since there are significantly higher degrees of freedom in a FNN as compared to the perceptron, the model has higher chances of fitting to the training data better

# Part (5) - Recurrent Neural Networks

In [19]:
def part_5_a():
    vector_size = 300
    num_classes = 3
    
    model = RNNModel(
        input_size=vector_size,
        hidden_size=20,
        num_layers=1,
        output_size=num_classes
    )
    
    data = np.load('./truncated_embedding_20.npz', allow_pickle=True)
    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']
    
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_test, y_test)
    
    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=128,
        shuffle=True,
    )
    
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=64,
        shuffle=False,
    )
    
    # Define optimizer and criterion
    optim = Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    
    model, metrics = train_model(
        model=model,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        optimizer=optim,
        criterion=criterion,
        num_epochs=20
    )
    
    print(f"Accuracy of RNN model: {metrics['val_acc']}")
    
    return model


_ = part_5_a()

100%|██████████| 375/375 [00:02<00:00, 176.77it/s]
100%|██████████| 188/188 [00:00<00:00, 466.38it/s]


Epoch: 1/20
Mode	Loss	Acc
Train	0.01	0.41
Valid	0.02	0.48


100%|██████████| 375/375 [00:02<00:00, 185.97it/s]
100%|██████████| 188/188 [00:00<00:00, 470.70it/s]


Epoch: 2/20
Mode	Loss	Acc
Train	0.01	0.50
Valid	0.02	0.52


100%|██████████| 375/375 [00:02<00:00, 186.61it/s]
100%|██████████| 188/188 [00:00<00:00, 461.27it/s]


Epoch: 3/20
Mode	Loss	Acc
Train	0.01	0.52
Valid	0.02	0.52


100%|██████████| 375/375 [00:02<00:00, 185.29it/s]
100%|██████████| 188/188 [00:00<00:00, 470.86it/s]


Epoch: 4/20
Mode	Loss	Acc
Train	0.01	0.53
Valid	0.02	0.52


100%|██████████| 375/375 [00:02<00:00, 183.75it/s]
100%|██████████| 188/188 [00:00<00:00, 463.24it/s]


Epoch: 5/20
Mode	Loss	Acc
Train	0.01	0.53
Valid	0.01	0.53


100%|██████████| 375/375 [00:02<00:00, 186.14it/s]
100%|██████████| 188/188 [00:00<00:00, 471.01it/s]


Epoch: 6/20
Mode	Loss	Acc
Train	0.01	0.54
Valid	0.01	0.54


100%|██████████| 375/375 [00:02<00:00, 185.90it/s]
100%|██████████| 188/188 [00:00<00:00, 470.88it/s]


Epoch: 7/20
Mode	Loss	Acc
Train	0.01	0.54
Valid	0.01	0.55


100%|██████████| 375/375 [00:02<00:00, 184.55it/s]
100%|██████████| 188/188 [00:00<00:00, 461.48it/s]


Epoch: 8/20
Mode	Loss	Acc
Train	0.01	0.55
Valid	0.01	0.53


100%|██████████| 375/375 [00:02<00:00, 182.97it/s]
100%|██████████| 188/188 [00:00<00:00, 474.46it/s]


Epoch: 9/20
Mode	Loss	Acc
Train	0.01	0.55
Valid	0.01	0.55


100%|██████████| 375/375 [00:02<00:00, 185.70it/s]
100%|██████████| 188/188 [00:00<00:00, 465.67it/s]


Epoch: 10/20
Mode	Loss	Acc
Train	0.01	0.56
Valid	0.01	0.56


100%|██████████| 375/375 [00:02<00:00, 184.05it/s]
100%|██████████| 188/188 [00:00<00:00, 464.66it/s]


Epoch: 11/20
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.01	0.57


100%|██████████| 375/375 [00:02<00:00, 185.82it/s]
100%|██████████| 188/188 [00:00<00:00, 472.09it/s]


Epoch: 12/20
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.01	0.57


100%|██████████| 375/375 [00:02<00:00, 185.01it/s]
100%|██████████| 188/188 [00:00<00:00, 465.14it/s]


Epoch: 13/20
Mode	Loss	Acc
Train	0.01	0.58
Valid	0.01	0.54


100%|██████████| 375/375 [00:02<00:00, 184.79it/s]
100%|██████████| 188/188 [00:00<00:00, 469.43it/s]


Epoch: 14/20
Mode	Loss	Acc
Train	0.01	0.58
Valid	0.01	0.57


100%|██████████| 375/375 [00:02<00:00, 183.60it/s]
100%|██████████| 188/188 [00:00<00:00, 463.59it/s]


Epoch: 15/20
Mode	Loss	Acc
Train	0.01	0.58
Valid	0.01	0.57


100%|██████████| 375/375 [00:02<00:00, 185.43it/s]
100%|██████████| 188/188 [00:00<00:00, 465.86it/s]


Epoch: 16/20
Mode	Loss	Acc
Train	0.01	0.59
Valid	0.01	0.57


100%|██████████| 375/375 [00:02<00:00, 185.67it/s]
100%|██████████| 188/188 [00:00<00:00, 468.87it/s]


Epoch: 17/20
Mode	Loss	Acc
Train	0.01	0.59
Valid	0.01	0.57


100%|██████████| 375/375 [00:02<00:00, 186.52it/s]
100%|██████████| 188/188 [00:00<00:00, 474.83it/s]


Epoch: 18/20
Mode	Loss	Acc
Train	0.01	0.59
Valid	0.01	0.58


100%|██████████| 375/375 [00:02<00:00, 185.94it/s]
100%|██████████| 188/188 [00:00<00:00, 461.77it/s]


Epoch: 19/20
Mode	Loss	Acc
Train	0.01	0.59
Valid	0.01	0.58


100%|██████████| 375/375 [00:02<00:00, 185.73it/s]
100%|██████████| 188/188 [00:00<00:00, 468.67it/s]

Epoch: 20/20
Mode	Loss	Acc
Train	0.01	0.59
Valid	0.01	0.58
Accuracy of RNN model: 0.5826961398124695





**Comparison of FNN and RNN**

* FNN trained on average features outperforms RNN in the task at hand
* This could be due to a similar reason that the FNN was trained on features generated from all the words as compared to the RNN that was trained only on the first 20 words
* RNNs are expected to perform better in scenarios that involve long term dependencies. Given that most of the amazon reviews are not very long, we might run into the risk of using a model that is an overkill for the current task

In [20]:
def part_5_b():
    vector_size = 300
    num_classes = 3
    
    model = RNNModel(
        input_size=vector_size,
        hidden_size=20,
        num_layers=2,
        output_size=num_classes,
        rnn_layer_type='gru'
    )
    
    data = np.load('./truncated_embedding_20.npz', allow_pickle=True)
    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']
    
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_test, y_test)
    
    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=64,
        shuffle=True,
    )
    
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=64,
        shuffle=False,
    )
    
    # Define optimizer and criterion
    optim = Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    
    model, metrics = train_model(
        model=model,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        optimizer=optim,
        criterion=criterion,
        num_epochs=20
    )
    
    print(f"Accuracy of GRU model: {metrics['val_acc']}")
    
    return model


_ = part_5_b()

100%|██████████| 750/750 [00:03<00:00, 224.99it/s]
100%|██████████| 188/188 [00:00<00:00, 466.73it/s]


Epoch: 1/20
Mode	Loss	Acc
Train	0.02	0.50
Valid	0.01	0.57


100%|██████████| 750/750 [00:03<00:00, 226.86it/s]
100%|██████████| 188/188 [00:00<00:00, 467.47it/s]


Epoch: 2/20
Mode	Loss	Acc
Train	0.01	0.59
Valid	0.01	0.60


100%|██████████| 750/750 [00:03<00:00, 225.60it/s]
100%|██████████| 188/188 [00:00<00:00, 473.66it/s]


Epoch: 3/20
Mode	Loss	Acc
Train	0.01	0.61
Valid	0.01	0.60


100%|██████████| 750/750 [00:03<00:00, 223.44it/s]
100%|██████████| 188/188 [00:00<00:00, 461.10it/s]


Epoch: 4/20
Mode	Loss	Acc
Train	0.01	0.62
Valid	0.01	0.60


100%|██████████| 750/750 [00:03<00:00, 221.74it/s]
100%|██████████| 188/188 [00:00<00:00, 457.84it/s]


Epoch: 5/20
Mode	Loss	Acc
Train	0.01	0.63
Valid	0.01	0.60


100%|██████████| 750/750 [00:03<00:00, 225.07it/s]
100%|██████████| 188/188 [00:00<00:00, 459.49it/s]


Epoch: 6/20
Mode	Loss	Acc
Train	0.01	0.64
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 223.40it/s]
100%|██████████| 188/188 [00:00<00:00, 463.89it/s]


Epoch: 7/20
Mode	Loss	Acc
Train	0.01	0.64
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 223.59it/s]
100%|██████████| 188/188 [00:00<00:00, 472.95it/s]


Epoch: 8/20
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 224.17it/s]
100%|██████████| 188/188 [00:00<00:00, 470.18it/s]


Epoch: 9/20
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 223.89it/s]
100%|██████████| 188/188 [00:00<00:00, 458.03it/s]


Epoch: 10/20
Mode	Loss	Acc
Train	0.01	0.66
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 225.29it/s]
100%|██████████| 188/188 [00:00<00:00, 469.67it/s]


Epoch: 11/20
Mode	Loss	Acc
Train	0.01	0.66
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 224.33it/s]
100%|██████████| 188/188 [00:00<00:00, 459.04it/s]


Epoch: 12/20
Mode	Loss	Acc
Train	0.01	0.66
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 222.37it/s]
100%|██████████| 188/188 [00:00<00:00, 462.96it/s]


Epoch: 13/20
Mode	Loss	Acc
Train	0.01	0.67
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 221.43it/s]
100%|██████████| 188/188 [00:00<00:00, 466.25it/s]


Epoch: 14/20
Mode	Loss	Acc
Train	0.01	0.67
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 225.34it/s]
100%|██████████| 188/188 [00:00<00:00, 470.40it/s]


Epoch: 15/20
Mode	Loss	Acc
Train	0.01	0.68
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 225.01it/s]
100%|██████████| 188/188 [00:00<00:00, 482.25it/s]


Epoch: 16/20
Mode	Loss	Acc
Train	0.01	0.68
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 223.87it/s]
100%|██████████| 188/188 [00:00<00:00, 463.48it/s]


Epoch: 17/20
Mode	Loss	Acc
Train	0.01	0.68
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 221.63it/s]
100%|██████████| 188/188 [00:00<00:00, 464.09it/s]


Epoch: 18/20
Mode	Loss	Acc
Train	0.01	0.69
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 223.32it/s]
100%|██████████| 188/188 [00:00<00:00, 458.72it/s]


Epoch: 19/20
Mode	Loss	Acc
Train	0.01	0.69
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 222.72it/s]
100%|██████████| 188/188 [00:00<00:00, 453.82it/s]

Epoch: 20/20
Mode	Loss	Acc
Train	0.01	0.69
Valid	0.01	0.61
Accuracy of GRU model: 0.611951470375061





In [21]:
def part_5_c():
    vector_size = 300
    num_classes = 3
    
    model = RNNModel(
        input_size=vector_size,
        hidden_size=20,
        num_layers=2,
        output_size=num_classes,
        rnn_layer_type='lstm'
    )
    
    data = np.load('./truncated_embedding_20.npz', allow_pickle=True)
    X_train = data['X_train']
    y_train = data['y_train']
    X_test = data['X_test']
    y_test = data['y_test']
    
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_test, y_test)
    
    train_dataloader = DataLoader(
        train_dataset, 
        batch_size=64,
        shuffle=True,
    )
    
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=64,
        shuffle=False,
    )
    
    # Define optimizer and criterion
    optim = Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    
    model, metrics = train_model(
        model=model,
        train_dataloader=train_dataloader,
        val_dataloader=val_dataloader,
        optimizer=optim,
        criterion=criterion,
        num_epochs=20
    )
    
    print(f"Accuracy of LSTM model: {metrics['val_acc']}")
    
    return model


_ = part_5_c()

100%|██████████| 750/750 [00:03<00:00, 221.48it/s]
100%|██████████| 188/188 [00:00<00:00, 454.61it/s]


Epoch: 1/20
Mode	Loss	Acc
Train	0.02	0.48
Valid	0.01	0.55


100%|██████████| 750/750 [00:03<00:00, 220.00it/s]
100%|██████████| 188/188 [00:00<00:00, 470.44it/s]


Epoch: 2/20
Mode	Loss	Acc
Train	0.01	0.57
Valid	0.01	0.57


100%|██████████| 750/750 [00:03<00:00, 221.55it/s]
100%|██████████| 188/188 [00:00<00:00, 462.72it/s]


Epoch: 3/20
Mode	Loss	Acc
Train	0.01	0.59
Valid	0.01	0.59


100%|██████████| 750/750 [00:03<00:00, 220.25it/s]
100%|██████████| 188/188 [00:00<00:00, 459.30it/s]


Epoch: 4/20
Mode	Loss	Acc
Train	0.01	0.61
Valid	0.01	0.60


100%|██████████| 750/750 [00:03<00:00, 213.15it/s]
100%|██████████| 188/188 [00:00<00:00, 454.40it/s]


Epoch: 5/20
Mode	Loss	Acc
Train	0.01	0.62
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 219.11it/s]
100%|██████████| 188/188 [00:00<00:00, 457.97it/s]


Epoch: 6/20
Mode	Loss	Acc
Train	0.01	0.63
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 218.90it/s]
100%|██████████| 188/188 [00:00<00:00, 453.92it/s]


Epoch: 7/20
Mode	Loss	Acc
Train	0.01	0.63
Valid	0.01	0.60


100%|██████████| 750/750 [00:03<00:00, 220.46it/s]
100%|██████████| 188/188 [00:00<00:00, 456.44it/s]


Epoch: 8/20
Mode	Loss	Acc
Train	0.01	0.64
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 220.11it/s]
100%|██████████| 188/188 [00:00<00:00, 456.24it/s]


Epoch: 9/20
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 221.39it/s]
100%|██████████| 188/188 [00:00<00:00, 459.93it/s]


Epoch: 10/20
Mode	Loss	Acc
Train	0.01	0.65
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 219.58it/s]
100%|██████████| 188/188 [00:00<00:00, 450.10it/s]


Epoch: 11/20
Mode	Loss	Acc
Train	0.01	0.66
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 219.41it/s]
100%|██████████| 188/188 [00:00<00:00, 452.99it/s]


Epoch: 12/20
Mode	Loss	Acc
Train	0.01	0.66
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 220.88it/s]
100%|██████████| 188/188 [00:00<00:00, 453.35it/s]


Epoch: 13/20
Mode	Loss	Acc
Train	0.01	0.67
Valid	0.01	0.61


100%|██████████| 750/750 [00:03<00:00, 218.48it/s]
100%|██████████| 188/188 [00:00<00:00, 450.89it/s]


Epoch: 14/20
Mode	Loss	Acc
Train	0.01	0.67
Valid	0.01	0.60


100%|██████████| 750/750 [00:03<00:00, 219.92it/s]
100%|██████████| 188/188 [00:00<00:00, 461.99it/s]


Epoch: 15/20
Mode	Loss	Acc
Train	0.01	0.67
Valid	0.01	0.60


100%|██████████| 750/750 [00:03<00:00, 216.85it/s]
100%|██████████| 188/188 [00:00<00:00, 451.58it/s]


Epoch: 16/20
Mode	Loss	Acc
Train	0.01	0.68
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 219.13it/s]
100%|██████████| 188/188 [00:00<00:00, 451.70it/s]


Epoch: 17/20
Mode	Loss	Acc
Train	0.01	0.68
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 223.84it/s]
100%|██████████| 188/188 [00:00<00:00, 455.64it/s]


Epoch: 18/20
Mode	Loss	Acc
Train	0.01	0.68
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 220.84it/s]
100%|██████████| 188/188 [00:00<00:00, 455.18it/s]


Epoch: 19/20
Mode	Loss	Acc
Train	0.01	0.69
Valid	0.01	0.62


100%|██████████| 750/750 [00:03<00:00, 220.87it/s]
100%|██████████| 188/188 [00:00<00:00, 462.21it/s]

Epoch: 20/20
Mode	Loss	Acc
Train	0.01	0.69
Valid	0.01	0.61
Accuracy of LSTM model: 0.6144447922706604





**Comparison of RNN, GRU and LSTM**

|      | Accuracy(%) |
|:----:|:-----------:|
|  RNN |    58.26    |
|  GRU |    61.19    |
| LSTM |    61.44    |

* As it can be seen from the above table, LSTM performs better than RNN and GRU
* RNNs also suffer from the problem of vanishing gradients which leads to smaller weight updates as epochs progress. LSTMs address this issue
* LSTMs also have the ability to forget irrelevant information using the "forget gate". This could also be the reason for the improved performance of LSTMs

# Final Conclusion

|            Model           | Accuracy(%) |
|:--------------------------:|:-----------:|
|         Perceptron         |    63.47    |
|           **SVM**          |  **65.50**  |
| **FNN (Average features)** |  **65.45**  |
|  FNN (Truncated features)  |    52.21    |
|             RNN            |    58.26    |
|             GRU            |    61.19    |
|            LSTM            |    61.44    |