# CSCI-544 Homework Assignment No. 2
### Name : Ashwin Chafale
### USC ID : 1990624801

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

## 1. Dataset Generation
- [Amazon reviews dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Jewelry_v1_00.tsv.gz)

In [2]:
df = pd.read_csv("amazon_reviews_us_Jewelry_v1_00.tsv", sep='\t', header=0, on_bad_lines='skip')
df = df[['review_body','star_rating']]
df = df.dropna()
df = df.reset_index(drop=True)
df['star_rating'] = df['star_rating'].astype(int)
df.shape

(1766748, 2)

In [3]:
df['star_rating'].value_counts()

5    1080871
4     270424
3     159654
1     155002
2     100797
Name: star_rating, dtype: int64

### i. Down-sample 5-star & 4-star reviews, Up-sample 3-star, 2-star, 1-star reviews to get 100K balance dataset
Reference : https://elitedatascience.com/imbalanced-classes

In [4]:
from sklearn.utils import resample
# separating reviews
five_star =  df.loc[ df['star_rating'] == 5]
four_star =  df.loc[ df['star_rating'] == 4]
three_star =  df.loc[ df['star_rating'] == 3]
two_star =  df.loc[ df['star_rating'] == 2]
one_star =  df.loc[ df['star_rating'] == 1]

# Downsample 5-star class
five_star_downsampled = resample(five_star,
                                 replace=False,    # sample without replacement
                                 n_samples=20000,     # to match minority class
                                 random_state=123) # reproducible results

# Downsample 4-star class
four_star_downsampled = resample(four_star,
                                 replace=False,    # sample without replacement
                                 n_samples=20000,     # to match minority class
                                 random_state=123) # reproducible results

# Upsample 3-star class
three_star_upsampled = resample(three_star,
                                replace=True,     # sample with replacement
                                n_samples=20000,    # to match majority class
                                random_state=123) # reproducible results

# Upsample 2-star class
two_star_upsampled = resample(two_star,
                              replace=True,     # sample with replacement
                              n_samples=20000,    # to match majority class
                              random_state=123) # reproducible results

# Upsample 1-star class
one_star_upsampled = resample(one_star,
                              replace=True,     # sample with replacement
                              n_samples=20000,    # to match majority class
                              random_state=123) # reproducible results

balanced_data = pd.concat([five_star_downsampled, four_star_downsampled, three_star_upsampled, two_star_upsampled, one_star_upsampled], axis=0)
balanced_data["star_rating"].value_counts()

5    20000
4    20000
3    20000
2    20000
1    20000
Name: star_rating, dtype: int64

### ii. Test-train split

In [5]:
# Train - test split
from sklearn.model_selection import train_test_split

five_star_X_train, five_star_X_test, five_star_Y_train, five_star_Y_test = \
    train_test_split(balanced_data[balanced_data["star_rating"] == 5]["review_body"],
                     balanced_data[balanced_data["star_rating"] == 5]["star_rating"], test_size=0.2, random_state=30)

four_star_X_train, four_star_X_test, four_star_Y_train, four_star_Y_test = \
    train_test_split(balanced_data[balanced_data["star_rating"] == 4]["review_body"],
                     balanced_data[balanced_data["star_rating"] == 4]["star_rating"], test_size=0.2, random_state=30)

three_star_X_train, three_star_X_test, three_star_Y_train, three_star_Y_test = \
    train_test_split(balanced_data[balanced_data["star_rating"] == 3]["review_body"],
                     balanced_data[balanced_data["star_rating"] == 3]["star_rating"], test_size=0.2, random_state=30)

two_star_X_train, two_star_X_test, two_star_Y_train, two_star_Y_test = \
    train_test_split(balanced_data[balanced_data["star_rating"] == 2]["review_body"],
                     balanced_data[balanced_data["star_rating"] == 2]["star_rating"], test_size=0.2, random_state=30)

one_star_X_train, one_star_X_test, one_star_Y_train, one_star_Y_test = \
    train_test_split(balanced_data[balanced_data["star_rating"] == 1]["review_body"],
                     balanced_data[balanced_data["star_rating"] == 1]["star_rating"], test_size=0.2, random_state=30)

X_train = pd.concat([five_star_X_train, four_star_X_train, three_star_X_train, two_star_X_train, one_star_X_train])
X_test = pd.concat([five_star_X_test, four_star_X_test, three_star_X_test, two_star_X_test, one_star_X_test])
Y_train = pd.concat([five_star_Y_train, four_star_Y_train, three_star_Y_train, two_star_Y_train, one_star_Y_train])
Y_test = pd.concat([five_star_Y_test, four_star_Y_test, three_star_Y_test, two_star_Y_test, one_star_Y_test])

print("Train: ", X_train.shape, Y_train.shape, "Test: ", (X_test.shape, Y_test.shape))

Train:  (80000,) (80000,) Test:  ((20000,), (20000,))


### iii. Data Preprocessing

In [6]:
from bs4 import BeautifulSoup
import re
import contractions
import nltk
from nltk.stem import WordNetLemmatizer

def data_preprocessing(data):
    # convert all reviews to lower case
    data = data.apply(lambda x: " ".join(x.lower() for x in str(x).split()))

    # remove HTML tags as well as URLs from reviews.
    data = data.apply(lambda x: BeautifulSoup(x).get_text())
    data = data.apply(lambda x: re.sub(r'https?://\S+|www\.\S+', "", x))

    # contractions
    data = data.apply(lambda x:contractions.fix(x))

    # remove the non-alpha characters
    data = data.apply(lambda x: " ".join([re.sub("[^A-Za-z]+","", x) for x in nltk.word_tokenize(x)]))

    # remove extra spaces among the words
    data = data.apply(lambda x: re.sub(' +', ' ', x))

    # removing stop words
    stop_words=['the', 'a', 'and', 'is', 'be', 'will', 'are']
    data = data.apply(lambda x: " ".join([x for x in x.split() if x not in stop_words]))

    lemmatizer = WordNetLemmatizer()
    data = data.apply(lambda x: " ".join([lemmatizer.lemmatize(w) for w in nltk.word_tokenize(x)]))

    return data

In [7]:
X_train = data_preprocessing(X_train)
X_test = data_preprocessing(X_test)

## 2. Word Embedding 
Reference : https://radimrehurek.com/gensim/auto_examples/tutorials/run_word2vec.html

### a) Exploring pretrained “word2vec-google-news-300”

In [8]:
# Loading 'word2vec-google-news-300' model
import gensim.downloader as api
wv_google = api.load('word2vec-google-news-300')

In [9]:
# checking semantic similarities
# Example 1
result = wv_google.most_similar(positive=['woman', 'king'], negative=['man'])
print("{}: {:.4f}".format(*result[0]))

queen: 0.7118


In [10]:
# Example 2
wv_google.similarity('excellent', 'outstanding')

0.55674857

In [11]:
# Example 3
wv_google.doesnt_match(['fire', 'water', 'land', 'sea', 'air', 'car'])

'car'

### b) Train a Word2Vec model using your own dataset.
Reference : https://www.kaggle.com/code/chewzy/tutorial-how-to-train-your-custom-word-embedding

In [16]:
from gensim.models import Word2Vec
full_dataset = pd.concat([X_train, X_test],axis=0)
sentences = []
for review in full_dataset:
  tokens = review.split()
  sentences.append(tokens)


In [17]:
custom_wv_model = Word2Vec(sentences=sentences, size=300, window=11, min_count=10)

In [19]:
result = custom_wv_model.most_similar(positive=['woman', 'king'], negative=['man'])
print("{}: {:.4f}".format(*result[0]))

avenue: 0.5790


In [20]:
# Example 1
custom_wv_model.similarity('excellent', 'outstanding')

0.7977613

In [21]:
# Example 2
custom_wv_model.most_similar("good")

[('decent', 0.8035435676574707),
 ('great', 0.7997811436653137),
 ('nice', 0.6498663425445557),
 ('high', 0.6413455009460449),
 ('excellent', 0.6235317587852478),
 ('ok', 0.6038389205932617),
 ('fantastic', 0.5990501046180725),
 ('poor', 0.5846014022827148),
 ('bad', 0.5676741003990173),
 ('control', 0.5650346875190735)]

#### Question : What do you conclude from comparing vectors generated by yourself and the pretrained model? Which of the Word2Vec models seems to encode semantic similarities between words better?
Answer : Pretrained Google word2vec model have diverse variety of words in its vocabulary and therefore is able to capture semantic similarities of diverse set of  words better. For our own custom build model we need to a large and diverse corpus to train to get the desired results.
Hence, Pretrained google word2vec is better than our own custom build.

## 3. Simple models

### Average Word2Vec vectors for each review

In [22]:
# average word2vec
def get_avg_wor2vec(_reviews):
    word_list = _reviews.split()
    words_cnt = 0
    word_vector = np.zeros(300)
    for word in word_list:
        if word in wv_google:
            word_vector += wv_google[word]
            words_cnt += 1
    if words_cnt != 0:
        word_vector /= words_cnt
    return word_vector

In [23]:
train_vec = []
for reviews in X_train:
    train_vec.append(get_avg_wor2vec(reviews))
train_vec = np.array(train_vec)

test_vec = []
for reviews in X_test:
    test_vec.append(get_avg_wor2vec(reviews))
test_vec = np.array(test_vec)

### a) Perceptron

In [24]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report
perceptron = Perceptron(max_iter=1000, random_state=0)
perceptron.fit(train_vec,Y_train)
y_test_predicted = perceptron.predict(test_vec)

report = classification_report(Y_test, y_test_predicted, output_dict=True)
pd.DataFrame.from_dict(report)

Unnamed: 0,1,2,3,4,5,accuracy,macro avg,weighted avg
precision,0.469344,0.220267,0.363636,0.397626,0.697067,0.33365,0.429588,0.429588
recall,0.54925,0.70425,0.001,0.0335,0.38025,0.33365,0.33365,0.33365
f1-score,0.506163,0.335577,0.001995,0.061794,0.492074,0.33365,0.27952,0.27952
support,4000.0,4000.0,4000.0,4000.0,4000.0,0.33365,20000.0,20000.0


### b) SVM

In [25]:
from sklearn.svm import LinearSVC
svm = LinearSVC(multi_class="ovr", random_state=0)
svm.fit(train_vec,Y_train)
y_test_predicted = svm.predict(test_vec)

report = classification_report(Y_test, y_test_predicted, output_dict=True)
pd.DataFrame.from_dict(report)

Unnamed: 0,1,2,3,4,5,accuracy,macro avg,weighted avg
precision,0.511508,0.399172,0.401657,0.435879,0.585394,0.4851,0.466722,0.466722
recall,0.71675,0.26525,0.37575,0.29825,0.7695,0.4851,0.4851,0.4851
f1-score,0.596981,0.318714,0.388272,0.354164,0.664938,0.4851,0.464614,0.464614
support,4000.0,4000.0,4000.0,4000.0,4000.0,0.4851,20000.0,20000.0


#### Comparing performance of Perceptron & SVM model trained using TF-IDF and Word2Vec features
Reading accuracy values of Perceptron and SVM model from HW1

In [None]:
perceptron_using_tfidf = pd.read_csv("perceptron.csv")
perceptron_using_tfidf

Unnamed: 0.1,Unnamed: 0,1,2,3,4,5,accuracy,macro avg,weighted avg
0,precision,0.529361,0.302142,0.320768,0.383514,0.590234,0.414,0.425204,0.425204
1,recall,0.462,0.469,0.22125,0.36175,0.556,0.414,0.414,0.414
2,f1-score,0.493392,0.367519,0.261873,0.372314,0.572606,0.414,0.413541,0.413541
3,support,4000.0,4000.0,4000.0,4000.0,4000.0,0.414,20000.0,20000.0


In [None]:
svm_using_tfidf = pd.read_csv("svm.csv")
svm_using_tfidf

Unnamed: 0.1,Unnamed: 0,1,2,3,4,5,accuracy,macro avg,weighted avg
0,precision,0.563424,0.40489,0.424075,0.472624,0.639847,0.51355,0.500972,0.500972
1,recall,0.67625,0.3395,0.384,0.4165,0.7515,0.51355,0.51355,0.51355
2,f1-score,0.614703,0.369323,0.403044,0.442791,0.691193,0.51355,0.504211,0.504211
3,support,4000.0,4000.0,4000.0,4000.0,4000.0,0.51355,20000.0,20000.0


#### Question : What do you conclude from comparing performances for the models trained using the two different feature types (TF-IDF and your trained Word2Vec features)?
Answer : Simple model (Perceptron & SVM) trained using TF-IDF has better accuracy as compared to model trained using Word2Vec.

## 4. Feedforward Neural Networks 

### a) Train using average Word2Vec

In [26]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import SGD

In [27]:
model = Sequential()
model.add(Dense(50, input_shape = (300,),activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(5, activation='softmax'))
sgd = SGD(0.01)
model.compile(loss="sparse_categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 50)                15050     
                                                                 
 dense_1 (Dense)             (None, 10)                510       
                                                                 
 dropout (Dropout)           (None, 10)                0         
                                                                 
 dense_2 (Dense)             (None, 5)                 55        
                                                                 
Total params: 15,615
Trainable params: 15,615
Non-trainable params: 0
_________________________________________________________________


In [28]:
X_train_vec = X_train.apply(lambda x: get_avg_wor2vec(x)).to_numpy()
X_test_vec = X_test.apply(lambda x: get_avg_wor2vec(x)).to_numpy()
X_train_vec = np.concatenate([np.concatenate(X_train_vec, axis=0)], axis=0).reshape(-1, 300)
X_test_vec = np.concatenate([np.concatenate(X_test_vec, axis=0)], axis=0).reshape(-1, 300)

In [29]:
Y_train_np = Y_train.apply(lambda x : x - 1)
Y_train_np = Y_train_np.to_numpy()
Y_test_np = Y_test.apply(lambda x : x - 1)
Y_test_np = Y_test_np.to_numpy()

In [30]:
model.fit(X_train_vec, Y_train_np, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f7ab2d54f50>

In [31]:
test_loss, test_acc = model.evaluate(X_test_vec, Y_test_np)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

Test Loss: 1.129124641418457
Test Accuracy: 0.5059999823570251


### b) Generate the input feature by concatenating the first 10 Word2Vec vectors for each review as the input feature

In [32]:
def get_concatenated_first10_feature_vector(dataset):
  feature_10_word2vec = []
  for reviews in dataset:
    words = reviews.split()
    max_words = 10
    review_embedding = []
    for word in words:
      if len(review_embedding) < max_words:
        word_vec = np.zeros(300)
        if word in wv_google:
          word_vec += wv_google[word]
        review_embedding.append(word_vec)
    if len(review_embedding) < max_words:
      while len(review_embedding) != max_words:
        review_embedding.append(np.zeros(300))
    review_embedding = np.concatenate(review_embedding)
    feature_10_word2vec.append(review_embedding)
  feature_10_word2vec = np.array(feature_10_word2vec)
  return feature_10_word2vec

In [33]:
model = Sequential()
model.add(Dense(50, input_shape = (3000,),activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(5, activation='softmax'))
sgd = SGD(0.01)
model.compile(loss="sparse_categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 50)                150050    
                                                                 
 dense_4 (Dense)             (None, 10)                510       
                                                                 
 dropout_1 (Dropout)         (None, 10)                0         
                                                                 
 dense_5 (Dense)             (None, 5)                 55        
                                                                 
Total params: 150,615
Trainable params: 150,615
Non-trainable params: 0
_________________________________________________________________


In [34]:
X_train_10_word2vec = get_concatenated_first10_feature_vector(X_train)
X_test_10_word2vec = get_concatenated_first10_feature_vector(X_test)

In [35]:
model.fit(X_train_10_word2vec, Y_train_np, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f7ab25b4ed0>

In [36]:
test_loss, test_acc = model.evaluate(X_test_10_word2vec, Y_test_np)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

Test Loss: 3.709632396697998
Test Accuracy: 0.4054499864578247


### Question : What do you conclude by comparing accuracy values you obtain with those obtained in the “’Simple Models” section?
Answer : As compared to simple model (Perceptron test accuracy = 33.365% & SVM test accuacy = 48.51%) first version of FNN (trained on complete word2vec, test accuracy = 50.60%) performed better than the simple model.

Where as the second version of FNN (10 word2vec concatenated, test accuracy = 40.54%) performed better than the perceptron however Simple model SVM accuracy (SVM test accuacy = 48.51%) is better in this case.

## 5. Recurrent Neural Networks

### a) Simple RNN

In [37]:
def get_first20_feature_embedding(dataset):
  feature_vec_embedding = []
  for reviews in dataset:
    words = reviews.split()
    max_vocab = 20
    review_embedding = []
    for word in words:
      if len(review_embedding) < max_vocab:
        word_embedd = np.zeros(300)
        if word in wv_google:
          word_embedd += wv_google[word]
          review_embedding.append(word_embedd)
      else:
        break
    if len(review_embedding) < max_vocab:
      while len(review_embedding) != max_vocab:
        review_embedding.append(np.zeros(300))
    feature_vec_embedding.append(review_embedding)
  feature_vec_embedding = np.array(feature_vec_embedding)
  return feature_vec_embedding

In [38]:
X_train_vec_embedding = get_first20_feature_embedding(X_train)
X_test_vec_embedding = get_first20_feature_embedding(X_test)

In [39]:
# building RNN model
from keras.layers import SimpleRNN
model = keras.Sequential()
model.add(SimpleRNN(20, activation='relu'))
model.add(Dense(5, activation='softmax'))
sgd = SGD(0.001)
model.compile(loss="sparse_categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])
model.build(input_shape=(None, 20, 300))
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, 20)                6420      
                                                                 
 dense_6 (Dense)             (None, 5)                 105       
                                                                 
Total params: 6,525
Trainable params: 6,525
Non-trainable params: 0
_________________________________________________________________


In [40]:
model.fit(X_train_vec_embedding, Y_train_np, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f7ab25734d0>

In [41]:
test_loss, test_acc = model.evaluate(X_test_vec_embedding, Y_test_np)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

Test Loss: 1.1829966306686401
Test Accuracy: 0.4791499972343445


### Question : What do you conclude by comparing accuracy values you obtain with those obtained with feedforward neural network models?
Answer =>
1. Part a) FNN model (test accuracy = 50.60%) performed slightly better than RNN (test accuracy = 47.91%)
2. Part b) FNN model (test accuracy = 40.54%) performed was not good, RNN accuracy is better

### b) GRU

In [42]:
# building RNN model
from keras.layers import GRU
model = keras.Sequential()
model.add(GRU(20, activation='relu'))
model.add(Dense(5, activation='softmax'))
sgd = SGD(0.001)
model.compile(loss="sparse_categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])
model.build(input_shape=(None, 20, 300))
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 20)                19320     
                                                                 
 dense_7 (Dense)             (None, 5)                 105       
                                                                 
Total params: 19,425
Trainable params: 19,425
Non-trainable params: 0
_________________________________________________________________


In [43]:
model.fit(X_train_vec_embedding, Y_train_np, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f7aa1c46a50>

In [44]:
test_loss, test_acc = model.evaluate(X_test_vec_embedding, Y_test_np)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

Test Loss: 1.223007082939148
Test Accuracy: 0.4593000113964081


### Question: What do you conclude by comparing accuracy values you obtain with those obtained using simple RNN ?
Answer =>
Simple RNN (test accuracy = 47.91%) performed better on unseen test data than GRU (test accuracy = 45.93%)