# Text Classification Using RNN, LSTM, GRU and, Bidirectional LSTM

#### Comparing the performance of different versions of these Recurrent Neural Networks
=======================================================================================

### Import required libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
import tensorflow as tf
from tensorflow import keras
import sklearn
import re
import tqdm


nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to C:\Users\Atharva
[nltk_data]     J\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Atharva
[nltk_data]     J\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\Atharva
[nltk_data]     J\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\Atharva
[nltk_data]     J\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

### Import data - Using the IMDB 50K reviews dataset

In [2]:
review_data = pd.read_csv("D:\\6461_AdvancedAIforBusinessApplications\\Practice\\Datasets\\imdb_movie_reviews_50k.csv", encoding='utf-8')

In [3]:
review_data['sentiment'] = pd.get_dummies(review_data['sentiment'])['positive']

In [4]:
review_data.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1


In [5]:
review_data.shape

(50000, 2)

### Preprocess raw text data

In [6]:
## stopword removal, stemming/lemmatization
stopwords = nltk.corpus.stopwords
lemmatizer = nltk.stem.WordNetLemmatizer()
    
def preprocess(review):
    review = re.sub('[^a-zA-Z0-9]', ' ', review)
    review = review.lower()
    review = review.split()
#     review = [lemmatizer.lemmatize(word) for word in review if word not in set(stopwords.words('english'))]
    review = ' '.join(review)
    return review

In [7]:
## preprocessing
review_data['review'] = review_data['review'].apply(preprocess)

In [8]:
review_data

Unnamed: 0,review,sentiment
0,one of the other reviewers has mentioned that ...,1
1,a wonderful little production br br the filmin...,1
2,i thought this was a wonderful way to spend ti...,1
3,basically there s a family where a little boy ...,0
4,petter mattei s love in the time of money is a...,1
...,...,...
49995,i thought this movie did a down right good job...,1
49996,bad plot bad dialogue bad acting idiotic direc...,0
49997,i am a catholic taught in parochial elementary...,0
49998,i m going to have to disagree with the previou...,0


In [9]:
text = np.array(review_data['review'])
labels = np.array(review_data['sentiment'])

### Train-Test split

In [10]:
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(text, labels, test_size=0.25, random_state=42)

In [11]:
vectorize_layer = keras.layers.experimental.preprocessing.TextVectorization(
    max_tokens = None,
    standardize = 'lower_and_strip_punctuation',
    split = 'whitespace',
    ngrams = None,
    output_mode = 'int',
    output_sequence_length = None
)

In [12]:
# apply it to the text data with "adapt"
vectorize_layer.adapt(X_train)

In [13]:
# check preprocessing results, such as vocabulary, 
vectorize_layer.get_vocabulary()

['',
 '[UNK]',
 'the',
 'and',
 'a',
 'of',
 'to',
 'is',
 'br',
 'it',
 'in',
 'i',
 'this',
 'that',
 's',
 'was',
 'as',
 'movie',
 'for',
 'with',
 'but',
 'film',
 'you',
 't',
 'on',
 'not',
 'are',
 'he',
 'his',
 'have',
 'one',
 'be',
 'all',
 'at',
 'they',
 'by',
 'an',
 'who',
 'so',
 'from',
 'like',
 'there',
 'or',
 'just',
 'her',
 'out',
 'about',
 'if',
 'has',
 'what',
 'some',
 'good',
 'can',
 'very',
 'more',
 'when',
 'she',
 'up',
 'no',
 'time',
 'my',
 'even',
 'would',
 'which',
 'only',
 'story',
 'really',
 'see',
 'their',
 'had',
 'me',
 'well',
 'we',
 'were',
 'much',
 'than',
 'bad',
 'do',
 'been',
 'get',
 'people',
 'other',
 'great',
 'also',
 'will',
 'into',
 'don',
 'because',
 'how',
 'him',
 'first',
 'most',
 'made',
 'them',
 'its',
 'then',
 'make',
 'could',
 'way',
 'movies',
 'too',
 'any',
 'after',
 'characters',
 'think',
 'watch',
 'character',
 'films',
 'seen',
 'two',
 'many',
 'being',
 'plot',
 'never',
 'love',
 'acting',
 'lif

In [14]:
len(vectorize_layer.get_vocabulary())

90212

In [15]:
# now use it to process some text
input_text = [['very good movie'], ['Mochen Yang']]
vectorize_layer(input_text)

<tf.Tensor: shape=(2, 3), dtype=int64, numpy=
array([[   53,    51,    17],
       [    1, 13758,     0]], dtype=int64)>

### Training an RNN Model

In [16]:
model_rnn = keras.Sequential()

model_rnn.add(vectorize_layer)

model_rnn.add(keras.layers.Embedding(
    input_dim = len(vectorize_layer.get_vocabulary()),
    output_dim = 64,
    mask_zero = True
))

model_rnn.add(keras.layers.SimpleRNN(128)) # see note below

model_rnn.add(keras.layers.Dense(1, activation = 'sigmoid'))

In [17]:
# configure training / optimization
model_rnn.compile(loss = keras.losses.BinaryCrossentropy(),
                  optimizer='adam',
                  metrics=['accuracy'])

In [18]:
# training with 20% validation and 10 epochs.
model_rnn.fit(x = X_train, y = y_train, validation_split = 0.2,
              epochs=10, batch_size = 32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x149bf206f20>

In [19]:
model_rnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, None, 64)          5773568   
                                                                 
 simple_rnn (SimpleRNN)      (None, 128)               24704     
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 5,798,401
Trainable params: 5,798,401
Non-trainable params: 0
_________________________________________________________________


In [20]:
# try to make some predicitons
y_pred = model_rnn.predict(X_test)



In [22]:
y_pred

array([[0.9065595 ],
       [0.1845639 ],
       [0.3194253 ],
       ...,
       [0.11646684],
       [0.9944436 ],
       [0.9778764 ]], dtype=float32)

In [24]:
y_pred_final = [1 if x>=0.50 else 0 for x in y_pred]

In [26]:
y_pred_final

[1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,


In [27]:
y_test

array([1, 1, 0, ..., 1, 1, 1], dtype=uint8)

### Evaluate RNN

In [28]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [29]:
accuracy_score(y_test,y_pred_final)

0.77504

In [30]:
print(classification_report(y_pred_final,y_test))

              precision    recall  f1-score   support

           0       0.78      0.77      0.77      6301
           1       0.77      0.78      0.78      6199

    accuracy                           0.78     12500
   macro avg       0.78      0.78      0.78     12500
weighted avg       0.78      0.78      0.78     12500



### RNN with LSTM

In [31]:
model_lstm = keras.Sequential()

model_lstm.add(vectorize_layer)

model_lstm.add(keras.layers.Embedding(
    input_dim = len(vectorize_layer.get_vocabulary()),
    output_dim = 64,
    mask_zero = True
))

model_lstm.add(keras.layers.LSTM(128))

model_lstm.add(keras.layers.Dense(1, activation = 'sigmoid'))

In [32]:
# configure training / optimization
model_lstm.compile(loss = keras.losses.BinaryCrossentropy(),
                   optimizer='adam',
                   metrics=['accuracy'])

In [33]:
# training with 20% validation and 10 epochs.
model_lstm.fit(x = X_train, y = y_train, validation_split = 0.2,
               epochs=10, batch_size = 32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x149c77522f0>

In [34]:
# try to make some predicitons
model_lstm.predict([['I hate this meal!'], ['I love this restaurant']])



array([[0.9303289 ],
       [0.32468414]], dtype=float32)

In [35]:
# try to make some predicitons
y_pred_lstm = model_lstm.predict(X_test)



In [37]:
y_pred_lstm_final = [1 if x>=0.50 else 0 for x in y_pred_lstm]

In [38]:
accuracy_score(y_test,y_pred_lstm_final)

0.88568

In [39]:
print(classification_report(y_pred_lstm_final,y_test))

              precision    recall  f1-score   support

           0       0.89      0.88      0.89      6286
           1       0.88      0.90      0.89      6214

    accuracy                           0.89     12500
   macro avg       0.89      0.89      0.89     12500
weighted avg       0.89      0.89      0.89     12500



### Train GRU within the same dataset

In [48]:
model_gru = keras.Sequential()

model_gru.add(vectorize_layer)

model_gru.add(keras.layers.Embedding(
    input_dim = len(vectorize_layer.get_vocabulary()),
    output_dim = 64,
    mask_zero = True
))

model_gru.add(keras.layers.GRU(128))

model_gru.add(keras.layers.Dense(1, activation = 'sigmoid'))

In [49]:
# configure training / optimization
model_gru.compile(loss = keras.losses.BinaryCrossentropy(),
                  optimizer='adam',
                  metrics=['accuracy'])

In [51]:
# training with 20% validation and 10 epochs.
model_gru.fit(x = X_train, y = y_train, validation_split = 0.2,
              epochs=10, batch_size = 32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x149c7753ca0>

In [52]:
# try to make some predicitons
model_gru.predict([['I hate this meal!'], ['I love this restaurant']])



array([[0.87545353],
       [0.9212439 ]], dtype=float32)

In [53]:
model_gru.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding_2 (Embedding)     (None, None, 64)          5773568   
                                                                 
 gru (GRU)                   (None, 128)               74496     
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
Total params: 5,848,193
Trainable params: 5,848,193
Non-trainable params: 0
_________________________________________________________________


In [54]:
# try to make some predicitons
y_pred_gru = model_gru.predict(X_test)



In [55]:
y_pred_gru_final = [1 if x>=0.50 else 0 for x in y_pred_gru]

In [56]:
accuracy_score(y_test,y_pred_gru_final)

0.88848

In [57]:
print(classification_report(y_pred_gru_final,y_test))

              precision    recall  f1-score   support

           0       0.87      0.90      0.89      5985
           1       0.90      0.88      0.89      6515

    accuracy                           0.89     12500
   macro avg       0.89      0.89      0.89     12500
weighted avg       0.89      0.89      0.89     12500



### Train Bi-directional LSTM with the same dataset.

In [58]:
model_bilstm = keras.Sequential()

model_bilstm.add(vectorize_layer)

model_bilstm.add(keras.layers.Embedding(
    input_dim = len(vectorize_layer.get_vocabulary()),
    output_dim = 64,
    mask_zero = True
))

model_bilstm.add(keras.layers.Bidirectional(keras.layers.LSTM(128)))

model_bilstm.add(keras.layers.Dense(1, activation = 'sigmoid'))

In [59]:
# configure training / optimization
model_bilstm.compile(loss = keras.losses.BinaryCrossentropy(),
                     optimizer='adam',
                     metrics=['accuracy'])

In [61]:
# training with 20% validation and 10 epochs.
model_bilstm.fit(x = X_train, y = y_train, validation_split = 0.2,
                 epochs = 10, batch_size = 32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x14988c7ada0>

In [63]:
# try to make some predicitons
model_bilstm.predict([['I hate this meal!'], ['I love this restaurant']])



array([[0.82132936],
       [0.88596606]], dtype=float32)

In [64]:
model_bilstm.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding_3 (Embedding)     (None, None, 64)          5773568   
                                                                 
 bidirectional (Bidirectiona  (None, 256)              197632    
 l)                                                              
                                                                 
 dense_3 (Dense)             (None, 1)                 257       
                                                                 
Total params: 5,971,457
Trainable params: 5,971,457
Non-trainable params: 0
_________________________________________________________________


In [65]:
# try to make some predicitons
y_pred_bilstm = model_bilstm.predict(X_test)



In [66]:
y_pred_bilstm_final = [1 if x>=0.50 else 0 for x in y_pred_bilstm]

In [67]:
accuracy_score(y_test,y_pred_bilstm_final)

0.87912

In [68]:
print(classification_report(y_pred_bilstm_final,y_test))

              precision    recall  f1-score   support

           0       0.84      0.90      0.87      5738
           1       0.91      0.86      0.88      6762

    accuracy                           0.88     12500
   macro avg       0.88      0.88      0.88     12500
weighted avg       0.88      0.88      0.88     12500

