## END TO END DEEP LEARNING PROJECT USING SIMPLE RNN

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN,Dense

In [None]:
# Load the IMDB dataset

max_features = 10000  # Vocabulary size passed to load_data as num_words
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)


# Print the shape of the training reviews and of the matching labels
print(f'Training data shape: {X_train.shape},Training labels shape: {y_train.shape}')

In [None]:
# Integer-encoded (not one-hot) representation of the first review: every entry is a
# word index below 10,000, because the vocabulary was capped with num_words = max_features.
X_train[0]

In [None]:
## Look at one training example together with its label
sample_review, sample_label = X_train[0], y_train[0]

print(f'Sample review (as integer) : {sample_review}')
print(f'Sample label: { sample_label}')

In [None]:
## Build a lookup from integer index back to word (for our own understanding)

# get_word_index() gives a dictionary with the word as key and its integer index as value.
word_index = imdb.get_word_index()
word_index
# Invert it: the integer index becomes the key and the word becomes the value.
reverse_word_index = dict(zip(word_index.values(), word_index.keys()))
reverse_word_index

In [None]:

# Decode the sample review one token at a time, printing each decoded word.
# Indices below 3 are shown as '?', and every other index is looked up with an
# offset of 3 (per the Keras docs, load_data shifts word ranks by index_from=3).
decoded_words = []
for index in sample_review:
    if index < 3:
        word = '?'
        print("?")
    else:
        # .get() falls back to '?' instead of raising KeyError when the shifted
        # index has no entry in the mapping (e.g. index 3 -> key 0).
        word = reverse_word_index.get(index - 3, '?')
        print(f' {word}')
    decoded_words.append(word)

# Join once at the end instead of growing the string inside the loop; each word
# keeps its leading space so the resulting text matches the original format.
decode_review = ''.join(' ' + word for word in decoded_words)

decode_review

In [None]:
# Compact alternative to the loop above: look up every shifted index,
# fall back to '?' when it has no entry, and join the words with spaces.

decode_review = ' '.join(
    reverse_word_index.get(token_id - 3, '?') for token_id in sample_review
)
decode_review

In [None]:
from tensorflow.keras.preprocessing import sequence

max_len = 500

# Bring every review to exactly max_len tokens. 'pre' is the pad_sequences default
# for both options, spelled out here: shorter reviews are zero-padded at the front
# and longer ones are truncated from the front.
X_train = sequence.pad_sequences(
    X_train, maxlen=max_len, padding='pre', truncating='pre'
)
X_test = sequence.pad_sequences(
    X_test, maxlen=max_len, padding='pre', truncating='pre'
)

In [None]:
# Display the padded training sequences
X_train

In [None]:
# Display the padded test sequences
X_test

In [None]:
## Build the Simple RNN model
model = Sequential()
# Embedding layer: converts each word index into a dense vector.
#   input dimension  = max_features (the vocabulary size, 10,000)
#   output dimension = 128 (size of each word vector)
#   input_length     = max_len (the padded review length, 500)
model.add(Embedding(max_features,128,input_length =max_len))
model.summary()

In [None]:
# Now add a SimpleRNN layer with 128 hidden units and ReLU activation to the model
model.add(SimpleRNN(128,activation='relu'))


In [None]:
# Add the output layer to the model: a single unit with sigmoid activation
model.add(Dense(1,activation = 'sigmoid'))

# Print the layer-by-layer summary of the model built so far
model.summary()

In [None]:
# Compiling the model fixes the optimizer and the loss function used for training.
# Here: the Adam optimizer, binary cross-entropy loss, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
## Use Early Stopping
from tensorflow.keras.callbacks import EarlyStopping

# Monitor the validation loss and stop training once it has failed to improve for
# `patience` consecutive epochs, restoring the weights from the best epoch seen.
# patience must stay below the number of training epochs (10 in the fit call);
# with patience = 10 the callback could never stop a 10-epoch run early.
earlystopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
## Fit the model with the early-stopping callback attached
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,  # hold out 20% of the training data for validation
    callbacks=[earlystopping],
)


In [None]:
## Save the trained model to a file
# NOTE(review): the .h5 extension presumably selects Keras' legacy HDF5 format; newer
# Keras versions recommend the native .keras format. Confirm what downstream loaders
# expect before changing the file name.
model.save('simple_rnn_imdb.h5')
