<a href="https://colab.research.google.com/github/hikmatfarhat-ndu/CSC645/blob/master/sequence_modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import tensorflow as tf
print(tf.__version__)

2.0.0


#### START HERE Load and transform the IMDB review sentiment dataset

In [3]:
# A function to load and preprocess the IMDB dataset

def get_and_pad_imdb_dataset(num_words=10000, maxlen=None, index_from=2):
    from tensorflow.keras.datasets import imdb

    # Load the reviews
    (x_train, y_train), (x_test, y_test) = imdb.load_data(path='imdb.npz',
                                                          num_words=num_words,
                                                          skip_top=0,
                                                          maxlen=maxlen,
                                                          start_char=1,
                                                          oov_char=2,
                                                          index_from=index_from)

    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                        maxlen=None,
                                                        padding='pre',
                                                        truncating='pre',
                                                        value=0)
    
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
                                                           maxlen=None,
                                                           padding='pre',
                                                           truncating='pre',
                                                           value=0)
    return (x_train, y_train), (x_test, y_test)

In [4]:
# Load the dataset
num_words=5000
maxlen=300
(x_train,y_train),(x_test,y_test)=get_and_pad_imdb_dataset(num_words=num_words,maxlen=300)

In [5]:
# A function to get the dataset word index

def get_imdb_word_index(num_words=5000, index_from=2):
    imdb_word_index = tf.keras.datasets.imdb.get_word_index(
                                        path='imdb_word_index.json')
    imdb_word_index = {key: value + index_from for
                       key, value in imdb_word_index.items() if value <= num_words-index_from}
    return imdb_word_index

In [6]:
# Get the word index using get_imdb_word_index()
imdb_word_index=get_imdb_word_index(num_words=num_words)


#### Create a recurrent neural network model

In [7]:
# Get the maximum index value

max_index_value=max(imdb_word_index.values())
embedding_dim=128

In [8]:
# Using Sequential, build the model:
# 1. Embedding.
# 2. LSTM.
# 3. Dense.
model=tf.keras.Sequential([
     tf.keras.layers.Embedding(input_dim=max_index_value+1,output_dim=embedding_dim,mask_zero=True),
     tf.keras.layers.Dropout(0.3),
     tf.keras.layers.LSTM(units=60),
     tf.keras.layers.Dropout(0.3),
     tf.keras.layers.Dense(units=1,activation='sigmoid')                
])


#### Compile and fit the model

In [9]:
# Compile the model with binary cross-entropy loss

model.compile(loss='binary_crossentropy',metrics=['accuracy'],optimizer='adam')

In [10]:
# Fit the model and save its training history
history=model.fit(x_train,y_train,epochs=5,batch_size=500,validation_data=(x_test,y_test))


Train on 25000 samples, validate on 13501 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
print(model.metrics_names)


['loss', 'accuracy']


In [15]:
model.evaluate(x_test,y_test,verbose=1)
print("done")

done


#### Plot learning curves

In [None]:
# Plot the training and validation accuracy

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

history_dict = history.history

acc      = history_dict['accuracy']
val_acc  = history_dict['val_accuracy']
loss     = history_dict['loss']
val_loss = history_dict['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(14,5))
plt.plot(epochs, acc, marker='.', label='Training acc')
plt.plot(epochs, val_acc, marker='.', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Classification accuracy')
plt.legend(loc='lower right')
plt.ylim(0, 1);

#### Make predictions with the model

In [None]:
# View the first test data example sentence
# (invert the word index)
inv_imdb_word_index={val:key for key,val in imdb_word_index.items()}
[inv_imdb_word_index[index] for index in x_test[0] if index >2]



In [None]:
# Get the model prediction using model.predict()

model.predict(x_test[None,0,:])

In [None]:
# Get the corresponding label
y_test[0]
