# Recurrent Net for Sentiment Classification

This RNN performs sentiment analysis on the IMDB review dataset.

In [None]:
import keras
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, Activation, SpatialDropout1D
from keras.layers import Embedding, SimpleRNN
from keras.callbacks import ModelCheckpoint
import os
from sklearn.metrics import roc_auc_score
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

#### Set Hyperparameters

In [None]:
# Directory used for the per-epoch checkpoint files.
output_dir = 'model_output/rnn'

# Training schedule.
epochs, batch_size = 16, 128

# Vocabulary / embedding settings.
n_dim = 64
n_unique_words = 10000
max_review_length = 100  # lowered due to vanishing gradient over time
pad_type = 'pre'
trunc_type = 'pre'
drop_embed = 0.2

# Recurrent layer settings.
n_rnn = 256
drop_rnn = 0.2

#### Load Data

In [None]:
# Load the IMDB reviews, keeping only the n_unique_words most frequent
# tokens; the second returned split is used as the validation set.
(X_train, y_train), (X_valid, y_valid) = imdb.load_data(
    num_words=n_unique_words,
)

#### Preprocess Data

In [None]:
# Pad or truncate every review in both splits to max_review_length tokens,
# using identical settings (0 is the padding value).
X_train, X_valid = (
    pad_sequences(
        reviews,
        maxlen=max_review_length,
        padding=pad_type,
        truncating=trunc_type,
        value=0,
    )
    for reviews in (X_train, X_valid)
)

#### Design Deep Net Architecture

In [None]:
model = Sequential()
# Embed each of the n_unique_words token ids as an n_dim-dimensional vector.
model.add(Embedding(n_unique_words, n_dim, input_length=max_review_length))
model.add(SpatialDropout1D(drop_embed))
# The activation must be a name Keras recognises; 'tanh' is SimpleRNN's
# default activation.
model.add(SimpleRNN(n_rnn, dropout=drop_rnn, activation='tanh'))

# Single sigmoid unit: one probability-like score per review.
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

#### Configure the Model

In [None]:
# Binary classification: cross-entropy loss, Adam optimizer, track accuracy.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Checkpoint callback; the {epoch:02d} placeholder in the filename is filled
# in by Keras so each epoch gets its own file under output_dir.
modelcheckpoint = ModelCheckpoint(
    filepath=output_dir + '/weights.{epoch:02d}.hdf5',
)

In [None]:
# Create the checkpoint directory (and any missing parents). exist_ok=True
# makes this a no-op when the directory is already there and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(output_dir, exist_ok=True)

#### Train the Model

In [None]:
# Validate on the held-out (X_valid, y_valid) split instead of carving 40%
# out of the training data, and pass the checkpoint callback so per-epoch
# weight files are written to output_dir for the evaluation step.
model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_valid, y_valid),
    callbacks=[modelcheckpoint],
)

#### Evaluate

In [None]:
# Restore the weights saved for one chosen epoch from output_dir.
# This is zero-indexed. So, Epoch 4 corresponds to file weights.03.hdf5
# NOTE(review): whether the {epoch} number in checkpoint filenames is zero- or
# one-indexed may depend on the Keras version -- confirm against the files
# actually present in output_dir. The file only exists if training was run
# with the ModelCheckpoint callback.
model.load_weights(output_dir+'/weights.03.hdf5')

In [None]:
# The model ends in a sigmoid unit, so predict() already yields a
# probability-like score per review. predict_proba() was only a deprecated
# alias and is absent from newer Keras releases.
y_hat = model.predict(X_valid)

In [None]:
# Peek at the prediction for the first validation review.
y_hat[0]

In [None]:
# Histogram of the predicted scores over the validation set.
plt.hist(y_hat)
# Mark the 0.5 point with a vertical orange line; assigning to _ keeps the
# returned artist from being echoed as cell output.
_ = plt.axvline(x=0.5, color='orange')

In [None]:
# Receiver Operating Characteristic (ROC)
#   * True Positive Rate:  TPR = TP / (TP + FN)
#   * False Positive Rate: FPR = FP / (FP + TN)
#   * Plotting TPR against FPR across thresholds gives the ROC curve; the
#     Area Under the Curve (AUC) summarises it as a single number.
# Scaled by 100 to express the AUC as a percentage.
pct_auc = roc_auc_score(y_valid, y_hat) * 100

In [None]:
# Show the AUC percentage rounded to two decimal places.
'{:0.2f}'.format(pct_auc)