# IMDB Movie Review Sentiment Analysis Using Keras

In [None]:
# Imports
import numpy as np
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
%matplotlib inline

# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
# NOTE(review): this seeds NumPy only; the Keras backend may keep its own RNG state - confirm
# whether that also needs seeding for fully reproducible training.
np.random.seed(1987)

### Load the dataset

The IMDB movie review dataset ships with Keras. The `num_words` parameter caps the vocabulary size: only the `num_words` most frequent words are kept, and rarer words are replaced by an out-of-vocabulary index.

In [None]:
# Fetch the IMDB reviews bundled with Keras as separate train and test splits
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=1000)

# Show each split's shape on its own line: training first, then test
print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Peek at the first training example and its label, one per line
print(X_train[0], y_train[0], sep='\n')

### Preprocessing the data

In [None]:
# Turn each split's index sequences into a matrix using the tokenizer's 'binary' mode
tokenizer = Tokenizer(num_words=1000)
X_train, X_test = (
    tokenizer.sequences_to_matrix(sequences, mode='binary')
    for sequences in (X_train, X_test)
)
# Sanity check: first training row after vectorisation
print(X_train[0])

In [None]:
#X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.33, random_state=1987)

### Model Architecture

In [None]:
def build_classifier(optimizer):
    """Build and compile a small feed-forward network with a two-class output.

    The input width is read from the module-level ``X_train`` when this
    function is called, so ``X_train`` must already be vectorised.

    Args:
        optimizer: Optimizer name or instance, forwarded unchanged to
            ``compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    classifier = Sequential()
    classifier.add(Dense(units=32, activation='relu', input_shape=X_train.shape[1:]))
    classifier.add(Dense(units=64, activation='relu'))
    classifier.add(Dense(units=128, activation='relu'))
    # Softmax rather than sigmoid: sparse_categorical_crossentropy expects the
    # two outputs to form a probability distribution over the classes, which
    # independent sigmoid units do not guarantee.
    classifier.add(Dense(units=2, activation='softmax'))
    classifier.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Hyper-parameter grid: every combination of these values is a search candidate
parameters = dict(
    batch_size=[25, 32],
    epochs=[100, 300],
    optimizer=['adam', 'rmsprop'],
)

In [None]:
# Wrap the Keras model factory so scikit-learn can treat it as an estimator
classifier = KerasClassifier(build_fn=build_classifier)

# Search over `parameters`, scoring each candidate by accuracy with cv=5
grid_search = GridSearchCV(classifier, parameters, scoring='accuracy', cv=5)

In [None]:
# Run the search on the training data and keep the object that fit() returns
grid_search = grid_search.fit(X=X_train, y=y_train)