# Analyzing IMDB Data in Keras

In [None]:
# Imports: NumPy, the Keras pieces used in this notebook, and Matplotlib.
import numpy as np
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Activation,Dropout
from keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
# IPython magic: render Matplotlib figures inline in the notebook output.
%matplotlib inline

# Seed NumPy's global random number generator with a fixed value.
# NOTE(review): this seeds NumPy only; whether it makes training fully
# repeatable depends on the Keras backend in use -- confirm.
np.random.seed(42)

## 1. Loading the data
This dataset comes preloaded with Keras, so one simple command will get us training and testing data.

In [None]:
# Fetch the IMDB train/test splits bundled with Keras, keeping only the
# 1000 most frequent words (num_words=1000).
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000)

# Show the shape of each input split, one per line.
print(x_train.shape, x_test.shape, sep="\n")

## 2. Examining the data

In [None]:
# Show the first training sample and its label, one per line.
print(x_train[0], y_train[0], sep="\n")

## 3. One-hot encoding the input

In [None]:
# Encode the input reviews (not the labels): each sequence of word indices
# becomes a binary vector of length 1000 (num_words), where mode='binary'
# marks which word indices occur in the review.
tokenizer = Tokenizer(num_words=1000)
x_train, x_test = (
    tokenizer.sequences_to_matrix(sequences, mode='binary')
    for sequences in (x_train, x_test)
)
print(x_train[0])

And we'll also one-hot encode the output.

In [None]:
# Convert the integer class labels into one-hot rows with `num_classes`
# columns, then show the resulting shapes (one per line).
num_classes = 2
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)
print(y_train.shape, y_test.shape, sep="\n")

## 4. Building the model architecture

In [None]:
# Build the model architecture: a fully connected classifier that maps the
# 1000-dimensional binary review vectors to a softmax over `num_classes`.
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import Adam

model = Sequential()

# Three ReLU hidden layers of decreasing width, each followed by 20% dropout.
# input_dim=1000 matches num_words=1000 used when loading and encoding the data.
model.add(Dense(512, input_dim=1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))

# Output layer: one softmax unit per class, matching the one-hot labels.
model.add(Dense(num_classes, activation='softmax'))
model.summary()

# Hand the configured Adam instance to compile(). Passing the string
# "rmsprop" here instead would leave `opt` unused and silently ignore the
# learning rate and other settings chosen below.
opt = Adam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

## 5. Training the model

In [None]:
# Train for 5 epochs in mini-batches of 128, reporting metrics on the test
# split after each epoch; the value returned by fit() is kept in `clf`.
clf = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=5,
    validation_data=(x_test, y_test),
)

## 6. Evaluating the model

In [None]:
# Evaluate on the test split without progress output (verbose=0).
# score[1] is printed as the accuracy -- presumably index 0 is the loss and
# index 1 the 'accuracy' metric requested in compile(); confirm.
score = model.evaluate(
    x_test,
    y_test,
    verbose=0,
)
print("Accuracy: ", score[1])