In [1]:
from keras import layers
from keras import models
from keras.datasets import imdb
import numpy as np

In [6]:
DIMENSIONS=12000

In [2]:
(train_data, train_labels), (test_data, test_labels)  = imdb.load_data(num_words=DIMENSIONS)

In [3]:
def get_review_text(datapoint):
    
    word_index = imdb.get_word_index()
    reversed_word_index = dict([(value, key) for key,value in word_index.items()])
    decoded_review = ' '.join([reversed_word_index.get(c-3, '?') for c in datapoint])
    return decoded_review

In [4]:
get_review_text(train_data[2])

"? this has to be one of the worst films of the 1990s when my friends i were watching this film being the target audience it was aimed at we just sat watched the first half an hour with our jaws touching the floor at how bad it really was the rest of the time everyone else in the theatre just started talking to each other leaving or generally crying into their popcorn that they actually paid money they had ? working to watch this feeble excuse for a film it must have looked like a great idea on paper but on film it looks like no one in the film has a clue what is going on crap acting crap costumes i can't get across how ? this is to watch save yourself an hour a bit of your life"

In [7]:
'''
vectorize_inputs

- Accepts data of shape - (num_datapoints, datapoint_length)
- Fixes the dimension (here, same as the vocabulary size)
- Forms the one-hot encoding for each datapoint


'''
def vectorize_inputs(sequences, dimensions=DIMENSIONS):
    
    results = np.zeros((len(sequences), dimensions))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    
    return results
    

In [8]:
X_train = vectorize_inputs(train_data)
X_test = vectorize_inputs(test_data)

y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')

In [10]:
model = models.Sequential()

model.add(layers.Dense(16, activation='relu', input_shape=(DIMENSIONS,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

In [11]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [13]:
from keras.callbacks import EarlyStopping
hist = model.fit(X_train, y_train, batch_size=512, epochs=20, validation_data=(X_test, y_test), callbacks=[EarlyStopping(mode='min', patience=2)])

Train on 25000 samples, validate on 25000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


### To do

- You used two hidden layers. Try using one or three hidden layers, and see how doing so affects validation and test accuracy
- Try using layers with more hidden units or fewer hidden units: `32` units, `64` units, and so on
- Try using the mse loss function instead of binary_crossentropy
- Try using the tanh activation (an activation that was popular in the early days of neural networks) instead of `relu`