In [1]:
import numpy as np
import pandas as pd
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
import tensorflow as tf
from tensorflow import keras
from keras.utils import to_categorical
from keras.layers import LSTM, Dense, Dropout

Using TensorFlow backend.


1. Get the dataset 

In [2]:
from keras.datasets import imdb

In [3]:
(x_train ,y_train),(x_test,y_test) = imdb.load_data(num_words=10000)

In [4]:
print('---review---')
print(x_train[0])
print('---label---')
print(y_train[0])
# this gives us the first review but in number format, each number corresponds to a word
# the label can either be a 0 or a 1 which tells us if the review is good or bad 

---review---
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
---label---
1


In [6]:
word2id = imdb.get_word_index()
id2word = {i: word for word, i in word2id.items()}
print('---review converted to words---')
print([id2word.get(i, ' ') for i in x_train[0]])
print('---label---')
print(y_train[0])

---review converted to words---
['the', 'as', 'you', 'with', 'out', 'themselves', 'powerful', 'lets', 'loves', 'their', 'becomes', 'reaching', 'had', 'journalist', 'of', 'lot', 'from', 'anyone', 'to', 'have', 'after', 'out', 'atmosphere', 'never', 'more', 'room', 'and', 'it', 'so', 'heart', 'shows', 'to', 'years', 'of', 'every', 'never', 'going', 'and', 'help', 'moments', 'or', 'of', 'every', 'chest', 'visual', 'movie', 'except', 'her', 'was', 'several', 'of', 'enough', 'more', 'with', 'is', 'now', 'current', 'film', 'as', 'you', 'of', 'mine', 'potentially', 'unfortunately', 'of', 'you', 'than', 'him', 'that', 'with', 'out', 'themselves', 'her', 'get', 'for', 'was', 'camp', 'of', 'you', 'movie', 'sometimes', 'movie', 'that', 'with', 'scary', 'but', 'and', 'to', 'story', 'wonderful', 'that', 'in', 'seeing', 'in', 'character', 'to', 'of', '70s', 'musicians', 'with', 'heart', 'had', 'shadows', 'they', 'of', 'here', 'that', 'with', 'her', 'serious', 'to', 'have', 'does', 'when', 'from', 'w

2. Preprocessing the Data 

In [7]:
# we will use the first 60 words as the maxlen so as to avoid to keep things more controlled 
x_train = sequence.pad_sequences(x_train, maxlen=60)
x_test = sequence.pad_sequences(x_test, maxlen=60)

3. Build the Model


In [8]:
# we then start with our neural network model, starting with the embedding layer which converts input into vectors
model = Sequential()
model.add(Embedding(20000, 128)) # the number 128 is the output dimension  and 20000 is the no of words
# next we add LTSM layer 
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2)) # the dropout is to avoid overfitting
model.add(Dense(1, activation='sigmoid')) # we use sigmoid as we are working with a binary classification 





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [9]:
# we then compile the model
model.compile(loss='binary_crossentropy', 
             optimizer='adam', 
             metrics=['accuracy'])



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


4. Train the model 

In [10]:
from keras.callbacks import EarlyStopping
model.fit(x_train, y_train, batch_size=32, epochs=15,callbacks=[EarlyStopping(monitor='val_loss',verbose=2)],
          validation_data=(x_test, y_test))
# I've used 15 epochs but made use of early stopping to avoid overfitting by halting the process once it is most effective 

Train on 25000 samples, validate on 25000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 00003: early stopping


<keras.callbacks.History at 0x19b7a3ea240>

5. Test Model 

In [11]:
score, acc = model.evaluate(x_test, y_test, batch_size = 32, verbose = 2)
print('test score: ', score)
print('test accuracy: ', acc)
# the accuracy of the model is 82%

test score:  0.4370385038661957
test accuracy:  0.8174


6. predict something 

In [12]:
print('--- review---')
print(x_train[8],'\n')
word2id = imdb.get_word_index()
id2word = {i: word for word, i in word2id.items()}
print('---review with words---')
print([id2word.get(i, ' ') for i in x_train[8]])
print('\n---label---')
print(y_train[8])
# review is the number format for the review
# we reveiwed the 9th review
# review with words is the review converted into english
# and label is 1, which indicates that that review was good 

---review---
[ 285   36  140  143   38   76   53 3094 1301    4 6991   16   82    6
   87 3578   44 2527 7612    5  800    4 3033   11   35 1728   96   21
   14   22    9   76   53    7    6  406   65   13   43  219   12  639
   21   13   80  140    5  135   15   14    9   31    7    4  118 3672
   13   28  126  110] 

---review with words---
['dvd', 'from', 'through', "i'm", 'her', 'get', 'up', 'happily', 'date', 'of', 'lionel', 'with', 'other', 'is', 'him', 'pushed', 'has', 'columbo', 'criticized', 'to', "what's", 'of', 'thief', 'this', 'so', 'superior', 'too', 'not', 'as', 'you', 'it', 'get', 'up', 'br', 'is', 'others', 'their', 'was', 'out', 'least', 'that', 'hilarious', 'not', 'was', 'into', 'through', 'to', 'why', 'for', 'as', 'it', 'by', 'br', 'of', 'where', 'suits', 'was', 'one', 'your', 'life']

---label---
1
