In [1]:
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM, Flatten, Dropout
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [4]:
# Cap the vocabulary: word ids at or above this value are replaced by oov_char.
num_words = 2000
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    path="imdb.npz",
    num_words=num_words,
    skip_top=0,
    maxlen=None,
    seed=113,
    start_char=1,   # id placed at the beginning of every review
    oov_char=2,     # id substituted for words dropped by num_words
    index_from=3,   # actual words are indexed from this offset upwards
)

In [45]:
# Number of reviews in the training split.
len(X_train)

25000

In [40]:
# Every review is brought to exactly this many tokens so the model
# receives fixed-length integer sequences.
max_review_length = 250
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=max_review_length)
    for split in (X_train, X_test)
)

In [46]:
embedding_vector_length = 32

# Embedding -> LSTM encoder -> dense head ending in a single sigmoid unit
# (binary sentiment), with dropout after the embedding and the hidden layer.
model = Sequential([
    Embedding(input_dim=num_words,
              output_dim=embedding_vector_length,
              input_length=max_review_length),
    Dropout(0.2),
    LSTM(32),
    Dense(units=256, activation='relu'),
    Dropout(0.2),
    Dense(units=1, activation='sigmoid'),
])
model.summary()
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 250, 32)           64000     
_________________________________________________________________
dropout_1 (Dropout)          (None, 250, 32)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               8448      
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 81,025
Trainable params: 81,025
Non-trainable params: 0
__________________________________________________

In [47]:
# Train for 10 epochs, holding out 20% of the training samples for validation.
# verbose=2 prints one summary line per epoch.
train_history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    verbose=2,
)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 20000 samples, validate on 5000 samples
Epoch 1/10
 - 195s - loss: 0.4507 - accuracy: 0.7836 - val_loss: 0.3732 - val_accuracy: 0.8404
Epoch 2/10
 - 191s - loss: 0.3257 - accuracy: 0.8632 - val_loss: 0.3218 - val_accuracy: 0.8668
Epoch 3/10
 - 194s - loss: 0.2927 - accuracy: 0.8820 - val_loss: 0.3760 - val_accuracy: 0.8608
Epoch 4/10
 - 197s - loss: 0.2734 - accuracy: 0.8891 - val_loss: 0.3231 - val_accuracy: 0.8688
Epoch 5/10
 - 191s - loss: 0.2444 - accuracy: 0.9003 - val_loss: 0.3329 - val_accuracy: 0.8712
Epoch 6/10
 - 191s - loss: 0.2270 - accuracy: 0.9114 - val_loss: 0.3469 - val_accuracy: 0.8658
Epoch 7/10
 - 198s - loss: 0.2198 - accuracy: 0.9118 - val_loss: 0.3377 - val_accuracy: 0.8648
Epoch 8/10
 - 198s - loss: 0.2096 - accuracy: 0.9168 - val_loss: 0.3568 - val_accuracy: 0.8640
Epoch 9/10
 - 201s - loss: 0.1928 - accuracy: 0.9258 - val_loss: 0.3870 - val_accuracy: 0.8554
Epoch 10/10
 - 202s - loss: 0.1871 - accuracy: 0.9264 - val_loss: 0.4001 - val_accuracy: 0.8542


In [48]:
# evaluate() returns the loss followed by the compiled metrics, so with
# metrics=['accuracy'] index 1 is the test-set accuracy shown by this cell.
scores = model.evaluate(x=X_test, y=y_test, verbose=1)
scores[1]



0.8554400205612183

In [49]:
# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels.  This is what
# Sequential.predict_classes did for a single-unit output; that method is
# deprecated and absent from recent Keras releases, whereas predict() is
# available in both old and new versions.
predict = (model.predict(X_test) > 0.5).astype('int32')
# Flatten the (n_samples, 1) column into a 1-D vector so predict_classes[i]
# is a scalar that can be used as a dictionary key.
predict_classes = predict.reshape(-1)

In [50]:
# Cache for the id -> word lookup.  Building it means loading the whole IMDB
# word index and shifting every entry, so it is done once, not on every call.
_ID_TO_WORD = {}


def _get_id_to_word():
    """Return the token-id -> word mapping, building it on first use."""
    if not _ID_TO_WORD:
        # The data was loaded with index_from=3, so every rank in the raw
        # word index is shifted by 3 to line up with the ids in the reviews.
        _ID_TO_WORD.update(
            (rank + 3, word) for word, rank in imdb.get_word_index().items()
        )
        # Reserved ids: padding value, start_char and oov_char.
        _ID_TO_WORD[0] = "<PAD>"
        _ID_TO_WORD[1] = "<START>"
        _ID_TO_WORD[2] = "<UNK>"
    return _ID_TO_WORD


def get_original_text(i):
    """Decode test review ``i`` back into a space-separated string of words.

    Args:
        i: Index of the review in ``X_test``.

    Returns:
        The review text, with padding shown as ``<PAD>``, the start marker
        as ``<START>`` and out-of-vocabulary words as ``<UNK>``.
    """
    id_to_word = _get_id_to_word()
    # Ids with no vocabulary entry (e.g. the unused id 3) decode as <UNK>
    # instead of raising KeyError.
    return ' '.join(id_to_word.get(token_id, "<UNK>") for token_id in X_test[i])

In [51]:
# Human-readable names for the binary sentiment labels.
SentimentDict = {0: 'negative', 1: 'positive'}


def display_test_sentiment(i):
    """Print test review ``i`` as text, then its true and predicted sentiment."""
    print(get_original_text(i))
    actual = SentimentDict[y_test[i]]
    predicted = SentimentDict[predict_classes[i]]
    print('label: ', actual, ', prediction: ', predicted)

In [54]:
# Print the decoded text, true label and predicted label for test review 45.
display_test_sentiment(45)

<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <START> the above <UNK> was written by me when i used the nick of which is still my <UNK> <UNK> i still believe andy <UNK> character of <UNK> is the best <UNK> episodes ever and i watch this episode at least once a year as i consider to be a <UNK> man as he has many friends who love him <UNK> br br in case many of 