# Using IMDB review dataset, perform sentiment classification using RNN.

In [1]:
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
vocabulary_size = 5000
(X_train, y_train), (X_test, y_test) =imdb.load_data(path = 'imdb.npz',num_words=vocabulary_size)
print('Loaded dataset with {} training samples, {} test samples'.format(len(X_train), len(X_test)))
# Map review back to original words
word2id = imdb.get_word_index(path ='imdb_word_index.json')
id2word = {i: word for word, i in word2id.items()}
print('---review with words---')
print([id2word.get(i, ' ') for i in X_train[6]])
print('---label---')
print(y_train[6])
# Maximum and minimum review lengths
print('Maximum review length: {}'
.format(len(max((X_train + X_test),
key=len))))
print('Minimum review length: {}'
.format(len(min((X_train + X_test),
key=len))))
# Pad sequences
max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)
# Design RNN model
model = Sequential()
embedding_size = 32
model.add(Embedding(vocabulary_size, embedding_size,
input_length=max_words))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))
# Model summary
print(model.summary())
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam',
metrics=['accuracy'])
# Train model
batch_size = 64
num_epochs = 3
X_valid, y_valid = X_train[:batch_size], y_train[:batch_size]
X_train2, y_train2 = X_train[batch_size:], y_train[batch_size:]
model.fit(X_train2, y_train2, validation_data=(X_valid, y_valid),
batch_size=batch_size, epochs=num_epochs)
# Evaluate model
scores = model.evaluate(X_test, y_test, verbose=0)
print('Test accuracy:', scores[1])


Loaded dataset with 25000 training samples, 25000 test samples
---review with words---
['the', 'and', 'full', 'involving', 'to', 'impressive', 'boring', 'this', 'as', 'and', 'and', 'br', 'villain', 'and', 'and', 'need', 'has', 'of', 'costumes', 'b', 'message', 'to', 'may', 'of', 'props', 'this', 'and', 'and', 'concept', 'issue', 'and', 'to', "god's", 'he', 'is', 'and', 'unfolds', 'movie', 'women', 'like', "isn't", 'surely', "i'm", 'and', 'to', 'toward', 'in', "here's", 'for', 'from', 'did', 'having', 'because', 'very', 'quality', 'it', 'is', 'and', 'and', 'really', 'book', 'is', 'both', 'too', 'worked', 'carl', 'of', 'and', 'br', 'of', 'reviewer', 'closer', 'figure', 'really', 'there', 'will', 'and', 'things', 'is', 'far', 'this', 'make', 'mistakes', 'and', 'was', "couldn't", 'of', 'few', 'br', 'of', 'you', 'to', "don't", 'female', 'than', 'place', 'she', 'to', 'was', 'between', 'that', 'nothing', 'and', 'movies', 'get', 'are', 'and', 'br', 'yes', 'female', 'just', 'its', 'because', 'm



None
Epoch 1/3
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m437s[0m 1s/step - accuracy: 0.6701 - loss: 0.5957 - val_accuracy: 0.8750 - val_loss: 0.3229
Epoch 2/3
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m434s[0m 1s/step - accuracy: 0.8632 - loss: 0.3289 - val_accuracy: 0.9219 - val_loss: 0.2436
Epoch 3/3
[1m390/390[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m346s[0m 887ms/step - accuracy: 0.8991 - loss: 0.2601 - val_accuracy: 0.9531 - val_loss: 0.1420
Test accuracy: 0.8753600120544434
