# Sentiment Analyzer on a set of IMDB Movie Reviews

In [1]:
from __future__ import division, print_function, absolute_import

import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb

# Hyperparameters shared by preprocessing and the network definition.
# Several of these were repeated as bare literals in calls that must agree
# with each other, so they are named once here.
VOCAB_SIZE = 10000      # n_words requested from the loader == embedding input_dim
MAX_SEQ_LEN = 100       # maxlen used for padding == width of the input layer
N_CLASSES = 2           # width of the categorical labels == output units
EMBEDDING_DIM = 128     # output_dim of the embedding layer
LSTM_UNITS = 128        # number of units given to the LSTM layer
LSTM_DROPOUT = 0.8      # NOTE(review): tflearn may treat this as a keep-probability — confirm
LEARNING_RATE = 0.001   # step size handed to the Adam optimizer

# IMDB Dataset loading
# NOTE(review): the second returned split is bound to `test` and the third is
# discarded. If the loader returns (train, valid, test), then `test` here is
# actually the validation portion carved out by valid_portion — confirm
# against the tflearn loader before reporting this as a test score.
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=VOCAB_SIZE,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test

# Data preprocessing
# Sequence padding: every sequence is brought to MAX_SEQ_LEN using 0. as filler
trainX = pad_sequences(trainX, maxlen=MAX_SEQ_LEN, value=0.)
testX = pad_sequences(testX, maxlen=MAX_SEQ_LEN, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=N_CLASSES)
testY = to_categorical(testY, nb_classes=N_CLASSES)

# Network building: input -> embedding -> LSTM -> softmax classifier
net = tflearn.input_data([None, MAX_SEQ_LEN])
net = tflearn.embedding(net, input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM)
net = tflearn.lstm(net, LSTM_UNITS, dropout=LSTM_DROPOUT)
net = tflearn.fully_connected(net, N_CLASSES, activation='softmax')
# Attach the training op: Adam with categorical cross-entropy loss
net = tflearn.regression(net, optimizer='adam', learning_rate=LEARNING_RATE,
                         loss='categorical_crossentropy')

In [2]:
# Training
# Wrap the network graph in a DNN model object (tensorboard_verbose=0).
model = tflearn.DNN(net, tensorboard_verbose=0)

# Options for the training run, collected in one place for readability.
# validation_set=0.1 is given as a fraction rather than an explicit dataset.
fit_options = dict(
    n_epoch=5,
    validation_set=0.1,
    show_metric=True,
    batch_size=128,
)
model.fit(trainX, trainY, **fit_options)

Training Step: 794  | total loss: [1m[32m0.18779[0m[0m | time: 92.289s
| Adam | epoch: 005 | loss: 0.18779 - acc: 0.9362 -- iter: 20224/20250
Training Step: 795  | total loss: [1m[32m0.18241[0m[0m | time: 96.225s
| Adam | epoch: 005 | loss: 0.18241 - acc: 0.9387 | val_loss: 0.50240 - val_acc: 0.8173 -- iter: 20250/20250
--


In [3]:
import numpy as np
# Score the model on the held-out split.
# Column 0 of the model output is thresholded at 0.5 to get a 0/1 vector,
# which is then compared element-wise with column 0 of the categorical labels.
first_column_scores = np.array(model.predict(testX))[:, 0]
predictions = (first_column_scores >= 0.5).astype(np.int_)

# Fraction of samples where the thresholded prediction matches the label.
matches = predictions == testY[:, 0]
test_accuracy = np.mean(matches, axis=0)
print("Test accuracy: ", test_accuracy)

Test accuracy:  0.81
