# Sentiment classification

This example shows how to perform binary sentiment classification on the IMDB movie-review dataset using a bidirectional LSTM.

In [1]:
from __future__ import print_function
import numpy as np
import pandas as pd
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Hyperparameters shared by the cells below.
# max_features: passed as `num_words` when loading IMDB and as the input
#               dimension of the Embedding layer (i.e. the vocabulary size).
# max_len:      passed as `maxlen` to pad_sequences and as the Embedding
#               layer's `input_length` (tokens per review).
# batch_size:   mini-batch size passed to model.fit.
max_features = 15_000
max_len = 300
batch_size = 64

In [3]:
# Load the IMDB reviews, with the vocabulary capped at `max_features` words.
# load_data returns a (features, labels) pair for each split.
imdb_train, imdb_test = imdb.load_data(num_words=max_features)
x_train, y_train = imdb_train
x_test, y_test = imdb_test

# Report how many reviews each split contains.
print(f'{len(x_train)} train observations')
print(f'{len(x_test)} test observations')

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])


25000 train observations
25000 test observations


  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [4]:
# Bring every review to exactly `max_len` tokens (padding short ones and
# truncating long ones) so each split becomes a rectangular 2-D array.
x_train_2, x_test_2 = (
    sequence.pad_sequences(reviews, maxlen=max_len)
    for reviews in (x_train, x_test)
)

# Sanity check: both arrays should be (n_reviews, max_len).
print(f'x_train shape: {x_train_2.shape}')
print(f'x_test shape: {x_test_2.shape}')

x_train shape: (25000, 300)
x_test shape: (25000, 300)


In [5]:
# Make sure both label sets are NumPy arrays before they are passed to
# model.fit and the scikit-learn metrics.
y_train, y_test = map(np.array, (y_train, y_test))

In [6]:
# Model building: embedding -> bidirectional LSTM -> dropout -> sigmoid unit.
model = Sequential([
    # Map each of the `max_features` word indices to a 128-dimensional vector.
    Embedding(max_features, 128, input_length=max_len),
    # Read the sequence with a 64-unit LSTM in both directions.
    Bidirectional(LSTM(64)),
    # Dropout with rate 0.5 as regularisation before the output layer.
    Dropout(0.5),
    # One sigmoid unit for the binary sentiment label.
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# alongside the loss during training.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Print model architecture.
# model.summary() writes the table to stdout itself and returns None, so it is
# called directly; wrapping it in print() only appends a stray "None" line.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 128)          1920000   
_________________________________________________________________
bidirectional (Bidirectional (None, 128)               98816     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 2,018,945
Trainable params: 2,018,945
Non-trainable params: 0
_________________________________________________________________
None


In [8]:
# Train the model.
# Four epochs over the padded training reviews; validation_split=0.2 holds
# out 20% of the training samples for the per-epoch validation metrics.
model.fit(
    x=x_train_2,
    y=y_train,
    batch_size=batch_size,
    epochs=4,
    validation_split=0.2,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x14c730550>

In [15]:
# Model prediction.
# Score both splits with the trained network. The output layer is a single
# sigmoid unit, so each review gets one score between 0 and 1.
predict_batch_size = 1_000
y_train_predclass = model.predict(x=x_train_2, batch_size=predict_batch_size)
y_test_predclass = model.predict(x=x_test_2, batch_size=predict_batch_size)

In [16]:
# Give the prediction arrays the same shape as their label arrays so they line
# up element-wise in the metric calls below.
# reshape() is used rather than assigning to `.shape`: NumPy discourages
# setting `.shape` in place and recommends reshape() instead.
y_train_predclass = y_train_predclass.reshape(y_train.shape)
y_test_predclass = y_test_predclass.reshape(y_test.shape)

In [17]:
# Convert the predicted scores into hard 0/1 labels with a 0.5 cut-off.
y_train_pred = np.where(y_train_predclass > 0.5, 1, 0)

# Overall accuracy on the training split, rounded to three decimals.
train_accuracy = accuracy_score(y_train, y_train_pred)
print(
    'LSTM bidirectional sentiment classification - train accuracy:',
    round(train_accuracy, 3),
    sep='\n',
)

# Per-class precision / recall / F1.
train_report = classification_report(y_train, y_train_pred)
print(
    'LSTM bidirectional sentiment classification - training data:',
    train_report,
    sep='\n',
)

# Confusion matrix: rows are the true labels, columns the predicted ones.
train_confusion = pd.crosstab(
    y_train,
    y_train_pred,
    rownames=['Actual'],
    colnames=['Predicted'],
)
print(
    'LSTM bidirectional sentiment classification - train confusion matrix:',
    train_confusion,
    sep='\n',
)

LSTM bidirectional sentiment classification - train accuracy:
0.955
LSTM bidirectional sentiment classification - training data:
              precision    recall  f1-score   support

           0       0.95      0.96      0.95     12500
           1       0.96      0.95      0.95     12500

    accuracy                           0.95     25000
   macro avg       0.95      0.95      0.95     25000
weighted avg       0.95      0.95      0.95     25000

LSTM bidirectional sentiment classification - train confusion matrix:
Predicted      0      1
Actual                 
0          12039    461
1            676  11824


In [18]:
# Convert the predicted scores into hard 0/1 labels with a 0.5 cut-off.
y_test_pred = np.where(y_test_predclass > 0.5, 1, 0)

# Overall accuracy on the held-out test split, rounded to three decimals.
test_accuracy = accuracy_score(y_test, y_test_pred)
print(
    'LSTM bidirectional sentiment classification - test accuracy:',
    round(test_accuracy, 3),
    sep='\n',
)

# Per-class precision / recall / F1.
test_report = classification_report(y_test, y_test_pred)
print(
    'LSTM bidirectional sentiment classification - testing data:',
    test_report,
    sep='\n',
)

# Confusion matrix: rows are the true labels, columns the predicted ones.
test_confusion = pd.crosstab(
    y_test,
    y_test_pred,
    rownames=['Actual'],
    colnames=['Predicted'],
)
print(
    'LSTM bidirectional sentiment classification - test confusion matrix:',
    test_confusion,
    sep='\n',
)

LSTM bidirectional sentiment classification - test accuracy:
0.853
LSTM bidirectional sentiment classification - testing data:
              precision    recall  f1-score   support

           0       0.84      0.88      0.86     12500
           1       0.87      0.83      0.85     12500

    accuracy                           0.85     25000
   macro avg       0.85      0.85      0.85     25000
weighted avg       0.85      0.85      0.85     25000

LSTM bidirectional sentiment classification - test confusion matrix:
Predicted      0      1
Actual                 
0          10970   1530
1           2152  10348
