In [5]:
from __future__ import print_function

import numpy as np
import pandas as pd

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb

from sklearn.metrics import accuracy_score, classification_report


In [6]:
# Hyperparameters reused by the later cells.
max_features = 15000  # passed as `num_words` to the loader and as the Embedding input size
max_len = 300         # `maxlen` for pad_sequences and `input_length` of the Embedding layer
batch_size = 64       # mini-batch size handed to model.fit

# Load the IMDB train/test splits from keras.datasets.
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split

# Report how many observations each split contains.
for split_name, split_inputs in (('train', x_train), ('test', x_test)):
    print(len(split_inputs), split_name + ' observations')

25000 train observations
25000 test observations


In [7]:
# Bring every sequence to a common length of max_len so each split becomes a
# single 2-D array (the printed shapes below confirm the second axis is max_len).
x_train_2 = sequence.pad_sequences(x_train, maxlen=max_len)
x_test_2 = sequence.pad_sequences(x_test, maxlen=max_len)

for split_label, padded in (('x_train', x_train_2), ('x_test', x_test_2)):
    print(split_label + ' shape:', padded.shape)

# Make sure the labels are NumPy arrays before they reach fit / the metrics.
y_train, y_test = np.array(y_train), np.array(y_test)

x_train shape: (25000, 300)
x_test shape: (25000, 300)


In [10]:
# Bidirectional-LSTM binary classifier over the padded sequences of word indices.
model = Sequential([
    # Map each of the max_features word indices to a 128-dimensional vector.
    Embedding(max_features, 128, input_length=max_len),
    # Run a 64-unit LSTM over the sequence in both directions.
    Bidirectional(LSTM(64)),
    # Dropout with rate 0.5 as regularisation before the output layer.
    Dropout(0.5),
    # Single sigmoid unit: one score in [0, 1] per input sequence.
    Dense(1, activation='sigmoid'),
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the cell output.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 300, 128)          1920000   
                                                                 
 bidirectional_2 (Bidirectio  (None, 128)              98816     
 nal)                                                            
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
Total params: 2,018,945
Trainable params: 2,018,945
Non-trainable params: 0
_________________________________________________________________
None


In [11]:
# Train for 4 epochs, using validation_split to hold out 20% of the training
# data for validation. The returned History object is kept so the per-epoch
# metrics remain accessible, and so the cell does not end by echoing its repr.
history = model.fit(
    x_train_2,
    y_train,
    batch_size=batch_size,
    epochs=4,
    validation_split=0.2,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x1f2f73d4f70>

In [12]:
def _predict_classes(features):
    """Return the model's hard 0/1 predictions for `features` as a 1-D int array.

    The final Dense(1, sigmoid) layer yields one score per row, so
    `model.predict` returns an (n, 1) array. The scores are rounded to the
    nearest class label and flattened with `reshape`, which returns a new
    view instead of mutating the array's `shape` attribute in place.
    """
    scores = model.predict(features)
    return np.round(scores).astype(int).reshape(-1)


y_train_predclass = _predict_classes(x_train_2)
y_test_predclass = _predict_classes(x_test_2)

# --- Training split -------------------------------------------------------
# NOTE(review): this scores all of x_train_2, which includes the rows that
# fit() held out via validation_split.
print('\n\nLSTM Bidrectional Sentiment Classification - Train Accuracy:',
      round(accuracy_score(y_train, y_train_predclass), 3))
print('\nLSTM Bidrectional Sentiment Classification of Traing data\n',
      classification_report(y_train, y_train_predclass))
print('\nLSTM Bidrectional Sentiment Classification - Train Confusion Matrix\n\n',
      pd.crosstab(y_train, y_train_predclass, rownames=['Actuall'], colnames=['Predicted']))

# --- Test split -----------------------------------------------------------
print('\nLSTM Bidrectional Sentiment Classification - Test accuracy:',
      round(accuracy_score(y_test, y_test_predclass), 3))
print('\nLSTM Bidrectional Sentiment Classification of Test data\n',
      classification_report(y_test, y_test_predclass))
print('\nLSTM Bidrectional Sentiment Classification - Test Confusion Matrix\n\n',
      pd.crosstab(y_test, y_test_predclass, rownames=['Actuall'], colnames=['Predicted']))
      



LSTM Bidrectional Sentiment Classification - Train Accuracy: 0.961

LSTM Bidrectional Sentiment Classification of Traing data
               precision    recall  f1-score   support

           0       0.96      0.97      0.96     12500
           1       0.97      0.96      0.96     12500

    accuracy                           0.96     25000
   macro avg       0.96      0.96      0.96     25000
weighted avg       0.96      0.96      0.96     25000


LSTM Bidrectional Sentiment Classification - Train Confusion Matrix

 Predicted      0      1
Actuall                
0          12086    414
1            560  11940

LSTM Bidrectional Sentiment Classification - Test accuracy: 0.863

LSTM Bidrectional Sentiment Classification of Test data
               precision    recall  f1-score   support

           0       0.85      0.88      0.87     12500
           1       0.88      0.84      0.86     12500

    accuracy                           0.86     25000
   macro avg       0.86      0.86 