<a href="https://colab.research.google.com/github/hyeryn/Natural-Language/blob/master/09_3_imdb_bidirectional_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Train a Bidirectional LSTM on the IMDB sentiment classification task.

#from __future__ import print_function
import numpy as np
import pandas as pd

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb

from sklearn.metrics import accuracy_score,classification_report


# Hyperparameters.
max_features = 15000  # vocabulary cap passed to imdb.load_data(num_words=...)
max_len = 300         # every review is padded/truncated to this many tokens
batch_size = 64       # mini-batch size used when fitting the model

# Load the IMDB reviews, already split into train and test parts.
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split
print(len(x_train), 'train observations')
print(len(x_test), 'test observations')

# Pad sequences to a common length for computational efficiency.
x_train_2 = sequence.pad_sequences(x_train, maxlen=max_len)
x_test_2 = sequence.pad_sequences(x_test, maxlen=max_len)
print('x_train shape:', x_train_2.shape)
print('x_test shape:', x_test_2.shape)

# Make sure the labels are NumPy arrays before they reach Keras / sklearn.
y_train, y_test = np.array(y_train), np.array(y_test)

# Model building: the embedding layer maps each token to a 128-dimensional
# vector, a bidirectional LSTM (64 units per direction) reads the sequence,
# and a single sigmoid unit models the sentiment as 0 or 1.
model = Sequential()
model.add(Embedding(max_features, 128, input_length=max_len))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model architecture. summary() prints the table itself and returns
# None, so wrapping it in print() would emit a stray "None" line.
model.summary()

# Fit the network for 4 epochs, holding out 20% of the training data
# for validation.
model.fit(
    x_train_2,
    y_train,
    batch_size=batch_size,
    epochs=4,
    validation_split=0.2,
)

# Model prediction.
# Sequential.predict_classes() was removed from Keras (TensorFlow >= 2.6).
# For a single sigmoid output the equivalent is to threshold the predicted
# probability at 0.5 and cast to int32.
y_train_proba = model.predict(x_train_2, batch_size=100)
y_test_proba = model.predict(x_test_2, batch_size=100)

# Give the predictions the same shape as the label vectors so they can be
# compared element-wise by the metric functions. reshape() is used instead
# of assigning to .shape, which NumPy discourages.
y_train_predclass = (y_train_proba > 0.5).astype('int32').reshape(y_train.shape)
y_test_predclass = (y_test_proba > 0.5).astype('int32').reshape(y_test.shape)


# Evaluation: accuracy, per-class report and confusion matrix for each split.
# NOTE(review): the "Actuall" row label is kept verbatim to preserve output.

# --- Training data ---
train_accuracy = round(accuracy_score(y_train, y_train_predclass), 3)
print("\n\nLSTM Bidirectional Sentiment Classification  - Train accuracy:", train_accuracy)

train_report = classification_report(y_train, y_train_predclass)
print("\nLSTM Bidirectional Sentiment Classification of Training data\n", train_report)

train_confusion = pd.crosstab(
    y_train, y_train_predclass, rownames=["Actuall"], colnames=["Predicted"]
)
print("\nLSTM Bidirectional Sentiment Classification - Train Confusion Matrix\n\n", train_confusion)

# --- Test data ---
test_accuracy = round(accuracy_score(y_test, y_test_predclass), 3)
print("\nLSTM Bidirectional Sentiment Classification  - Test accuracy:", test_accuracy)

test_report = classification_report(y_test, y_test_predclass)
print("\nLSTM Bidirectional Sentiment Classification of Test data\n", test_report)

test_confusion = pd.crosstab(
    y_test, y_test_predclass, rownames=["Actuall"], colnames=["Predicted"]
)
print("\nLSTM Bidirectional Sentiment Classification - Test Confusion Matrix\n\n", test_confusion)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


25000 train observations
25000 test observations
x_train shape: (25000, 300)
x_test shape: (25000, 300)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 128)          1920000   
_________________________________________________________________
bidirectional (Bidirectional (None, 128)               98816     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 2,018,945
Trainable params: 2,018,945
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4






LSTM Bidirectional Sentiment Classification  - Train accuracy: 0.957

LSTM Bidirectional Sentiment Classification of Training data
               precision    recall  f1-score   support

           0       0.95      0.97      0.96     12500
           1       0.97      0.95      0.96     12500

    accuracy                           0.96     25000
   macro avg       0.96      0.96      0.96     25000
weighted avg       0.96      0.96      0.96     25000


LSTM Bidirectional Sentiment Classification - Train Confusion Matrix

 Predicted      0      1
Actuall                
0          12086    414
1            670  11830

LSTM Bidirectional Sentiment Classification  - Test accuracy: 0.859

LSTM Bidirectional Sentiment Classification of Test data
               precision    recall  f1-score   support

           0       0.84      0.89      0.86     12500
           1       0.88      0.83      0.85     12500

    accuracy                           0.86     25000
   macro avg       0.86  