In [1]:
# 양방향 LSTM을 이용한 IMDF 감정 분류
from __future__ import print_function
import numpy as np
import pandas as pd
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb
from sklearn.metrics import accuracy_score,classification_report

In [2]:
# 최대 피처 제한
max_features = 15000
max_len = 300
batch_size = 64

In [3]:
# 데이터 로딩
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train observations')
print(len(x_test), 'test observations')

25000 train observations
25000 test observations


In [4]:
# 효율적인 연산을 위한 패드 배열
x_train_2 = sequence.pad_sequences(x_train, maxlen=max_len)
x_test_2 = sequence.pad_sequences(x_test, maxlen=max_len)
print('x_train shape:', x_train_2.shape)
print('x_test shape:', x_test_2.shape)

y_train = np.array(y_train)
y_test = np.array(y_test)

x_train shape: (25000, 300)
x_test shape: (25000, 300)


In [5]:
# 모델 구축
model = Sequential()
model.add(Embedding(max_features, 128, input_length=max_len))
model.add(Bidirectional(LSTM(64)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

In [6]:
# 모델 아키텍처 출력
print (model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 128)          1920000   
_________________________________________________________________
bidirectional (Bidirectional (None, 128)               98816     
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 2,018,945
Trainable params: 2,018,945
Non-trainable params: 0
_________________________________________________________________
None


In [7]:
# 모델 학습
model.fit(x_train_2, y_train,batch_size=batch_size,epochs=4,validation_split=0.2) # 학습하는데 시간이 오래 걸린다.
# 교재와는 다른 결과를 나타낸다
# <tensorflow.python.keras.callbacks.History at 0x173967b9580> 여기서의 '0x173967b9580' 값이 교재와는 다르다.

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x173967b9580>

In [8]:
# 모델 예측
y_train_predclass = model.predict_classes(x_train_2,batch_size=100)
y_test_predclass = model.predict_classes(x_test_2,batch_size=100)

y_train_predclass.shape = y_train.shape
y_test_predclass.shape = y_test.shape

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [9]:
# 모델 정확도 및 메트릭 계산
print (("\n\nLSTM Bidirectional Sentiment Classification  - Train accuracy:"),(round(accuracy_score(y_train,y_train_predclass),3)))
print ("\nLSTM Bidirectional Sentiment Classification of Training data\n",classification_report(y_train, y_train_predclass))
print ("\nLSTM Bidirectional Sentiment Classification - Train Confusion Matrix\n\n",pd.crosstab(y_train, y_train_predclass,rownames = ["Actuall"],colnames = ["Predicted"]))      

print (("\nLSTM Bidirectional Sentiment Classification  - Test accuracy:"),(round(accuracy_score(y_test,y_test_predclass),3)))
print ("\nLSTM Bidirectional Sentiment Classification of Test data\n",classification_report(y_test, y_test_predclass))
print ("\nLSTM Bidirectional Sentiment Classification - Test Confusion Matrix\n\n",pd.crosstab(y_test, y_test_predclass,rownames = ["Actuall"],colnames = ["Predicted"]))      



LSTM Bidirectional Sentiment Classification  - Train accuracy: 0.946

LSTM Bidirectional Sentiment Classification of Training data
               precision    recall  f1-score   support

           0       0.95      0.94      0.95     12500
           1       0.94      0.95      0.95     12500

    accuracy                           0.95     25000
   macro avg       0.95      0.95      0.95     25000
weighted avg       0.95      0.95      0.95     25000


LSTM Bidirectional Sentiment Classification - Train Confusion Matrix

 Predicted      0      1
Actuall                
0          11740    760
1            582  11918

LSTM Bidirectional Sentiment Classification  - Test accuracy: 0.856

LSTM Bidirectional Sentiment Classification of Test data
               precision    recall  f1-score   support

           0       0.86      0.85      0.86     12500
           1       0.85      0.86      0.86     12500

    accuracy                           0.86     25000
   macro avg       0.86  