In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### 1. 讀入深度學習套件

In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.datasets import imdb

### 2. 讀入數據

In [4]:
# Load the IMDB reviews, restricting the vocabulary to the 10,000 most
# frequent words (num_words).
train_split, test_split = imdb.load_data(num_words=10000)
x_train, y_train = train_split
x_test, y_test = test_split

### 3. 資料處理 (修改資料長度為80)

In [5]:
# Force every review to exactly 80 tokens. The pad_sequences defaults are
# spelled out here: longer reviews are cut from the front (the last 80 tokens
# are kept) and shorter ones are padded at the front.
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=80, padding='pre', truncating='pre')
    for reviews in (x_train, x_test)
)

### 4. step 01: 打造一個函數學習機

In [6]:
# Fix the NumPy and TensorFlow global seeds before any layer is created, so the
# randomness drawn from them while building and training the model (e.g. weight
# initialisation, dropout) is repeatable on every Restart & Run All.
# NOTE: some GPU kernels may still introduce small run-to-run differences.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Start from an empty layer stack; layers are appended to it with model.add().
model = Sequential()

In [7]:
# Look-up table that maps each of the 10,000 word indices to a trainable
# 128-dimensional vector. This is equivalent to passing a 10,000-dim one-hot
# word vector through a bias-free dense layer, i.e. compressing it to 128 dims.
model.add(Embedding(input_dim=10000, output_dim=128))

### 修改LSTM神經元數為100、dropout為0.1

In [8]:
# One LSTM layer with 100 units.
# The activation functions are left at the layer's built-in defaults
# (tanh and sigmoid), so none are passed here.
# dropout / recurrent_dropout: during training 10% of the input connections and
# 10% of the recurrent connections are randomly disabled, so individual units
# cannot simply memorise the answers (this limits overfitting).
recurrent_layer = LSTM(units=100, dropout=0.1, recurrent_dropout=0.1)
model.add(recurrent_layer)

In [9]:
# Single sigmoid unit: the model's output is one value between 0 and 1.
model.add(Dense(units=1, activation='sigmoid'))

In [10]:
# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy reported as the monitoring metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Print every layer with its output shape and parameter count.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 128)         1280000   
_________________________________________________________________
lstm (LSTM)                  (None, 100)               91600     
_________________________________________________________________
dense (Dense)                (None, 1)                 101       
Total params: 1,371,701
Trainable params: 1,371,701
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Cross-check the LSTM parameter count reported by model.summary().
# Each LSTM unit contains 4 small neurons (3 gates + the RNN-like neuron itself),
# and each of those has one weight per input feature, one per recurrent unit,
# plus a bias. There is a single LSTM layer.
embedding_dim = 128   # width of the vectors produced by the Embedding layer
lstm_units = 100      # units in the LSTM layer
weights_per_neuron = embedding_dim + lstm_units + 1
4 * lstm_units * weights_per_neuron

91600

### 5. step 02: 訓練(修改batch_size為64、epochs為8)

In [15]:
# Train for 8 epochs with mini-batches of 64 reviews.
# validation_data only evaluates loss/accuracy on the test set after each
# epoch; the weights are still learned from the training data alone.
# Caveat: because the test set doubles as the validation set, these numbers
# should not be used to tune hyper-parameters.
# Keep the returned History object so the per-epoch metrics remain available
# (history.history) instead of being discarded once the cell has run.
history = model.fit(x_train, y_train, batch_size=64, epochs=8,
                    validation_data=(x_test, y_test))

Train on 25000 samples, validate on 25000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<tensorflow.python.keras.callbacks.History at 0x1e017c1b8d0>