In [1]:
# Select TensorFlow as the Keras backend; this is set before keras is imported
# in a later cell (the import then reports "Using TensorFlow backend.").
%env KERAS_BACKEND=tensorflow

env: KERAS_BACKEND=tensorflow


In [2]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [3]:
# Import the IMDB dataset loader
from keras.datasets import imdb

Using TensorFlow backend.


In [4]:
# Load the IMDB train/test splits with the vocabulary limited to 10000 word
# indices; the same 10000 is used as the Embedding input size when the models
# are built further down.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000)

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


In [5]:
# Report how many samples each split contains.
for caption, count in (('訓練總筆數:', len(x_train)), ('測試總筆數:', len(x_test))):
    print(caption, count)

訓練總筆數: 25000
測試總筆數: 25000


#### 雖然 RNN 可以處理不同長度的輸入，在寫程式時我們還是要
(1) 設輸入文字長度的上限（超過上限的文字預設會從開頭截掉，只保留最後的部分）
(2) 把每段文字都弄成一樣長，太短的在前面補上 0（`pad_sequences` 預設 `padding='pre'`）

In [9]:
from keras.preprocessing import sequence

In [10]:
# Force every review to exactly MAX_LEN tokens so they can be batched together.
# padding/truncating are spelled out (they equal the Keras defaults, so results
# are unchanged): shorter reviews get zeros PREPENDED, and longer reviews keep
# only their last MAX_LEN tokens.
MAX_LEN = 150
x_train = sequence.pad_sequences(x_train, maxlen=MAX_LEN, padding='pre', truncating='pre')
x_test = sequence.pad_sequences(x_test, maxlen=MAX_LEN, padding='pre', truncating='pre')

### 建立神經網路模型

In [11]:
N = 5 # embedding size: each word index is mapped to an N-dimensional vector
K = 6 # number of units in the LSTM layer

In [16]:
from keras.models import Sequential
from keras.layers import Dense,Embedding,Dropout
from keras.layers import LSTM

In [19]:
# Baseline sentiment classifier:
# word indices -> N-dim embeddings -> dropout -> LSTM(K) -> one sigmoid output.
model = Sequential([
    Embedding(10000, N),   # 10000 matches num_words given to imdb.load_data
    Dropout(0.2),
    LSTM(K),
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [20]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, None, 5)           50000     
_________________________________________________________________
dropout_2 (Dropout)          (None, None, 5)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 6)                 288       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 7         
Total params: 50,295
Trainable params: 50,295
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Train for 5 epochs with mini-batches of 32. The returned History object is
# not assigned, so its repr is shown as the cell output.
model.fit(x_train,y_train,batch_size=32,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x23bd6d7a748>

In [22]:
# Score the trained model on the test split; the first two entries of the
# result are printed as the loss and the accuracy.
score = model.evaluate(x_test, y_test)
test_loss, test_accuracy = score[0], score[1]
print("loss =", test_loss)
print("正確率 =", test_accuracy)

loss = 0.3544991341543198
正確率 = 0.85844


### 選用不同激活函數試試看

In [23]:
# Activation experiment: same architecture as the baseline, but the output
# unit uses ReLU instead of sigmoid.
# NOTE(review): a ReLU output is not confined to [0, 1] while the loss is still
# binary_crossentropy, which expects a probability. Kept as-is because this
# cell is the experiment whose results are recorded in the notebook.
model = Sequential([
    Embedding(10000, N),
    Dropout(0.2),
    LSTM(K),
    Dense(1, activation='relu'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [24]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, None, 5)           50000     
_________________________________________________________________
dropout_3 (Dropout)          (None, None, 5)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 6)                 288       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 7         
Total params: 50,295
Trainable params: 50,295
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Train the ReLU-output variant with the same settings as the baseline
# (5 epochs, batch size 32); the unassigned History repr becomes the cell output.
model.fit(x_train,y_train,batch_size=32,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x23be9867208>

In [26]:
# Score the ReLU-output model on the test split and print loss and accuracy.
score = model.evaluate(x_test, y_test)
test_loss, test_accuracy = score[0], score[1]
print("loss =", test_loss)
print("正確率 =", test_accuracy)

loss = 0.5362305676174164
正確率 = 0.8494


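### 把輸出層的激活函數從 sigmoid 換成 relu，正確率反而下降（0.85844 → 0.8494），loss 也從 0.354 升到 0.536；relu 的輸出不限制在 0 到 1 之間，並不適合搭配 binary_crossentropy
### 改回 sigmoid，提高 N 和 K 試試看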

In [27]:
N = 10 # embedding size: each word index is mapped to an N-dimensional vector
K = 12 # number of units in the LSTM layer

In [28]:
# Capacity experiment: the baseline architecture (sigmoid output) rebuilt with
# the current, larger values of N and K.
model = Sequential([
    Embedding(10000, N),
    Dropout(0.2),
    LSTM(K),
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [29]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, None, 10)          100000    
_________________________________________________________________
dropout_4 (Dropout)          (None, None, 10)          0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 12)                1104      
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 13        
Total params: 101,117
Trainable params: 101,117
Non-trainable params: 0
_________________________________________________________________


In [30]:
# Train the larger model with the same settings as before (5 epochs, batch
# size 32); the unassigned History repr becomes the cell output.
model.fit(x_train,y_train,batch_size=32,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x23bfe0d6a58>

In [31]:
# Score the larger model on the test split and print loss and accuracy.
score = model.evaluate(x_test, y_test)
test_loss, test_accuracy = score[0], score[1]
print("loss =", test_loss)
print("正確率 =", test_accuracy)

loss = 0.3879389646339417
正確率 = 0.85596


### 正確率從 0.85844 變成 0.85596，幾乎沒甚麼差
### 修改模型：在 LSTM 後面多加一層 Dropout(0.2)，並把 Embedding 後面的 Dropout 從 0.2 提高到 0.5

In [32]:
# Regularisation experiment: stronger dropout (0.5) after the embedding and an
# additional Dropout(0.2) between the LSTM and the sigmoid output.
model = Sequential([
    Embedding(10000, N),
    Dropout(0.5),
    LSTM(K),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [33]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, None, 10)          100000    
_________________________________________________________________
dropout_5 (Dropout)          (None, None, 10)          0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 12)                1104      
_________________________________________________________________
dropout_6 (Dropout)          (None, 12)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 13        
Total params: 101,117
Trainable params: 101,117
Non-trainable params: 0
_________________________________________________________________


In [34]:
# Train the two-dropout model with the same settings as before (5 epochs,
# batch size 32); the unassigned History repr becomes the cell output.
model.fit(x_train,y_train,batch_size=32,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x23bfa2aa9e8>

In [35]:
# Score the two-dropout model on the test split and print loss and accuracy.
score = model.evaluate(x_test, y_test)
test_loss, test_accuracy = score[0], score[1]
print("loss =", test_loss)
print("正確率 =", test_accuracy)

loss = 0.3410197823524475
正確率 = 0.8644


### 最高正確率：0.8644（N = 10、K = 12，Dropout(0.5) + LSTM + Dropout(0.2)，sigmoid 輸出）