## 数据预处理

In [1]:
from keras.datasets import imdb
from keras.preprocessing import sequence
import numpy as np

Using Theano backend.


In [2]:
(X_train, y_train), (X_test, y_test) = imdb.load_data()
# 使各序列长度都为400 
max_sequence_len = 400
X_train = sequence.pad_sequences(X_train, maxlen=max_sequence_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_sequence_len)
# 计算词汇表大小
vocab_size = np.max([np.max(X_train[i]) for i in range (X_train.shape[0])]) + 1

## 嵌入层

In [3]:
from keras.models import Sequential
from keras.layers import Embedding

In [4]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=max_sequence_len))

## LSTM层

In [5]:
from keras.layers import LSTM

In [6]:
model.add(LSTM(units=16))

## 全连接层

In [7]:
from keras.layers import Dense
from keras.layers import Dropout

In [8]:
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(0.35))

## 输出层

In [9]:
model.add(Dense(units=1, activation='sigmoid'))

## 概要

In [10]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 400, 64)           5669568   
_________________________________________________________________
lstm_1 (LSTM)                (None, 16)                5184      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               4352      
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 5,679,361
Trainable params: 5,679,361
Non-trainable params: 0
_________________________________________________________________


## 编译

In [11]:
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

## 训练

In [12]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=100)

Train on 25000 samples, validate on 25000 samples
Epoch 1/5

  100/25000 [..............................] - ETA: 2:25 - loss: 0.6931 - accuracy: 0.5000
  200/25000 [..............................] - ETA: 2:24 - loss: 0.6930 - accuracy: 0.5000
  300/25000 [..............................] - ETA: 2:21 - loss: 0.6935 - accuracy: 0.4533
  400/25000 [..............................] - ETA: 2:20 - loss: 0.6938 - accuracy: 0.4475
  500/25000 [..............................] - ETA: 2:18 - loss: 0.6938 - accuracy: 0.4440
  600/25000 [..............................] - ETA: 2:19 - loss: 0.6934 - accuracy: 0.4683
  700/25000 [..............................] - ETA: 2:17 - loss: 0.6933 - accuracy: 0.4757
  800/25000 [..............................] - ETA: 2:16 - loss: 0.6932 - accuracy: 0.4775
  900/25000 [>.............................] - ETA: 2:15 - loss: 0.6930 - accuracy: 0.5022
 1000/25000 [>.............................] - ETA: 2:15 - loss: 0.6927 - accuracy: 0.5150
 1100/25000 [>...............



Epoch 2/5

  100/25000 [..............................] - ETA: 3:01 - loss: 0.2092 - accuracy: 0.9300
  200/25000 [..............................] - ETA: 3:02 - loss: 0.2557 - accuracy: 0.9100
  300/25000 [..............................] - ETA: 3:03 - loss: 0.2351 - accuracy: 0.9200
  400/25000 [..............................] - ETA: 3:04 - loss: 0.2466 - accuracy: 0.9125
  500/25000 [..............................] - ETA: 3:05 - loss: 0.2542 - accuracy: 0.9060
  600/25000 [..............................] - ETA: 3:03 - loss: 0.2494 - accuracy: 0.9067
  700/25000 [..............................] - ETA: 3:03 - loss: 0.2420 - accuracy: 0.9071
  800/25000 [..............................] - ETA: 3:03 - loss: 0.2361 - accuracy: 0.9100
  900/25000 [>.............................] - ETA: 3:13 - loss: 0.2390 - accuracy: 0.9089
 1000/25000 [>.............................] - ETA: 3:24 - loss: 0.2345 - accuracy: 0.9110
 1100/25000 [>.............................] - ETA: 3:34 - loss: 0.2321 - accur

 2000/25000 [=>............................] - ETA: 3:22 - loss: 0.2127 - accuracy: 0.9205
 2100/25000 [=>............................] - ETA: 3:17 - loss: 0.2127 - accuracy: 0.9205
 2200/25000 [=>............................] - ETA: 3:13 - loss: 0.2161 - accuracy: 0.9195
 2300/25000 [=>............................] - ETA: 3:10 - loss: 0.2161 - accuracy: 0.9200
 2400/25000 [=>............................] - ETA: 3:07 - loss: 0.2163 - accuracy: 0.9196
 2500/25000 [==>...........................] - ETA: 3:03 - loss: 0.2164 - accuracy: 0.9188
 2600/25000 [==>...........................] - ETA: 3:00 - loss: 0.2150 - accuracy: 0.9192
 2700/25000 [==>...........................] - ETA: 2:57 - loss: 0.2136 - accuracy: 0.9200
 2800/25000 [==>...........................] - ETA: 2:55 - loss: 0.2131 - accuracy: 0.9207
 2900/25000 [==>...........................] - ETA: 2:52 - loss: 0.2115 - accuracy: 0.9214
 3000/25000 [==>...........................] - ETA: 2:50 - loss: 0.2094 - accuracy: 0.9220



Epoch 3/5

  100/25000 [..............................] - ETA: 2:32 - loss: 0.1095 - accuracy: 0.9600
  200/25000 [..............................] - ETA: 2:24 - loss: 0.1276 - accuracy: 0.9650
  300/25000 [..............................] - ETA: 2:23 - loss: 0.1206 - accuracy: 0.9633
  400/25000 [..............................] - ETA: 2:22 - loss: 0.1232 - accuracy: 0.9625
  500/25000 [..............................] - ETA: 2:22 - loss: 0.1311 - accuracy: 0.9620
  600/25000 [..............................] - ETA: 2:21 - loss: 0.1357 - accuracy: 0.9600
  700/25000 [..............................] - ETA: 2:21 - loss: 0.1260 - accuracy: 0.9629
  800/25000 [..............................] - ETA: 2:21 - loss: 0.1320 - accuracy: 0.9600
  900/25000 [>.............................] - ETA: 2:22 - loss: 0.1399 - accuracy: 0.9567
 1000/25000 [>.............................] - ETA: 2:22 - loss: 0.1400 - accuracy: 0.9570
 1100/25000 [>.............................] - ETA: 2:20 - loss: 0.1386 - accur

 4000/25000 [===>..........................] - ETA: 2:02 - loss: 0.1521 - accuracy: 0.9473
 4100/25000 [===>..........................] - ETA: 2:02 - loss: 0.1503 - accuracy: 0.9478
 4200/25000 [====>.........................] - ETA: 2:01 - loss: 0.1520 - accuracy: 0.9474
 4300/25000 [====>.........................] - ETA: 2:00 - loss: 0.1519 - accuracy: 0.9474
 4400/25000 [====>.........................] - ETA: 2:01 - loss: 0.1526 - accuracy: 0.9473
 4500/25000 [====>.........................] - ETA: 2:00 - loss: 0.1511 - accuracy: 0.9478
 4600/25000 [====>.........................] - ETA: 1:59 - loss: 0.1516 - accuracy: 0.9480
 4700/25000 [====>.........................] - ETA: 1:59 - loss: 0.1514 - accuracy: 0.9481
 4800/25000 [====>.........................] - ETA: 1:58 - loss: 0.1501 - accuracy: 0.9485
 4900/25000 [====>.........................] - ETA: 1:58 - loss: 0.1481 - accuracy: 0.9494
 5000/25000 [=====>........................] - ETA: 1:57 - loss: 0.1479 - accuracy: 0.9492



Epoch 4/5

  100/25000 [..............................] - ETA: 2:31 - loss: 0.1273 - accuracy: 0.9700
  200/25000 [..............................] - ETA: 2:31 - loss: 0.1140 - accuracy: 0.9550
  300/25000 [..............................] - ETA: 2:28 - loss: 0.0933 - accuracy: 0.9600
  400/25000 [..............................] - ETA: 2:27 - loss: 0.0971 - accuracy: 0.9625
  500/25000 [..............................] - ETA: 2:26 - loss: 0.0973 - accuracy: 0.9640
  600/25000 [..............................] - ETA: 2:24 - loss: 0.1096 - accuracy: 0.9633
  700/25000 [..............................] - ETA: 2:24 - loss: 0.1074 - accuracy: 0.9657
  800/25000 [..............................] - ETA: 2:22 - loss: 0.1006 - accuracy: 0.9675
  900/25000 [>.............................] - ETA: 2:22 - loss: 0.0929 - accuracy: 0.9711
 1000/25000 [>.............................] - ETA: 2:22 - loss: 0.0982 - accuracy: 0.9690
 1100/25000 [>.............................] - ETA: 2:21 - loss: 0.1012 - accur





Epoch 5/5

  100/25000 [..............................] - ETA: 2:17 - loss: 0.0991 - accuracy: 0.9600
  200/25000 [..............................] - ETA: 2:21 - loss: 0.0731 - accuracy: 0.9750
  300/25000 [..............................] - ETA: 2:19 - loss: 0.0678 - accuracy: 0.9800
  400/25000 [..............................] - ETA: 2:19 - loss: 0.0732 - accuracy: 0.9775
  500/25000 [..............................] - ETA: 2:18 - loss: 0.0722 - accuracy: 0.9800
  600/25000 [..............................] - ETA: 2:18 - loss: 0.0663 - accuracy: 0.9817
  700/25000 [..............................] - ETA: 2:17 - loss: 0.0769 - accuracy: 0.9771
  800/25000 [..............................] - ETA: 2:16 - loss: 0.0885 - accuracy: 0.9712
  900/25000 [>.............................] - ETA: 2:16 - loss: 0.0854 - accuracy: 0.9711
 1000/25000 [>.............................] - ETA: 2:15 - loss: 0.0835 - accuracy: 0.9730
 1100/25000 [>.............................] - ETA: 2:14 - loss: 0.0867 - accur





<keras.callbacks.callbacks.History at 0x10b50128>

## 评估

In [13]:
scores = model.evaluate(X_test, y_test, verbose=1)
scores[1]


   32/25000 [..............................] - ETA: 52s
   64/25000 [..............................] - ETA: 50s
   96/25000 [..............................] - ETA: 49s
  128/25000 [..............................] - ETA: 50s
  160/25000 [..............................] - ETA: 49s
  192/25000 [..............................] - ETA: 49s
  224/25000 [..............................] - ETA: 49s
  256/25000 [..............................] - ETA: 50s
  288/25000 [..............................] - ETA: 51s
  320/25000 [..............................] - ETA: 51s
  352/25000 [..............................] - ETA: 51s
  384/25000 [..............................] - ETA: 51s
  416/25000 [..............................] - ETA: 51s
  448/25000 [..............................] - ETA: 51s
  480/25000 [..............................] - ETA: 51s
  512/25000 [..............................] - ETA: 50s
  544/25000 [..............................] - ETA: 50s
  576/25000 [..............................] - 

 4704/25000 [====>.........................] - ETA: 40s
 4736/25000 [====>.........................] - ETA: 40s
 4768/25000 [====>.........................] - ETA: 40s
 4800/25000 [====>.........................] - ETA: 40s
 4832/25000 [====>.........................] - ETA: 40s
 4864/25000 [====>.........................] - ETA: 39s
 4896/25000 [====>.........................] - ETA: 39s
 4928/25000 [====>.........................] - ETA: 39s
 4960/25000 [====>.........................] - ETA: 39s
 4992/25000 [====>.........................] - ETA: 39s
 5024/25000 [=====>........................] - ETA: 39s
 5056/25000 [=====>........................] - ETA: 39s
 5088/25000 [=====>........................] - ETA: 39s
 5120/25000 [=====>........................] - ETA: 39s
 5152/25000 [=====>........................] - ETA: 39s
 5184/25000 [=====>........................] - ETA: 39s
 5216/25000 [=====>........................] - ETA: 39s
 5248/25000 [=====>........................] - E









0.8676000237464905