In [1]:
from keras.datasets import imdb

Using Theano backend.


## 数据预处理

In [2]:
(X_train, y_train), (X_test, y_test) = imdb.load_data()

In [3]:
from keras.preprocessing import sequence

In [4]:
# 使各序列长度都为400 
max_sequence_len = 400
X_train = sequence.pad_sequences(X_train, maxlen=max_sequence_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_sequence_len)

In [5]:
import numpy as np

In [6]:
# 计算词汇表大小
vocab_size = np.max([np.max(X_train[i]) for i in range (X_train.shape[0])]) + 1

## 嵌入层

In [7]:
from keras.models import Sequential
from keras.layers import Embedding

In [8]:
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=max_sequence_len))

## RNN层

In [9]:
from keras.layers import SimpleRNN

In [10]:
model.add(SimpleRNN(units=16))

## 全连接层

In [11]:
from keras.layers import Dense
from keras.layers import Dropout

In [12]:
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(0.35))

## 输出层

In [13]:
model.add(Dense(units=1, activation='sigmoid'))

## 概要

In [14]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 400, 64)           5669568   
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 16)                1296      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               4352      
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 5,675,473
Trainable params: 5,675,473
Non-trainable params: 0
_________________________________________________________________


## 编译

In [15]:
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

## 训练

In [16]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=100)

Train on 25000 samples, validate on 25000 samples
Epoch 1/5

  100/25000 [..............................] - ETA: 49s - loss: 0.6920 - accuracy: 0.5100
  200/25000 [..............................] - ETA: 52s - loss: 0.6933 - accuracy: 0.4900
  300/25000 [..............................] - ETA: 49s - loss: 0.6906 - accuracy: 0.5200
  400/25000 [..............................] - ETA: 47s - loss: 0.6919 - accuracy: 0.5200
  500/25000 [..............................] - ETA: 45s - loss: 0.6941 - accuracy: 0.5000
  600/25000 [..............................] - ETA: 45s - loss: 0.6945 - accuracy: 0.4967
  700/25000 [..............................] - ETA: 44s - loss: 0.6948 - accuracy: 0.4914
  800/25000 [..............................] - ETA: 44s - loss: 0.6957 - accuracy: 0.4837
  900/25000 [>.............................] - ETA: 44s - loss: 0.6958 - accuracy: 0.4789
 1000/25000 [>.............................] - ETA: 44s - loss: 0.6951 - accuracy: 0.4840
 1100/25000 [>.........................



Epoch 2/5

  100/25000 [..............................] - ETA: 44s - loss: 0.2270 - accuracy: 0.9000
  200/25000 [..............................] - ETA: 43s - loss: 0.2636 - accuracy: 0.9000
  300/25000 [..............................] - ETA: 42s - loss: 0.3191 - accuracy: 0.8667
  400/25000 [..............................] - ETA: 42s - loss: 0.3379 - accuracy: 0.8625
  500/25000 [..............................] - ETA: 42s - loss: 0.3151 - accuracy: 0.8740
  600/25000 [..............................] - ETA: 42s - loss: 0.2940 - accuracy: 0.8867
  700/25000 [..............................] - ETA: 44s - loss: 0.2890 - accuracy: 0.8900
  800/25000 [..............................] - ETA: 43s - loss: 0.2813 - accuracy: 0.8925
  900/25000 [>.............................] - ETA: 43s - loss: 0.2763 - accuracy: 0.8967
 1000/25000 [>.............................] - ETA: 43s - loss: 0.2855 - accuracy: 0.8910
 1100/25000 [>.............................] - ETA: 43s - loss: 0.2816 - accuracy: 0.8918

 2300/25000 [=>............................] - ETA: 40s - loss: 0.2746 - accuracy: 0.8930
 2400/25000 [=>............................] - ETA: 40s - loss: 0.2740 - accuracy: 0.8929
 2500/25000 [==>...........................] - ETA: 40s - loss: 0.2724 - accuracy: 0.8932
 2600/25000 [==>...........................] - ETA: 40s - loss: 0.2764 - accuracy: 0.8912
 2700/25000 [==>...........................] - ETA: 39s - loss: 0.2826 - accuracy: 0.8874
 2800/25000 [==>...........................] - ETA: 39s - loss: 0.2839 - accuracy: 0.8875
 2900/25000 [==>...........................] - ETA: 39s - loss: 0.2841 - accuracy: 0.8862
 3000/25000 [==>...........................] - ETA: 39s - loss: 0.2826 - accuracy: 0.8873
 3100/25000 [==>...........................] - ETA: 39s - loss: 0.2826 - accuracy: 0.8877
 3200/25000 [==>...........................] - ETA: 39s - loss: 0.2816 - accuracy: 0.8884
 3300/25000 [==>...........................] - ETA: 38s - loss: 0.2809 - accuracy: 0.8882
 3400/2500



Epoch 3/5

  100/25000 [..............................] - ETA: 45s - loss: 0.1221 - accuracy: 0.9600
  200/25000 [..............................] - ETA: 44s - loss: 0.1932 - accuracy: 0.9150
  300/25000 [..............................] - ETA: 43s - loss: 0.2008 - accuracy: 0.9200
  400/25000 [..............................] - ETA: 43s - loss: 0.2047 - accuracy: 0.9250
  500/25000 [..............................] - ETA: 43s - loss: 0.2015 - accuracy: 0.9300
  600/25000 [..............................] - ETA: 43s - loss: 0.1977 - accuracy: 0.9300
  700/25000 [..............................] - ETA: 44s - loss: 0.2011 - accuracy: 0.9286
  800/25000 [..............................] - ETA: 43s - loss: 0.1936 - accuracy: 0.9312
  900/25000 [>.............................] - ETA: 43s - loss: 0.1898 - accuracy: 0.9344
 1000/25000 [>.............................] - ETA: 43s - loss: 0.1880 - accuracy: 0.9380
 1100/25000 [>.............................] - ETA: 42s - loss: 0.1891 - accuracy: 0.9355

 4500/25000 [====>.........................] - ETA: 37s - loss: 0.1943 - accuracy: 0.9289
 4600/25000 [====>.........................] - ETA: 36s - loss: 0.1946 - accuracy: 0.9285
 4700/25000 [====>.........................] - ETA: 36s - loss: 0.1926 - accuracy: 0.9294
 4800/25000 [====>.........................] - ETA: 36s - loss: 0.1930 - accuracy: 0.9296
 4900/25000 [====>.........................] - ETA: 36s - loss: 0.1924 - accuracy: 0.9296
 5000/25000 [=====>........................] - ETA: 36s - loss: 0.1926 - accuracy: 0.9288
 5100/25000 [=====>........................] - ETA: 35s - loss: 0.1905 - accuracy: 0.9294
 5200/25000 [=====>........................] - ETA: 35s - loss: 0.1902 - accuracy: 0.9296
 5300/25000 [=====>........................] - ETA: 35s - loss: 0.1909 - accuracy: 0.9294
 5400/25000 [=====>........................] - ETA: 35s - loss: 0.1915 - accuracy: 0.9291
 5500/25000 [=====>........................] - ETA: 35s - loss: 0.1900 - accuracy: 0.9298
 5600/2500



Epoch 4/5

  100/25000 [..............................] - ETA: 43s - loss: 0.1800 - accuracy: 0.9300
  200/25000 [..............................] - ETA: 46s - loss: 0.1503 - accuracy: 0.9450
  300/25000 [..............................] - ETA: 47s - loss: 0.1277 - accuracy: 0.9600
  400/25000 [..............................] - ETA: 46s - loss: 0.1140 - accuracy: 0.9625
  500/25000 [..............................] - ETA: 45s - loss: 0.1161 - accuracy: 0.9600
  600/25000 [..............................] - ETA: 44s - loss: 0.1134 - accuracy: 0.9600
  700/25000 [..............................] - ETA: 45s - loss: 0.1085 - accuracy: 0.9600
  800/25000 [..............................] - ETA: 47s - loss: 0.1082 - accuracy: 0.9625
  900/25000 [>.............................] - ETA: 47s - loss: 0.1048 - accuracy: 0.9656
 1000/25000 [>.............................] - ETA: 46s - loss: 0.1000 - accuracy: 0.9680
 1100/25000 [>.............................] - ETA: 45s - loss: 0.1074 - accuracy: 0.9636





Epoch 5/5

  100/25000 [..............................] - ETA: 46s - loss: 0.0713 - accuracy: 0.9600
  200/25000 [..............................] - ETA: 46s - loss: 0.0554 - accuracy: 0.9750
  300/25000 [..............................] - ETA: 47s - loss: 0.0594 - accuracy: 0.9800
  400/25000 [..............................] - ETA: 48s - loss: 0.0506 - accuracy: 0.9850
  500/25000 [..............................] - ETA: 47s - loss: 0.0430 - accuracy: 0.9880
  600/25000 [..............................] - ETA: 46s - loss: 0.0374 - accuracy: 0.9900
  700/25000 [..............................] - ETA: 46s - loss: 0.0367 - accuracy: 0.9900
  800/25000 [..............................] - ETA: 45s - loss: 0.0342 - accuracy: 0.9912
  900/25000 [>.............................] - ETA: 45s - loss: 0.0355 - accuracy: 0.9889
 1000/25000 [>.............................] - ETA: 45s - loss: 0.0387 - accuracy: 0.9870
 1100/25000 [>.............................] - ETA: 45s - loss: 0.0390 - accuracy: 0.9873





<keras.callbacks.callbacks.History at 0xd5d7438>

## 评估

In [17]:
scores = model.evaluate(X_test, y_test, verbose=1)
scores[1]


   32/25000 [..............................] - ETA: 13s
  128/25000 [..............................] - ETA: 13s
  256/25000 [..............................] - ETA: 11s
  384/25000 [..............................] - ETA: 11s
  544/25000 [..............................] - ETA: 10s
  704/25000 [..............................] - ETA: 9s 
  896/25000 [>.............................] - ETA: 9s
 1088/25000 [>.............................] - ETA: 8s
 1280/25000 [>.............................] - ETA: 8s
 1472/25000 [>.............................] - ETA: 8s
 1568/25000 [>.............................] - ETA: 8s
 1664/25000 [>.............................] - ETA: 8s
 1760/25000 [=>............................] - ETA: 9s
 1856/25000 [=>............................] - ETA: 9s
 2048/25000 [=>............................] - ETA: 8s
 2240/25000 [=>............................] - ETA: 8s
 2432/25000 [=>............................] - ETA: 8s
 2592/25000 [==>...........................] - ETA: 8s
 27



0.843559980392456