# 1. 数据预处理

In [None]:
from keras.datasets import imdb

# Load the IMDB dataset via keras.datasets.imdb; the call returns a
# (train, test) pair, each of which unpacks into (data, labels).
# NOTE(review): num_words=10000 presumably caps the vocabulary at the 10000
# most frequent words -- confirm against the Keras docs. It matches the
# default `dimension` of vectorize_sequences later in this file.
(train_data, train_label), (test_data, test_label) = imdb.load_data(num_words=10000)

In [None]:
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences into a 2-D array.

    Args:
        sequences: A sized iterable of integer-index sequences; every index
            must be a valid column index for ``dimension``.
        dimension: Number of columns of the output (one per possible index).

    Returns:
        A ``(len(sequences), dimension)`` array of zeros (NumPy's default
        float dtype) in which ``out[r, k]`` is 1 when index ``k`` occurs in
        ``sequences[r]``. Repeated indices collapse to a single 1.
    """
    encoded = np.zeros((len(sequences), dimension))
    for row, word_indices in enumerate(sequences):
        # Fancy indexing sets every listed column of this row in one step.
        encoded[row, word_indices] = 1
    return encoded

# Multi-hot encode the review sequences (train first, then test).
x_train, x_test = (
    vectorize_sequences(split) for split in (train_data, test_data)
)

# Labels become float32 arrays (train first, then test).
y_train, y_test = (
    np.asarray(labels).astype('float32')
    for labels in (train_label, test_label)
)

# 2. 网络构建

In [None]:
from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics

# Fully connected binary classifier: two 16-unit ReLU hidden layers over the
# 10000-dimensional input, followed by a single sigmoid output unit.
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# `learning_rate` replaces the deprecated `lr` alias, which current Keras
# releases no longer accept as an optimizer argument.
model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
              loss=losses.binary_crossentropy,
              metrics=[metrics.binary_accuracy])

# 3. 模型训练

In [None]:
# Hold out the first 10000 samples for validation; train on the remainder.
x_val, x_train_real = x_train[:10000], x_train[10000:]
y_val, y_train_real = y_train[:10000], y_train[10000:]

# Train for 20 epochs in mini-batches of 512, evaluating on the held-out
# split; the object returned by fit() is kept for later inspection.
history = model.fit(
    x_train_real,
    y_train_real,
    epochs=20,
    batch_size=512,
    validation_data=(x_val, y_val),
)

# 4. 训练历史

In [None]:
import matplotlib.pyplot as plt

# Metrics recorded by model.fit(), keyed by metric name.
history_dict = history.history

loss_values, val_loss_values = history_dict['loss'], history_dict['val_loss']

# 1-based epoch numbers for the x axis, one per recorded loss value.
epochs = range(1, 1 + len(loss_values))

# Matplotlib format strings: 'bo' draws blue circle markers, 'b' a blue line.
plt.plot(epochs, loss_values, 'bo', label='loss')
plt.plot(epochs, val_loss_values, 'b', label='val_loss')

plt.title('Train and validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()

plt.show()