In [1]:
import numpy as np
SEED = 1337  # fixed value so NumPy's global random stream repeats from run to run
np.random.seed(SEED)

In [2]:
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import SimpleRNN, Activation, Dense
from keras.optimizers import Adam

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Input geometry: each image is fed to the RNN row by row.
INPUT_SIZE = 28     # values per time step (same as the width of the image)
TIME_STEPS = 28     # number of time steps (same as the height of the image)

# Network dimensions.
CELL_SIZE = 50      # size of the SimpleRNN layer
OUTPUT_SIZE = 10    # size of the final Dense layer

# Training settings.
LR = 0.001          # learning rate passed to the Adam optimizer
BATCH_SIZE = 50     # number of samples taken per training batch
BATCH_INDEX = 0     # offset into the training set where the next batch starts

In [4]:
# Fetch MNIST; on the first call it is downloaded to '~/.keras/datasets/'.
# Per the Keras dataset docs the training split holds 60,000 images of 28x28
# pixels and the test split 10,000, each with one integer label per image.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [5]:
# data pre-processing
# NOTE: this cell overwrites its own inputs, so run it exactly once after the
# load cell; a second run would rescale the images and re-encode the labels.

# Lay every image out as (TIME_STEPS, INPUT_SIZE) and scale the pixels by 1/255.
# The shape comes from the config constants so it cannot drift from the model.
X_train = X_train.reshape(-1, TIME_STEPS, INPUT_SIZE) / 255.      # normalize
X_test = X_test.reshape(-1, TIME_STEPS, INPUT_SIZE) / 255.        # normalize

# One-hot encode the labels; the width matches the Dense output layer.
y_train = np_utils.to_categorical(y_train, num_classes=OUTPUT_SIZE)
y_test = np_utils.to_categorical(y_test, num_classes=OUTPUT_SIZE)

In [6]:
# build RNN model
# Start from an empty Sequential container; the following cells append the
# layers to it one at a time with model.add().
model = Sequential()

In [7]:
# RNN cell
model.add(SimpleRNN(
    # for batch_input_shape, if using tensorflow as the backend, we have to put None for the batch_size.
    # Otherwise, model.evaluate() will get error.
    # batch_input_shape=(None,28,28)
    batch_input_shape=(None, TIME_STEPS, INPUT_SIZE),       # Or: input_dim=INPUT_SIZE, input_length=TIME_STEPS,
    # `units` is the Keras 2 keyword for the layer size; the legacy
    # `output_dim` spelling is deprecated and triggers an API warning.
    units=CELL_SIZE,
    unroll=True,
))

  import sys


In [8]:
# output layer
# A Dense layer of width OUTPUT_SIZE (10) followed by a softmax activation,
# appended to the model in that order.
logits_layer = Dense(OUTPUT_SIZE)
model.add(logits_layer)
softmax_layer = Activation('softmax')
model.add(softmax_layer)

In [9]:
# Optimizer: Adam with the learning rate from the config cell.
adam = Adam(LR)

# compile() takes three arguments here:
#   optimizer - the name of a predefined optimizer (e.g. rmsprop, adagrad)
#               or an Optimizer instance, as used below
#   loss      - the loss function
#   metrics   - list of metrics; for classification problems this is usually
#               metrics=['accuracy']
model.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['accuracy'],
)

In [14]:
# training
# Feed the training set to the model one mini-batch at a time, wrapping back
# to the first sample once the end of X_train is reached, and report the
# test-set loss/accuracy at a fixed interval.
# NOTE: BATCH_INDEX is module-level state initialised in the config cell and
# advanced here, so re-running this cell resumes from wherever the previous
# run stopped instead of from sample 0.
TRAIN_STEPS = 4001      # number of mini-batch updates to perform
EVAL_EVERY = 500        # evaluate on the test set every this many steps

for step in range(TRAIN_STEPS):
    # data shape = (batch_num, steps, inputs/outputs)
    batch_end = BATCH_INDEX + BATCH_SIZE
    X_batch = X_train[BATCH_INDEX:batch_end]
    Y_batch = y_train[BATCH_INDEX:batch_end]

    # Kept under its own name so the mini-batch result is not confused with
    # (or overwritten by) the test-set `cost` computed below.
    train_cost = model.train_on_batch(X_batch, Y_batch)

    # Advance the cursor; start over once the training set is exhausted.
    BATCH_INDEX = batch_end if batch_end < X_train.shape[0] else 0

    if step % EVAL_EVERY == 0:
        # The whole test set is evaluated as a single batch.
        cost, accuracy = model.evaluate(X_test, y_test, batch_size=y_test.shape[0], verbose=0)
        print('test cost: ', cost, 'test accuracy: ', accuracy)

test cost:  0.2465071827173233 test accuracy:  0.9279000163078308
test cost:  0.2183101773262024 test accuracy:  0.9386000037193298
test cost:  0.21624697744846344 test accuracy:  0.9376000165939331
test cost:  0.20868970453739166 test accuracy:  0.9426000118255615
test cost:  0.20225732028484344 test accuracy:  0.9415000081062317
test cost:  0.18744896352291107 test accuracy:  0.9466999769210815
test cost:  0.1942438781261444 test accuracy:  0.9452000260353088
test cost:  0.166709303855896 test accuracy:  0.9508000016212463
test cost:  0.20175358653068542 test accuracy:  0.9430000185966492
