In [4]:
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import SimpleRNN, Activation, Dense
from keras.optimizers import Adam

TIME_STEPS = 28     # same as the height of the image

INPUT_SIZE = 28     # same as the width of the image

BATCH_SIZE = 50
BATCH_INDEX = 0

OUTPUT_SIZE = 10

CELL_SIZE = 50

LR = 0.001

MNIST里面的图像分辨率是28×28，为用RNN，将图像理解为序列化数据。

每一行作为一个输入单元，所以输入数据大小 **INPUT_SIZE = 28**； 

先是第1行输入，再是第2行，第3行，第4行，…，第28行输入， 这就是一张图片也就是一个序列，所以步长 **TIME_STEPS = 28**。

训练数据要进行 normalize，因为原始数据是 8bit 灰度图像, 所以需要除以 255。

In [14]:
# download the mnist to the path '~/.keras/datasets/' if it is the first time to be called
# X shape (60,000 28x28), y shape (10,000, )
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# data pre-processing
X_train = X_train.reshape(-1, 28, 28) / 255.      # normalize
X_test = X_test.reshape(-1, 28, 28) / 255.        # normalize
y_train = np_utils.to_categorical(y_train, num_classes=10)
y_test = np_utils.to_categorical(y_test, num_classes=10)

print(X_train.shape)
y_train.shape



(60000, 28, 28)


(60000, 10)

In [27]:
X_batch = X_train[BATCH_INDEX: BATCH_INDEX+BATCH_SIZE, :, :]
Y_batch = y_train[BATCH_INDEX: BATCH_INDEX+BATCH_SIZE, :]

print(X_batch.shape)
print(Y_batch.shape)

(50, 28, 28)
(50, 10)


## build model

In [8]:
# build RNN model
model = Sequential()

# RNN cell
model.add(SimpleRNN(
    # for batch_input_shape, if using tensorflow as the backend, we have to put None for the batch_size.
    # Otherwise, model.evaluate() will get error.
    batch_input_shape=(None, TIME_STEPS, INPUT_SIZE),       # Or: input_dim=INPUT_SIZE, input_length=TIME_STEPS,
    output_dim=CELL_SIZE,
    unroll=True,
))

W0826 10:56:25.415935 4616820160 deprecation_wrapper.py:119] From /Users/blair/.pyenv/versions/keras/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

  # Remove the CWD from sys.path while we load stuff.
W0826 10:56:25.455575 4616820160 deprecation_wrapper.py:119] From /Users/blair/.pyenv/versions/keras/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0826 10:56:25.471865 4616820160 deprecation_wrapper.py:119] From /Users/blair/.pyenv/versions/keras/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



In [10]:
# output layer
model.add(Dense(OUTPUT_SIZE))
model.add(Activation('softmax'))

# optimizer
adam = Adam(LR)
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_1 (SimpleRNN)     (None, 50)                3950      
_________________________________________________________________
dense_1 (Dense)              (None, 10)                510       
_________________________________________________________________
activation_1 (Activation)    (None, 10)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                110       
_________________________________________________________________
activation_2 (Activation)    (None, 10)                0         
Total params: 4,570
Trainable params: 4,570
Non-trainable params: 0
_________________________________________________________________


training，并输出 test 上的 loss 和 accuracy 结果

In [11]:
# training
for step in range(4001):
    # data shape = (batch_num, steps, inputs/outputs)
    X_batch = X_train[BATCH_INDEX: BATCH_INDEX+BATCH_SIZE, :, :]
    Y_batch = y_train[BATCH_INDEX: BATCH_INDEX+BATCH_SIZE, :]
    cost = model.train_on_batch(X_batch, Y_batch)
    BATCH_INDEX += BATCH_SIZE
    BATCH_INDEX = 0 if BATCH_INDEX >= X_train.shape[0] else BATCH_INDEX

    if step % 500 == 0:
        cost, accuracy = model.evaluate(X_test, y_test, batch_size=y_test.shape[0], verbose=False)
        print('test cost: ', cost, 'test accuracy: ', accuracy)

W0826 14:34:28.317219 4616820160 deprecation.py:323] From /Users/blair/.pyenv/versions/keras/lib/python3.5/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0826 14:34:28.822501 4616820160 deprecation_wrapper.py:119] From /Users/blair/.pyenv/versions/keras/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



test cost:  2.311124086380005 test accuracy:  0.0957999974489212
test cost:  1.6327736377716064 test accuracy:  0.5228999853134155
test cost:  1.3161704540252686 test accuracy:  0.559499979019165
test cost:  1.1487971544265747 test accuracy:  0.5494999885559082
test cost:  1.0471760034561157 test accuracy:  0.5713000297546387
test cost:  1.0110148191452026 test accuracy:  0.5630999803543091
test cost:  0.9520753622055054 test accuracy:  0.5877000093460083
test cost:  0.8796814680099487 test accuracy:  0.604200005531311
test cost:  0.858435869216919 test accuracy:  0.6585999727249146
