# 使用CNN网络训练MNIST数据集

## 1.CNN网络的图示

### 1.1.卷积的图示(No Padding)

![卷积](./conv.gif "卷积")

### 2.一种CNN网络架构(DeepID)

![CNN网络架构](./cnn_model.png "CNN网络架构")

## 2.下载并载入MNIST数据集

In [3]:
# 下载mnist数据集
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('/tmp/', one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /tmp/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /tmp/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /tmp/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /tmp/t10k-labels-idx1-ubyte.gz


## 3.定义CNN网络分类MNIST数据集

### 3.1.引入依赖包

In [6]:
import numpy as np
import tensorflow as tf

sess = tf.InteractiveSession()

### 3.2.定义网络参数

In [31]:
image_size = 28

num_classes = 10
batch_size  = 100

learning_rate  = 1e-4
training_iters = 20000 
display_step   = 10

filter_conf = [(5, 5, 1, 32), (5, 5, 32, 64)] # [(size, in_channel, output_channel)]

X = tf.placeholder(tf.float32, [None, image_size*image_size])
Y = tf.placeholder(tf.float32, [None, num_classes])
keep_dropout_prob = tf.placeholder(tf.float32)

### 3.3.定义CNN网络模型

In [32]:
def weight_variable(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W, bias):
    conv = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
    return tf.nn.relu(conv + bias)


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def ConvNeuralNetwork(x):
    
    # 1.convolution + maxpool layer
    pool_layer = tf.reshape(x, [-1, image_size, image_size, 1]) # [batch_size, width, height, channel]
    for idx, (filter_size, filter_size, in_channel, output_channel) in enumerate(filter_conf):
        with tf.name_scope('conv_maxpool_layer_%d' % idx):
            filter_shape = [filter_size, filter_size, in_channel, output_channel]
            W = weight_variable(filter_shape)   # 注意分析为什么这里权重的shape是这样的——权值共享
            b = bias_variable([output_channel]) # 每一个filter的输出图只对应一个bias，因此总数是output_channel个
            conv_layer  = conv2d(pool_layer, W, b)
            pool_layer  = max_pool_2x2(conv_layer)

    # 2.fully-connected layer
    W_fc1 = weight_variable([7*7*64, 1024])
    b_fc1 = bias_variable([1024])
    pool_flat = tf.reshape(pool_layer, [-1, 7*7*64])
    fc_output = tf.nn.relu(tf.matmul(pool_flat, W_fc1) + b_fc1)
    
    # 3.dropout layer
    drop_output = tf.nn.dropout(fc_output, keep_dropout_prob)
    
    # 4.readout layer
    W_fc2 = weight_variable([1024, num_classes])
    b_fc2 = bias_variable([num_classes])
    output = tf.matmul(drop_output, W_fc2) + b_fc2
  
    return output

### 4.4.定义训练和评估函数

In [33]:
# train graph
predict = ConvNeuralNetwork(X)
cost    = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predict, labels=Y))
train   = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# evaluate graph
correct  = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

### 4.5.训练和测试

In [34]:
init = tf.global_variables_initializer()
sess.run(init)

init = tf.global_variables_initializer()
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    # Reshape data to get 28 seq of 28 elements
    batch_x = batch_x.reshape((batch_size, image_size*image_size))
    # Run optimization op (backprop)
    sess.run(train, feed_dict={X: batch_x, Y: batch_y, keep_dropout_prob:0.5})
    if step % display_step == 0:
        # Calculate batch accuracy
        acc = sess.run(accuracy, feed_dict={X: batch_x, Y: batch_y, keep_dropout_prob:1.0})
        # Calculate batch loss
        loss = sess.run(cost, feed_dict={X: batch_x, Y: batch_y, keep_dropout_prob:1.0})
        print "Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc)
    step += 1
print "Optimization Finished!"

# Calculate accuracy for 128 mnist test images
test_len = 128
test_data = mnist.test.images[:test_len].reshape((-1, image_size*image_size))
test_label = mnist.test.labels[:test_len]
print "Testing Accuracy:", sess.run(accuracy, feed_dict={X: test_data, Y: test_label, keep_dropout_prob:1.0})

Iter 1000, Minibatch Loss= 3.014650, Training Accuracy= 0.17000
Iter 2000, Minibatch Loss= 1.698202, Training Accuracy= 0.42000
Iter 3000, Minibatch Loss= 1.361540, Training Accuracy= 0.53000
Iter 4000, Minibatch Loss= 0.945507, Training Accuracy= 0.68000
Iter 5000, Minibatch Loss= 0.754464, Training Accuracy= 0.73000
Iter 6000, Minibatch Loss= 0.752066, Training Accuracy= 0.73000
Iter 7000, Minibatch Loss= 0.652411, Training Accuracy= 0.76000
Iter 8000, Minibatch Loss= 0.555475, Training Accuracy= 0.84000
Iter 9000, Minibatch Loss= 0.653533, Training Accuracy= 0.79000
Iter 10000, Minibatch Loss= 0.306340, Training Accuracy= 0.90000
Iter 11000, Minibatch Loss= 0.496620, Training Accuracy= 0.84000
Iter 12000, Minibatch Loss= 0.437500, Training Accuracy= 0.85000
Iter 13000, Minibatch Loss= 0.314562, Training Accuracy= 0.91000
Iter 14000, Minibatch Loss= 0.418810, Training Accuracy= 0.88000
Iter 15000, Minibatch Loss= 0.401880, Training Accuracy= 0.89000
Iter 16000, Minibatch Loss= 0.3362

## 5.使用keras库构建CNN网络分类MNIST数据集

### 5.1.引入依赖包

In [40]:
from __future__ import division
from keras.models import Sequential
from keras.layers import Activation, Flatten, Dropout, Dense
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.datasets import mnist
from keras.utils import np_utils
from keras import backend as K

### 5.2.定义CNN网络参数

In [65]:
image_size = 28

num_classes = 10
batch_size  = 100

learning_rate  = 1e-4
epoch = 1

filter_conf = [(5, 5, 1, 32), (5, 5, 32, 64)] # [(size, in_channel, output_channel)]

(X_train, y_train), (X_test, y_test) = mnist.load_data()
if K.image_dim_ordering() == 'th':
    X_train = X_train.reshape(X_train.shape[0], 1, image_size, image_size)
    X_test = X_test.reshape(X_test.shape[0], 1, image_size, image_size)
    input_shape = (1, image_size, image_size)
else:
    X_train = X_train.reshape(X_train.shape[0], image_size, image_size, 1)
    X_test = X_test.reshape(X_test.shape[0], image_size, image_size, 1)
    input_shape = (image_size, image_size, 1) # tensorflow: [width, height, channel]
    
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
Y_train = np_utils.to_categorical(y_train, num_classes)
Y_test = np_utils.to_categorical(y_test, num_classes)

### 5.3.定义CNN网络模型

In [69]:
def ConvNeuralNetworkKeras():

    # create model
    model = Sequential()
    
    # 1.convolution + max-pooling Layer
    for idx, (filter_size, filter_size, in_channel, output_channel) in enumerate(filter_conf):
        model.add(Convolution2D(output_channel, filter_size, filter_size, border_mode='same', input_shape=input_shape))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
    
    # 2.fully-connected layer
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))
    
    # 3.dropout layer
    model.add(Dropout(0.5))
    
    # 4.readout layer
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    
    # 5.compile model
    adam = Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    
    return model

### 5.4.训练及测试

In [72]:
model = ConvNeuralNetworkKeras()
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=epoch, verbose=1, validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)

print 'Test score:', score[0]
print 'Test accuracy:', score[1]

Train on 60000 samples, validate on 10000 samples
Epoch 1/1
Test score: 0.0510320704116
Test accuracy: 0.9833
