In [1]:
%matplotlib inline

# TensorFlow 卷积神经网络(Convolutional Neural Network)

In [2]:
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six.moves import xrange


导入MNIST数据集

In [3]:
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('./MNIST_data', one_hot=True)
print(
        '训练集大小: %s;\n验证集大小: %s;\n测试集大小: %s.' % \
        (mnist.train.num_examples, mnist.validation.num_examples, mnist.test.num_examples)
)


Extracting ./MNIST_data/train-images-idx3-ubyte.gz
Extracting ./MNIST_data/train-labels-idx1-ubyte.gz
Extracting ./MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ./MNIST_data/t10k-labels-idx1-ubyte.gz
训练集大小: 55000;
验证集大小: 5000;
测试集大小: 10000.


### 计算机图形学computer vision
计算中，图像可被视为由若干大小相等的举证组成。这些矩阵被称为通道。例如RGB，表示由R红，G绿，B蓝三种颜色组成。二值图一般为单通道。

In [4]:
import tensorflow as tf

# hyper-parameter
training_epoch = 100
learning_rate = 1e-3
batch_size = 100
num_classes = 10
image_size = 28

with tf.name_scope('placehold'):
    # placehold - define variable
    x = tf.placeholder(dtype=tf.float32, shape=[None, image_size ** 2], name='x')
    # -1 for image number; 28,28 for image size; 1 for color channel
    input_tensor = tf.reshape(x, shape=(-1, image_size, image_size, 1))
    y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name='y')

with tf.name_scope('initializer'):
    # build an initializer for initializing network
    initializer = tf.truncated_normal_initializer(stddev=0.01)


def linear(input_data, units=None, activation=tf.nn.relu, name=None):
    return tf.layers.dense(inputs=input_data,
                           units=units,
                           activation=activation,
                           kernel_initializer=initializer,
                           bias_initializer=initializer,
                           name='layer_%s' % (name))


def conv2d(input_data, filters=16, kernel_size=(5, 5),
           activation=tf.nn.relu, padding='SAME', name=None):
    with tf.name_scope('layer_%s' % (name)):
        input_data = tf.layers.conv2d(inputs=input_data,
                                      filters=filters,
                                      kernel_size=kernel_size,
                                      strides=(1, 1),
                                      activation=activation,
                                      padding=padding)
        input_data = tf.layers.max_pooling2d(input_data, pool_size=(2, 2), strides=(2, 2), padding=padding)
        return input_data


# conv1
conv1 = conv2d(input_data=input_tensor, filters=32, name='cnn1')
conv1 = tf.layers.dropout(inputs=conv1, rate=0.7)
# conv2
conv2 = conv2d(input_data=conv1, filters=64, name='cnn2')
# reshape
conv2 = tf.reshape(conv2, [-1, 7 * 7 * 64])
# fc1
fc1 = linear(input_data=conv2, units=1024, name='fc2')
fc1 = tf.layers.dropout(inputs=fc1, rate=0.7)
fc2 = linear(input_data=fc1, units=num_classes, name='fc3', activation=tf.nn.softmax)
y_ = fc2

loss_op = tf.losses.mean_squared_error(labels=y, predictions=y_)
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss_op)
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y_, 1), tf.argmax(y, 1)), dtype=tf.float32))


training & evaluate ...

In [5]:
session = tf.InteractiveSession()
session.run(tf.global_variables_initializer())

batch_episode = len(mnist.train.images) // batch_size
for i in xrange(1, 1 + training_epoch):
    for _ in xrange(batch_episode):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        train_op.run(feed_dict={x: batch_x, y: batch_y})
    if i % 10 == 0:
        # 避免显存不足
        acc = 0.0
        for _ in xrange(batch_episode):
            batch_x, batch_y = mnist.test.next_batch(batch_size)
            batch_acc = session.run([accuracy], feed_dict={x: batch_x, y: batch_y})
            acc += batch_acc
        print('Epoch %s / %s, accuracy: %s' % (i, training_epoch, acc / batch_episode))


Epoch 10 / 100, loss: 0.000720134526364, accuracy: 0.995636359128
Epoch 20 / 100, loss: 0.000538832720966, accuracy: 0.996927267638
Epoch 30 / 100, loss: 0.000711422302202, accuracy: 0.996054540114
Epoch 40 / 100, loss: 0.000659064971865, accuracy: 0.996599995115
Epoch 50 / 100, loss: 0.000683497654355, accuracy: 0.996436360533
Epoch 60 / 100, loss: 0.000473322697859, accuracy: 0.997599994486
Epoch 70 / 100, loss: 0.000795798837745, accuracy: 0.995945450393
Epoch 80 / 100, loss: 0.000785153461347, accuracy: 0.996054541198
Epoch 90 / 100, loss: 0.00375222392155, accuracy: 0.981181825616
Epoch 100 / 100, loss: 0.000678467370861, accuracy: 0.996581812989


CNN的优点：<br>
1.CNN考虑的是区域的特征，对于图像有着其他网络能以比拟的性能（可以认为：凡是涉及到图像的模型，都能加上CNN）。矩阵也可以被视为是图像。<br>
2.网络参数少。能够加速网络的训练。<br>

缺点：<br>
1.若是CNN层数多，网络的学习速度也变慢。<br>

推荐：
1.画风迁移Neural Transfer:<br>
http://pytorch.org/tutorials/advanced/neural_style_tutorial.html<br>
2.经典的CNN网络（关注网页中的Convolutional nets模块）：<br>
http://yann.lecun.com/exdb/mnist/<br>

推荐资料：<br>
[Stanford UFLDL课程，Feature extraction using convolution](http://deeplearning.stanford.edu/wiki/index.php/Feature_extraction_using_convolution)<br>
[Stanford UFLDL课程，Pooling](http://deeplearning.stanford.edu/wiki/index.php/Pooling)<br>
[TensorFlow利用CNN实现MNIST识别](https://www.tensorflow.org/tutorials/layers)<br>
[TensorFlow利用CNN实现CIFAR-10识别](https://www.tensorflow.org/tutorials/deep_cnn)<br>