In [3]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Import data
from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
# import keras
# mnist = keras.datasets.mnist
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

mnist = input_data.read_data_sets('data/', one_hot=True)   # 读取数据集

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


## 按照 Lenet 的结构搭建一个模型
![image.png](./images/18-Figure2.3-1.png)

In [4]:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)    # 变量的初始值为截断正太分布
    """
    这里的 shape 是 [height, width, n_channels, n_filters], 即[卷积核长，卷积核宽，通道数，卷积核个数也就是输出通道数]
    """
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)    # 变量的初始值为截断正太分布
    return tf.Variable(initial)

def conv2d(x, W):
    """
    * **input**：待卷积的数据。格式要求为一个张量，[batch, in_height, in_width, in_channels]，分别表示为：批次数，图像高度，宽度，输入通道数。 
    * **filter**：卷积核。格式要求为 [filter_height, filter_width, in_channels, out_channels]，分别表示：卷积核的高度，宽度，输入通道数，输出通道数。
    * **strides**：一个长为 4 的 list. 表示每次卷积以后卷积窗口在 input 中滑动的距离。
    * **padding**：有 SAME 和 VALID 两种选项，表示是否要保留图像边上那一圈不完全卷积的部分。如果是SAME，则保留。
    * **use_cudnn_on_gpu**：是否使用 cudnn 加速。默认是 True。
    """
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    """
    * **value**: 一个 4D 张量，格式为 [batch, height, width, channels]，与 conv2d 中 input 格式一样。 
    * **ksize**: 长为 4 的 list，表示池化窗口的尺寸。
    * **strides**: 池化窗口的滑动值，与 conv2d 中的一样。
    * **padding**: 与 conv2d 中用法一样。
    """
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

In [5]:
# sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, [None, 784])
x_image = tf.reshape(x, [-1, 28, 28, 1])       # 将输入按照 conv2d 中 input 的格式来 reshape

In [7]:
# 网络结构
"""
# 第一层
# 卷积核(filter)的尺寸是 5*5, 通道数为 1，输出通道为 32，即 feature map 数目为32
# 又因为strides=[1,1,1,1] 所以单个通道的输出尺寸应该跟输入图像一样。即总的卷积输出应该为 ?*28*28*32
# 也就是单个通道输出为 28*28，共有32个通道,共有?个批次
# 在池化阶段，ksize=[1,2,2,1] 那么卷积结果经过池化以后的结果，其尺寸应该是？*14*14*32
"""
W_conv1 = weight_variable([5, 5, 1, 32])     # 卷积是在每个5*5的patch中算出32个特征，这里的 shape 分别表示是 patch大小（5x5），输入通道数目1，输出通道数目32
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.elu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

"""
# 第二层
# 卷积核5*5，输入通道为32，输出通道为64。
# 卷积前图像的尺寸为 ?*14*14*32， 卷积后为?*14*14*64
# 池化后，输出的图像尺寸为?*7*7*64
"""
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.elu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# 第三层 是个全连接层,输入维数7*7*64, 输出维数为1024
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.elu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder(tf.float32)     # 这里使用了drop out,即随机安排一些cell输出值为0，可以防止过拟合
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# 第四层，输入1024维，输出10维，也就是具体的0~9分类
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)   # 使用softmax作为多分类激活函数
y_ = tf.placeholder(tf.float32, [None, 10])


# 定义损失函数和训练方法
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y_conv), reduction_indices=[1]))  # 损失函数，交叉熵
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)  # 使用adam优化
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))  # 计算准确度
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [9]:
# 实际训练模型
sess = tf.InteractiveSession()             # 建立交互式会话
sess.run(tf.initialize_all_variables())        # 所有变量初始化

for i in xrange(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})


print("test accuracy %g" % accuracy.eval(feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

Instructions for updating:
Use `tf.global_variables_initializer` instead.
step 0, training accuracy 0.14
step 100, training accuracy 0.84
step 200, training accuracy 0.94
step 300, training accuracy 0.96
step 400, training accuracy 0.96
step 500, training accuracy 0.9
step 600, training accuracy 0.86
step 700, training accuracy 0.92
step 800, training accuracy 0.98
step 900, training accuracy 0.96
step 1000, training accuracy 0.98
step 1100, training accuracy 0.94
step 1200, training accuracy 0.94
step 1300, training accuracy 0.92
step 1400, training accuracy 0.96
step 1500, training accuracy 1
step 1600, training accuracy 0.94
step 1700, training accuracy 0.94
step 1800, training accuracy 0.96
step 1900, training accuracy 1
step 2000, training accuracy 0.98
step 2100, training accuracy 0.98
step 2200, training accuracy 1
step 2300, training accuracy 0.98
step 2400, training accuracy 0.96
step 2500, training accuracy 0.96
step 2600, training accuracy 0.98
step 2700, training accuracy 1