In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Import data
from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf
import numpy as np
# import keras
# mnist = keras.datasets.mnist
import ssl
# WARNING: this replaces the default HTTPS context factory with the
# unverified one, which turns off TLS certificate verification for every
# HTTPS request made through the stdlib in this process. Presumably a
# workaround for certificate errors while downloading MNIST -- TODO confirm
# and remove once the files are cached under 'data/'.
ssl._create_default_https_context = ssl._create_unverified_context

# Alternative loader with one-hot labels (disabled). Choose one_hot=True with
# care: the rest of this notebook feeds the labels to an int32 placeholder and
# a sparse cross-entropy loss.
# mnist = input_data.read_data_sets('data/', one_hot=True)
mnist = input_data.read_data_sets('data/', reshape=False)   # load the dataset; reshape=False presumably keeps each image 3-D (H, W, C) -- the later 4-axis np.pad relies on that

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz


![image.png](./images/18-Figure2.3-1.png)

In [2]:
# Load the training and test splits. Every image is zero-padded by 2 pixels on
# each spatial side (batch and channel axes are left untouched) so that it
# matches the 32x32 input size expected by the network.
train_data = np.pad(mnist.train.images, ((0, 0), (2, 2), (2, 2), (0, 0)), mode='constant')
train_label = mnist.train.labels
test_data = np.pad(mnist.test.images, ((0, 0), (2, 2), (2, 2), (0, 0)), mode='constant')
test_label = mnist.test.labels

# Shuffle each split, applying one random permutation to both the images and
# their labels so the pairs stay aligned.
train_image_num = train_data.shape[0]
train_image_index = np.random.permutation(train_image_num)
train_data, train_label = train_data[train_image_index], train_label[train_image_index]

test_image_num = test_data.shape[0]
test_image_index = np.random.permutation(test_image_num)
test_data, test_label = test_data[test_image_index], test_label[test_image_index]

In [3]:
# Input / output definitions.
# Each image is fed as a w x h grid with c channel(s).
w = h = 32
c = 1

# x receives a batch of images; y_ receives one integer class id per image.
x = tf.placeholder(dtype=tf.float32, shape=[None, w, h, c], name='x')
y_ = tf.placeholder(dtype=tf.int32, shape=[None], name='y_')

![image.png](./images/18-Figure2.3-1.png)

In [4]:
def build_network(input_tensor, regularizer, is_train=True):
    """Build a LeNet-5 style network and return its unnormalised class logits.

    Layout for a 32x32x1 input:
        conv 5x5 (6 filters, VALID) -> max-pool 2x2 ->
        conv 5x5 (16 filters, VALID) -> max-pool 2x2 ->
        flatten (400) -> fc 120 -> fc 84 -> fc 10

    Args:
        input_tensor: 4-D float tensor laid out as [batch, height, width,
            channels]. The channel count must equal the module-level ``c``,
            which sizes the first convolution kernel.
        regularizer: Callable mapping a weight tensor to a scalar penalty, or
            ``None``. When given, the penalties of the three fully connected
            weight matrices are added to the ``'losses'`` graph collection.
        is_train: Python bool. When true, dropout (keep probability 0.5) is
            applied to the outputs of both hidden fully connected layers.

    Returns:
        A ``[batch, 10]`` tensor of logits (softmax is NOT applied here).
    """
    # Layer 1: convolution, 5x5 kernel, depth 6, no zero padding, stride 1.
    # Shape: 32x32x1 -> 28x28x6
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable('weight', [5, 5, c, 6], initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable('bias', [6], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='VALID')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # Layer 2: max pooling, 2x2 window, SAME padding, stride 2.
    # Shape: 28x28x6 -> 14x14x6
    with tf.variable_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Layer 3: convolution, 5x5 kernel, depth 16, no zero padding, stride 1.
    # Shape: 14x14x6 -> 10x10x16
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable('weight', [5, 5, 6, 16], initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable('bias', [16], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='VALID')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    # Layer 4: max pooling, 2x2 window, SAME padding, stride 2.
    # Shape: 10x10x16 -> 5x5x16
    with tf.variable_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Flatten the pooled feature maps so they can feed a fully connected
    # layer: each 5x5x16 volume becomes a vector of 400 values, e.g. a batch
    # of 64 images goes from (64, 5, 5, 16) to (64, 400).
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool2, [-1, nodes])

    # Layer 5: fully connected, 400 -> 120 (e.g. 64x400 -> 64x120).
    # During training, dropout randomly zeroes part of the activations, which
    # (like weight regularisation) limits the model's ability to fit noise in
    # the training data and so reduces over-fitting.
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable('weight', [nodes, 120], initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable('bias', [120], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if is_train:
            # Rebind fc1 itself so the next layer consumes the dropped-out
            # activations; binding the result to any other name would leave
            # this dropout disconnected from the graph.
            fc1 = tf.nn.dropout(fc1, 0.5)

    # Layer 6: fully connected, 120 -> 84 (e.g. 64x120 -> 64x84).
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable('weight', [120, 84], initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable('bias', [84], initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc2 = tf.nn.relu(tf.matmul(fc1, fc2_weights) + fc2_biases)
        if is_train:
            fc2 = tf.nn.dropout(fc2, 0.5)

    # Layer 7: fully connected output layer, 84 -> 10 (e.g. 64x84 -> 64x10).
    # Applying softmax to these logits gives the per-digit class
    # probabilities for every image in the batch.
    with tf.variable_scope('layer7-fc3'):
        fc3_weights = tf.get_variable('weight', [84, 10], initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc3_weights))
        fc3_biases = tf.get_variable('bias', [10], initializer=tf.truncated_normal_initializer(stddev=0.1))
        logit = tf.matmul(fc2, fc3_weights) + fc3_biases
    return logit

In [5]:
# Graph for training and evaluation: L2 regulariser, per-example cross
# entropy, its batch mean, the total loss, the optimiser step and the accuracy.
regularizer = tf.contrib.layers.l2_regularizer(scale=0.001)
# Dropout is switched off here because the same graph is used for both
# training and testing.
y = build_network(x, regularizer, is_train=False)

cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=y)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
# Total loss = mean cross entropy + sum of the L2 penalties that
# build_network registered in the 'losses' collection.
l2_penalty = tf.add_n(tf.get_collection('losses'))
loss = cross_entropy_mean + l2_penalty

optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss)

# tf.equal yields one boolean per example; casting to float maps True -> 1.0
# and False -> 0.0, so the mean is the fraction of correct predictions.
# Example: [True, False, True, False, False] -> [1, 0, 1, 0, 0] -> 2/5 = 40%.
predicted_class = tf.cast(tf.argmax(y, 1), tf.int32)
correct_prediction = tf.equal(predicted_class, y_)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [6]:
# Yield batch_size samples at a time for training or testing.
def get_batch(data, label, batch_size):
    """Yield consecutive ``(data, label)`` slices of exactly ``batch_size`` items.

    Only full batches are produced: if ``len(data)`` is not a multiple of
    ``batch_size`` the trailing remainder is skipped, and nothing is yielded
    when there are fewer than ``batch_size`` samples.
    """
    last_start = len(data) - batch_size
    for begin in range(0, last_start + 1, batch_size):
        end = begin + batch_size
        yield data[begin:end], label[begin:end]

In [7]:
# Open a session, train for a fixed number of epochs and evaluate on the test
# split after every epoch.
with tf.Session() as sess:
    # Initialise every variable (weights, biases, optimiser slots, ...).
    sess.run(tf.global_variables_initializer())

    # Go over the whole training set train_num times, 64 samples per step.
    # train_num can be increased for a longer run.
    train_num = 10
    batch_size = 64

    for i in range(train_num):
        # Training pass: one optimiser step per batch, accumulating the
        # per-batch loss and accuracy so their means can be reported.
        train_loss = 0
        train_acc = 0
        batch_num = 0
        for train_data_batch, train_label_batch in get_batch(train_data, train_label, batch_size):
            train_feed = {x: train_data_batch, y_: train_label_batch}
            step_result = sess.run([train_op, loss, accuracy], feed_dict=train_feed)
            train_loss += step_result[1]
            train_acc += step_result[2]
            batch_num += 1
        print('train loss: ', train_loss / batch_num)
        print('train acc: ', train_acc / batch_num)

        # Evaluation pass: same metrics on the test split, without running
        # train_op, so no weights are updated.
        test_loss = 0
        test_acc = 0
        batch_num = 0
        for test_data_batch, test_label_batch in get_batch(test_data, test_label, batch_size):
            test_feed = {x: test_data_batch, y_: test_label_batch}
            eval_result = sess.run([loss, accuracy], feed_dict=test_feed)
            test_loss += eval_result[0]
            test_acc += eval_result[1]
            batch_num += 1
        print('test loss: ', test_loss / batch_num)
        print('test acc: ', test_acc / batch_num)



train loss:  0.41479869461
train acc:  0.928114086147
test loss:  0.236933406729
test acc:  0.967948717949
train loss:  0.19638391984
train acc:  0.97440701397
test loss:  0.165096109829
test acc:  0.978465544872
train loss:  0.145247616811
train acc:  0.980409633295
test loss:  0.125717153247
test acc:  0.98297275641
train loss:  0.115630554896
train acc:  0.984229481956
test loss:  0.106165965685
test acc:  0.984074519231
train loss:  0.0979186176638
train acc:  0.986503201397
test loss:  0.0933006422546
test acc:  0.985376602564
train loss:  0.086919068219
train acc:  0.988049330617
test loss:  0.0848905564501
test acc:  0.986478365385
train loss:  0.0791418511113
train acc:  0.989104336438
test loss:  0.0821745820964
test acc:  0.986578525641
train loss:  0.0741027758193
train acc:  0.989722788126
test loss:  0.081212790468
test acc:  0.986077724359
train loss:  0.068760415923
train acc:  0.990832363213
test loss:  0.0766292449851
test acc:  0.98687900641
train loss:  0.06512609026