In [0]:
!pip install tensorlayer
import time

import numpy as np
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.layers import set_keep

def main_test_cnn_layer():
    """Train a small CNN on MNIST with TensorLayer and print its metrics.

    Reimplementation of the TensorFlow official MNIST CNN tutorials:
    - https://www.tensorflow.org/versions/r0.8/tutorials/mnist/pros/index.html
    - https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/image/mnist/convolutional.py
    More TensorFlow official CNN tutorials can be found here:
    - tutorial_cifar10.py
    - https://www.tensorflow.org/versions/master/tutorials/deep_cnn/index.html
    - For simplified CNN layer see "Convolutional layer (Simplified)"
      in read the docs website.

    The network is conv(32, 5x5) -> max-pool -> conv(64, 5x5) -> max-pool
    -> flatten -> dropout -> dense(256, relu) -> dropout -> dense(10).
    It is trained with Adam for ``n_epoch`` epochs. Training and validation
    loss/accuracy are printed after the first epoch and then every
    ``print_freq`` epochs; test loss/accuracy are printed at the end.

    The function takes no arguments and returns nothing. It opens its own
    ``tf.InteractiveSession`` and closes it before returning.
    """
    X_train, y_train, X_val, y_val, X_test, y_test = \
        tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))

    # Cast to the dtypes of the placeholders defined below
    # (float32 images for `x`, int64 labels for `y_`).
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int64)
    X_val = np.asarray(X_val, dtype=np.float32)
    y_val = np.asarray(y_val, dtype=np.int64)
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int64)

    print('X_train.shape', X_train.shape)
    print('y_train.shape', y_train.shape)
    print('X_val.shape', X_val.shape)
    print('y_val.shape', y_val.shape)
    print('X_test.shape', X_test.shape)
    print('y_test.shape', y_test.shape)
    print('X %s   y %s' % (X_test.dtype, y_test.dtype))

    sess = tf.InteractiveSession()
    # try/finally guarantees the session is released even if training is
    # interrupted or raises; the original never closed it.
    try:
        # Define the batchsize at the begin, you can give the batchsize in x
        # and y_ rather than 'None', this can allow TensorFlow to apply some
        # optimizations - especially for convolutional layers.
        batch_size = 128

        # [batch_size, height, width, channels]
        x = tf.placeholder(tf.float32, shape=[batch_size, 28, 28, 1])
        y_ = tf.placeholder(tf.int64, shape=[batch_size,])

        network = tl.layers.InputLayer(x, name='input')
        ## Professional conv API for tensorflow user
        # network = tl.layers.Conv2dLayer(network,
        #                     act = tf.nn.relu,
        #                     shape = [5, 5, 1, 32],  # 32 features for each 5x5 patch
        #                     strides=[1, 1, 1, 1],
        #                     padding='SAME',
        #                     name ='cnn1')     # output: (?, 28, 28, 32)
        # network = tl.layers.PoolLayer(network,
        #                     ksize=[1, 2, 2, 1],
        #                     strides=[1, 2, 2, 1],
        #                     padding='SAME',
        #                     pool = tf.nn.max_pool,
        #                     name ='pool1',)   # output: (?, 14, 14, 32)
        # network = tl.layers.Conv2dLayer(network,
        #                     act = tf.nn.relu,
        #                     shape = [5, 5, 32, 64], # 64 features for each 5x5 patch
        #                     strides=[1, 1, 1, 1],
        #                     padding='SAME',
        #                     name ='cnn2')     # output: (?, 14, 14, 64)
        # network = tl.layers.PoolLayer(network,
        #                     ksize=[1, 2, 2, 1],
        #                     strides=[1, 2, 2, 1],
        #                     padding='SAME',
        #                     pool = tf.nn.max_pool,
        #                     name ='pool2',)   # output: (?, 7, 7, 64)
        ## Simplified conv API for beginner (the same with the above layers)
        network = tl.layers.Conv2d(network, 32, (5, 5), (1, 1),
                                   act=tf.nn.relu, padding='SAME', name='cnn1')
        network = tl.layers.MaxPool2d(network, (2, 2), (2, 2),
                                      padding='SAME', name='pool1')
        network = tl.layers.Conv2d(network, 64, (5, 5), (1, 1),
                                   act=tf.nn.relu, padding='SAME', name='cnn2')
        network = tl.layers.MaxPool2d(network, (2, 2), (2, 2),
                                      padding='SAME', name='pool2')
        ## end of conv
        # Flatten the feature maps into one vector per sample so they can be
        # fed to the fully connected layers.
        network = tl.layers.FlattenLayer(network, name='flatten')
        network = tl.layers.DropoutLayer(network, keep=0.5, name='drop1')
        network = tl.layers.DenseLayer(network, 256, act=tf.nn.relu, name='relu1')
        network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2')
        network = tl.layers.DenseLayer(network, 10, act=tf.identity, name='output')

        y = network.outputs

        # Loss: cross-entropy between the logits and the integer labels
        # (see the "Deep Learning" book, section 6.2.1).
        cost = tl.cost.cross_entropy(y, y_, 'cost')

        # tf.argmax(y, 1) is the index of the largest logit, i.e. the
        # predicted digit; comparing with y_ gives a per-sample bool vector.
        correct_prediction = tf.equal(tf.argmax(y, 1), y_)
        # Cast the bools to floats and average them (no axis given, so the
        # result is a scalar): this is the accuracy on the batch.
        acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # train
        n_epoch = 200          # number of passes over the training set
        learning_rate = 0.0001
        print_freq = 10        # report metrics every `print_freq` epochs

        # All trainable parameters collected from every layer.
        train_params = network.all_params

        train_op = tf.train.AdamOptimizer(
            learning_rate, beta1=0.9, beta2=0.999,
            epsilon=1e-08, use_locking=False,
        ).minimize(cost, var_list=train_params)

        # Initialise all variables.
        tl.layers.initialize_global_variables(sess)

        # Print parameter and layer information.
        network.print_params()
        network.print_layers()

        print('   learning_rate: %f' % learning_rate)
        print('   batch_size: %d' % batch_size)

        # network.all_drop maps each dropout placeholder to its keep
        # probability; dict_to_one sets every value to 1, which disables
        # dropout. It never changes, so build it once instead of per batch.
        eval_drop = tl.utils.dict_to_one(network.all_drop)

        def evaluate(inputs, targets):
            """Return (mean loss, mean accuracy) over mini-batches, dropout off.

            Batches are taken in order (shuffle=False) so repeated
            evaluations cover the same samples and give reproducible
            numbers. Raises ZeroDivisionError if no batch is produced.
            """
            total_loss, total_acc, n_batch = 0, 0, 0
            for X_batch, y_batch in tl.iterate.minibatches(
                    inputs, targets, batch_size, shuffle=False):
                feed_dict = {x: X_batch, y_: y_batch}
                feed_dict.update(eval_drop)    # disable noise layers
                err, ac = sess.run([cost, acc], feed_dict=feed_dict)
                total_loss += err
                total_acc += ac
                n_batch += 1
            return total_loss / n_batch, total_acc / n_batch

        # Training loop.
        for epoch in range(n_epoch):
            start_time = time.time()
            for X_train_a, y_train_a in tl.iterate.minibatches(
                    X_train, y_train, batch_size, shuffle=True):
                feed_dict = {x: X_train_a, y_: y_train_a}
                # Feed the keep probabilities so dropout is active while
                # training.
                feed_dict.update(network.all_drop)        # enable noise layers
                sess.run(train_op, feed_dict=feed_dict)

            # Report after the first epoch and then every `print_freq` epochs.
            if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
                print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
                # Metrics on the training set (dropout disabled).
                train_loss, train_acc = evaluate(X_train, y_train)
                print("   train loss: %f" % train_loss)
                print("   train acc: %f" % train_acc)
                # Metrics on the validation set. For background on data
                # splitting see
                # https://www.bilibili.com/video/av17204303/?from=search&seid=1804855494055998079#page=5
                val_loss, val_acc = evaluate(X_val, y_val)
                print("   val loss: %f" % val_loss)
                print("   val acc: %f" % val_acc)
                # Save the first conv layer's filters as an image. This is
                # best-effort, but only ordinary errors are caught so that
                # Ctrl-C (KeyboardInterrupt) still stops the run.
                try:
                    tl.vis.CNN2d(network.all_params[0].eval(),
                                 second=10, saveable=True,
                                 name='cnn1_' + str(epoch + 1), fig_idx=2012)
                except Exception as exc:
                    print("You should change vis.CNN(), if you want to save the feature images for different dataset")
                    print("   (visualisation failed: %r)" % (exc,))

        print('Evaluation')
        # Final performance on the held-out test set.
        test_loss, test_acc = evaluate(X_test, y_test)
        print("   test loss: %f" % test_loss)
        print("   test acc: %f" % test_acc)
    finally:
        sess.close()




if __name__ == '__main__':
    # Run the tutorial. No session is created here: main_test_cnn_layer()
    # opens its own tf.InteractiveSession, so one created at this level
    # would never be used or closed.
    main_test_cnn_layer()

Collecting tensorlayer
  Using cached tensorlayer-1.7.4.zip
Building wheels for collected packages: tensorlayer
  Running setup.py bdist_wheel for tensorlayer ... [?25l- \ | done
[?25h  Stored in directory: /content/.cache/pip/wheels/40/81/7a/c5db8987981df637e7f66c8887228984778b45195fa50be72a
Successfully built tensorlayer
Installing collected packages: tensorlayer
Successfully installed tensorlayer-1.7.4
Load or Download MNIST > data/mnist
data/mnist/train-images-idx3-ubyte.gz
data/mnist/t10k-images-idx3-ubyte.gz
X_train.shape (50000, 28, 28, 1)
y_train.shape (50000,)
X_val.shape (10000, 28, 28, 1)
y_val.shape (10000,)
X_test.shape (10000, 28, 28, 1)
y_test.shape (10000,)
X float32   y int64
  [TL] InputLayer  input: (128, 28, 28, 1)
  [TL] Conv2dLayer cnn1: shape:[5, 5, 1, 32] strides:[1, 1, 1, 1] pad:SAME act:relu
  [TL] PoolLayer   pool1: ksize:[1, 2, 2, 1] strides:[1, 2, 2, 1] padding:SAME pool:max_pool
  [TL] Conv2dLayer cnn2: shape:[5, 5, 32, 64] strides:[1, 1, 1, 1] pad