## 导入模型和数据

In [1]:
import warnings
warnings.filterwarnings("ignore")

#import modules
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# 指定使用GPU编号
import os
os.environ['CUDA_VISIBLE_DEVICES']='0'

#import data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


## 定义基本函数

In [2]:
# 定义池化层
def max_pool(x, size):
    return tf.nn.max_pool(x, ksize=[1, size, size, 1], strides=[1, size, size, 1], padding='SAME')

# 定义卷积操作
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# 定义权重
def weight_varibale(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

# 定义偏置
def bias_varibale(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# 定义基本卷积层
def conv_block(input_, kernel_size=3, input_channel=1, output_channel=32, activate=True):
    # 定义
    w_conv = weight_varibale([kernel_size, kernel_size, input_channel, output_channel])
    b_conv = weight_varibale([output_channel])
    if activate == True:                              
        conv_out = tf.nn.relu(conv2d(input_, w_conv) + b_conv)
    else:
        conv_out = conv2d(input_, w_conv0) + b_conv0
    return conv_out

# 定义全连接层
def fc_block(input_, input_dim=1, outout_dim=32, activate='relu'):
    w_fc = weight_varibale([input_dim, outout_dim])
    b_fc = weight_varibale([outout_dim])
    if activate == 'relu':                              
        fc_out = tf.nn.relu(tf.matmul(input_, w_fc) + b_fc)
    elif activate == 'sigmoid':  
        fc_out = tf.nn.sigmoid(tf.matmul(input_, w_fc) + b_fc)
    else:
        fc_out = tf.matmul(input_, w_fc) + b_fc
    return fc_out

## 基本参数设定

In [3]:
IMG_SIZE = 28
IMG_CHANNEL = 1
IMG_CLASS = 10
TRAIN_RATE = 0.001
TRAIN_RATE_DELAY = 0.99
BATCH_SIZE = 64
ITER = 3000    

### 定义网络结构

In [4]:
# 定义输入输出占位符
X = tf.placeholder(tf.float32, [None, IMG_SIZE, IMG_SIZE, IMG_CHANNEL])
y_ = tf.placeholder(tf.float32, [None, IMG_CLASS])

# 记录输入图片
tf.summary.image('input', X, 3)

# 第1层卷积层
conv_1 = conv_block(input_=X, kernel_size=3, input_channel=IMG_CHANNEL, output_channel=16, activate=True)

# 第2层卷积层
conv_2 = conv_block(input_=conv_1, kernel_size=3, input_channel=16, output_channel=32, activate=True)

# 池化层
conv_2 = max_pool(conv_2, 2)

# 第3层卷积层
conv_3 = conv_block(input_=conv_2, kernel_size=3, input_channel=32, output_channel=32, activate=True)

# 第4层卷积层
conv_4 = conv_block(input_=conv_3, kernel_size=3, input_channel=32, output_channel=32, activate=True)

# 池化层
conv_4 = max_pool(conv_4, 2)

# 将三维特征拉成一列
conv_4_shape = conv_4.get_shape().as_list()
conv_4_len = conv_4_shape[1] * conv_4_shape[2] * conv_4_shape[3]
conv_4_reshape = tf.reshape(conv_4, shape=[-1, conv_4_len])

# 全连接层
fc_1 = fc_block(input_=conv_4_reshape, input_dim=conv_4_len, outout_dim=128, activate='relu')

# 输出层
fc_out = fc_block(input_=fc_1, input_dim=128, outout_dim=10, activate='sigmoid')

In [5]:
print(X, fc_out)

Tensor("Placeholder:0", shape=(?, 28, 28, 1), dtype=float32) Tensor("Sigmoid:0", shape=(?, 10), dtype=float32)


## 定义损失函数和准确性度量

In [6]:
# 损失函数
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=y_, logits=fc_out))
# 记录损失函数
tf.summary.scalar('loss', cross_entropy)

# 准确率度量
acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(fc_out, 1), tf.argmax(y_, 1)), tf.float32))
# 记录准确率
tf.summary.scalar('acc', acc);

## 定义优化器

In [7]:
global_step = tf.Variable(0, trainable=False)
learn_rate = tf.train.exponential_decay(TRAIN_RATE, global_step, 10, TRAIN_RATE_DELAY)
train_step = tf.train.AdamOptimizer(learn_rate).minimize(cross_entropy, global_step=global_step)
tf.summary.scalar('learn_rate', learn_rate);

## 记录训练时的参数和创建保存模型的对象

In [8]:
merge = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('SummarWriter/' + 'train', tf.get_default_graph())
test_writer = tf.summary.FileWriter('SummarWriter/' + 'test', tf.get_default_graph())

saver = tf.train.Saver()

## 开始训练

In [None]:
# 创建会话
with tf.Session() as sess:
    
    sess.run(tf.global_variables_initializer())
    
    for i in range(ITER):
        train_x, train_y = mnist.train.next_batch(BATCH_SIZE)
        train_x = sess.run(tf.reshape(train_x, (BATCH_SIZE, IMG_SIZE, IMG_SIZE, IMG_CHANNEL)))
        test_x, test_y = mnist.validation.next_batch(BATCH_SIZE)
        test_x = sess.run(tf.reshape(test_x, (BATCH_SIZE, IMG_SIZE, IMG_SIZE, IMG_CHANNEL)))
    
        _ = sess.run(train_step, feed_dict={X:train_x, y_:train_y})
        
        summary = sess.run(merge, feed_dict={X:train_x, y_:train_y})
        train_writer.add_summary(summary, i)
        
        summary = sess.run(merge, feed_dict={X:test_x, y_:test_y})
        test_writer.add_summary(summary, i)
        
        
        if i % 100 == 0:            
            train_acc, train_loss = sess.run([acc, cross_entropy], feed_dict={X:train_x, y_:train_y})
            print('iter:', i, 'train_acc:', train_acc, 'train_loss', train_loss)
            
            test_acc, test_loss = sess.run([acc, cross_entropy], feed_dict={X:test_x, y_:test_y})
            print('iter:', i, 'test_acc:', test_acc, 'test_loss', test_loss)
            print('\n')
            
        if i % 1000 == 0 and i != 0:
            saver.save(sess, './ckpt/' + 'mycnn' + '-' + str(i) + '/' + 'mycnn', global_step=i)

iter: 0 train_acc: 0.09375 train_loss 2.2903957
iter: 0 test_acc: 0.140625 test_loss 2.2956057


iter: 100 train_acc: 0.921875 train_loss 1.5856702
iter: 100 test_acc: 0.921875 test_loss 1.541276


iter: 200 train_acc: 0.984375 train_loss 1.493273
iter: 200 test_acc: 0.96875 test_loss 1.5213857


iter: 300 train_acc: 0.921875 train_loss 1.5051115
iter: 300 test_acc: 0.953125 test_loss 1.4977945


iter: 400 train_acc: 0.921875 train_loss 1.5140972
iter: 400 test_acc: 0.96875 test_loss 1.4949093


iter: 500 train_acc: 0.953125 train_loss 1.4918191
iter: 500 test_acc: 0.953125 test_loss 1.5033385


iter: 600 train_acc: 0.953125 train_loss 1.5021191
iter: 600 test_acc: 0.984375 test_loss 1.4860845


iter: 700 train_acc: 0.984375 train_loss 1.4859986
iter: 700 test_acc: 0.96875 test_loss 1.487331


iter: 800 train_acc: 0.953125 train_loss 1.4989743
iter: 800 test_acc: 0.953125 test_loss 1.4897635


iter: 900 train_acc: 0.984375 train_loss 1.4877546
iter: 900 test_acc: 0.96875 test_loss 1.47