In [2]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np

In [3]:
mnist=input_data.read_data_sets('MNIST_data/',one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
# 初始化参数
def weight_variable(shape):
    initial = tf.truncated_normal(shape,stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    '''
    bias的初始化会有比较大的影响，如果采用truncated_nomal初始化的话
    模型貌似是不能收敛的
    '''
    initial = tf.constant(0.1,shape=shape)
    return tf.Variable(initial)

In [5]:
# 定义卷积层和池化层
def conv2d(x,W):
    '''
     x表示输入图像，具体定义：[batch, in_height, in_width, in_channels]`
     W表示自定义的卷积核，具体定义如下：[filter_height, filter_width, in_channels, out_channels]
     strids表示步长的维度信息，定义如下：[batch, height, width, channels]维度上的步长
    '''
    return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')

def max_pool_2x2(x):
    '''
    x：[batch, in_height, in_width, in_channels]4个维度的
    ksize表示pool所有核的维度信息，4个参数对应于输入x的四个维度的pool
    stride：表示步长的维度信息
    '''
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')



In [6]:
'''
input_layer
'''
x_input=tf.placeholder(shape=(None,784),dtype=tf.float32)
y_input=tf.placeholder(shape=(None,10),dtype=tf.float32)

In [7]:
'''
first layer conv
这里比较奇怪，相当于我们要先将卷积核定义好，然后再输入
''' 
# 第一层卷积核size为5*5，输入图像channel为1，输出feature-map的channel为32
W_conv1=weight_variable([5,5,1,32])
b_conv1=bias_variable([32])

# 把输入变回28*28的图像
x_image=tf.reshape(x_input,shape=[-1,28,28,1])

# 做卷积
h_conv1 = tf.nn.relu(conv2d(x_image,W_conv1)+b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

In [8]:
'''
second layer conv
'''
W_conv2 = weight_variable([5,5,32,64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1,W_conv2)+b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

In [9]:
'''
Dense layer
'''
W_fc1=weight_variable([7*7*64,1024])
b_fc1=bias_variable([1024])

h_pool2_flat=tf.reshape(h_pool2,shape=[-1,7*7*64])
h_fc1=tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1)+b_fc1)

In [10]:
'''
Drop layer
'''
keep_prob=tf.placeholder(dtype=tf.float32)
h_fc1_drop=tf.nn.dropout(h_fc1,keep_prob)

In [11]:
'''
output_layer
'''
W_fc2=weight_variable([1024,10])
b_fc2=weight_variable([10])

y_conv=tf.nn.softmax(tf.matmul(h_fc1_drop,W_fc2)+b_fc2)

In [14]:
'''
define loss
cross entropy

为了规避掉softmax+crossentropy分开计算有数值上的问题，
这里可以是用tf.nn.softmax_cross_entropy_with_logits来作为最后一层的激活函数，

最后取个均值，就可以做最后的loss了
'''
loss=-tf.reduce_sum(y_input*tf.log(y_conv))
trainer=tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss)

# 计算每个变量的梯度
grads = tf.gradients(loss, tf.trainable_variables())
grads = list(zip(grads, tf.trainable_variables()))

In [13]:
'''
evaluate model
'''
correct_pred=tf.equal(tf.argmax(y_conv,1),tf.argmax(y_input,1))
# 计算均值，cast之后的correct_pred相当于是【0,0,0,1,1,1,0】这样的数组，
# 算出它的均值就是对应的accuracy了
accuracy=tf.reduce_mean(tf.cast(correct_pred,'float'))

In [16]:
for var in tf.trainable_variables():
    tf.summary.histogram(var.name, var)
# Summarize all gradients
for grad, var in grads:
    tf.summary.histogram(var.name + '/gradient', grad)

merged_summary_op = tf.summary.merge_all()


INFO:tensorflow:Summary name Variable:0 is illegal; using Variable_0 instead.
INFO:tensorflow:Summary name Variable_1:0 is illegal; using Variable_1_0 instead.
INFO:tensorflow:Summary name Variable_2:0 is illegal; using Variable_2_0 instead.
INFO:tensorflow:Summary name Variable_3:0 is illegal; using Variable_3_0 instead.
INFO:tensorflow:Summary name Variable_4:0 is illegal; using Variable_4_0 instead.
INFO:tensorflow:Summary name Variable_5:0 is illegal; using Variable_5_0 instead.
INFO:tensorflow:Summary name Variable_6:0 is illegal; using Variable_6_0 instead.
INFO:tensorflow:Summary name Variable_7:0 is illegal; using Variable_7_0 instead.
INFO:tensorflow:Summary name Variable:0/gradient is illegal; using Variable_0/gradient instead.
INFO:tensorflow:Summary name Variable_1:0/gradient is illegal; using Variable_1_0/gradient instead.
INFO:tensorflow:Summary name Variable_2:0/gradient is illegal; using Variable_2_0/gradient instead.
INFO:tensorflow:Summary name Variable_3:0/gradient i

In [18]:
'''
train
'''
init=tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    summary_writer = tf.summary.FileWriter('./logs/example',
                                            graph=tf.get_default_graph())
    for i in range(20000):
        batch_x,batch_y=mnist.train.next_batch(50)
        
        if i%100==0:
            result=sess.run(accuracy,feed_dict={x_input:batch_x,
                                                y_input:batch_y,
                                                keep_prob:1.0})
            print(int(i/100),result)
#         trainer.run(feed_dict={x_input:batch_x,
#                                     y_input:batch_y,
#                                     keep_prob:0.5})    
        _,summary_=sess.run([trainer,merged_summary_op],feed_dict={x_input:batch_x,
                                    y_input:batch_y,
                                    keep_prob:0.5})
        summary_writer.add_summary(summary_,i)

    batch_test_x=mnist.test.images
    batch_test_y=mnist.test.labels
    all_test=sess.run(accuracy,feed_dict={x_input:batch_test_x,
                                          y_input:batch_test_y,
                                          keep_prob:1.0})
    print(all_test)

0 0.12
1 0.84
2 0.94
3 0.9
4 0.86
5 0.92
6 0.98
7 0.96
8 0.96
9 0.96
10 0.98
11 0.96
12 1.0
13 0.94
14 0.94
15 0.98
16 1.0
17 0.98
18 0.92
19 0.98
20 0.98
21 0.98
22 1.0
23 0.96
24 1.0
25 1.0
26 0.96
27 0.98
28 1.0
29 0.96
30 0.98
31 1.0


KeyboardInterrupt: 