In [1]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

  from ._conv import register_converters as _register_converters


In [2]:
mnist=input_data.read_data_sets('MNIST_data/',one_hot=True)

data_dir='./MNIST_data/'
log_dir='./logs/example4'

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
def variable_summary(var):
    with tf.name_scope('summaries') as scope:
        mean=tf.reduce_mean(var)
        tf.summary.scalar('mean',mean)
        
        stddev=tf.sqrt(tf.reduce_mean(tf.square(var-mean)))
        
        tf.summary.scalar('stddev',stddev)
        tf.summary.scalar('max',tf.reduce_max(var))
        tf.summary.scalar('min',tf.reduce_min(var))
        
        tf.summary.histogram('histogram',var)

In [4]:

'''
回忆卷积步骤
1、定义卷积核与bias
2、上一层输出，卷积核，一起输入到卷积操作中，
3、将结果加上bias
4、返回结果

'''

def conv_2d(input_data,k_h=3,k_w=3,s_h=1,s_w=1,
            output_channel=64,padding='SAME',name='conv_i'):
    
    input_data_channel=int(input_data.shape[-1])
    # 这个可以理解为是方便显示用的
    with tf.name_scope(name) as scope:
        print(input_data_channel)
        initial_W = tf.truncated_normal(shape=[k_h,k_w,
                                               input_data_channel,
                                               output_channel],
                                        stddev=0.1)
        W=tf.Variable(initial_W,name=name+'_'+'weights')
        
        variable_summary(W)
        
        initial_b = tf.constant(0.1,shape=[output_channel])
        b=tf.Variable(initial_b,name=name+'_'+'bias')
        
        variable_summary(b)
        '''
        strids表示步长的维度信息，定义如下：
                [batch, height, width, channels]维度上的步长
        '''
        conv_op=tf.nn.conv2d(input_data,W,strides=[1,s_h,s_w,1],
                             padding=padding,
                             name=name+'_'+'conv')
        
        result=conv_op+b
        
        # 默认激活函数是relu
        result_act=tf.nn.relu(result,name=name+'_'+'activation')
        return result_act

In [5]:
'''
定义池化层
'''
def max_pool_2d(input_data,p_h=2,p_w=2,s_h=2,s_w=2,
                padding='SAME',name='pool_i'):
    pool_size=[1,p_h,p_w,1]
    pool_strid=[1,s_h,s_w,1]
    with tf.name_scope(name) as scope:
        max_poolled=tf.nn.max_pool(input_data,
                                   ksize=pool_size,
                                   strides=pool_strid,
                                  padding=padding,name=name+'_maxpool')
    
    return max_poolled

In [6]:
'''
定义全连接层
'''
def dense_2d(input_data,output_dim,keep_prob=0.9,name='dense_i',act=tf.nn.relu):
    input_dim=int(input_data.shape[-1])
    with tf.name_scope(name) as scope:
        # 相当于通过这个方式定义了随机初始化的格式
        # 可以理解为只有被定义为/被包裹为variable的tensor，
        # 才能够被用来被梯度下降学习
        init_W=tf.truncated_normal(shape=[input_dim,output_dim],
                                        stddev=0.1)
        W=tf.Variable(init_W,name=name+'_weights')
        
        variable_summary(W)
        
        b=tf.constant(value=0.1,shape=[output_dim])
        b=tf.Variable(b,name=name+'_bias')
        variable_summary(b)
        
        dense=tf.matmul(input_data,W)+b
        dense=act(dense)
            
        dense_dropped=tf.nn.dropout(dense,keep_prob=keep_prob)
    
    return dense_dropped


In [13]:
tf.reset_default_graph()
'''
定义输入
'''
x_input=tf.placeholder(shape=(None,784),dtype=tf.float32,name='x_input')
y_input=tf.placeholder(shape=(None,10),dtype=tf.float32,name='y_input')
keep_prob=tf.placeholder(dtype=tf.float32,name='keep_prob')

'''
reshape输入
'''
x_input_reshaped=tf.reshape(x_input,(-1,28,28,1))

'''
输入图片太小，仿照VGG，做了几层全连接
'''


part_1_conv_1=conv_2d(x_input_reshaped,
                      output_channel=64,k_h=3,k_w=3,
                      padding='SAME',name='part_1_conv1')
part_1_conv_2=conv_2d(part_1_conv_1,
                      output_channel=64,k_h=3,k_w=3,
                      padding='SAME',name='part_1_conv2')
part_1_max_pool=max_pool_2d(part_1_conv_2,
                            p_h=2,p_w=2,s_h=2,s_w=2,
                            padding='SAME',name='part_1_maxpool1')

part_2_conv_1=conv_2d(part_1_max_pool,
                      output_channel=128,k_h=3,k_w=3,
                      padding='SAME',name='part_2_conv1')
part_2_conv_2=conv_2d(part_2_conv_1,
                      output_channel=128,k_h=3,k_w=3,
                      padding='SAME',name='part_2_conv2')
part_2_max_pool=max_pool_2d(part_2_conv_2,
                            p_h=2,p_w=2,s_h=2,s_w=2,
                            padding='SAME',name='part_2_maxpool1')

part_3_conv_1=conv_2d(part_2_max_pool,
                      output_channel=256,k_h=3,k_w=3,
                      padding='SAME',name='part_3_conv1')
part_3_conv_2=conv_2d(part_3_conv_1,
                      output_channel=256,k_h=3,k_w=3,
                      padding='SAME',name='part_3_conv2')


part_4_conv_1=conv_2d(part_3_conv_2,
                      output_channel=256,k_h=3,k_w=3,
                      padding='SAME',name='part_4_conv1')
part_4_conv_2=conv_2d(part_4_conv_1,
                      output_channel=256,k_h=3,k_w=3,
                      padding='SAME',name='part_4_conv2')


part_4_max_pool=max_pool_2d(part_4_conv_2,
                            p_h=2,p_w=2,
                            s_h=1,s_w=1,padding='SAME',
                            name='part_4_maxpool')

# '''
# 拉平，准备做全连接
# '''
part_4_shape=part_4_max_pool.get_shape()
flatten_shape=part_4_shape[1]*part_4_shape[2]*part_4_shape[3]
flattened=tf.reshape(part_4_max_pool,(-1,flatten_shape))

print(flatten_shape)

# '''
# 全连接
# '''
dense_1=dense_2d(flattened,output_dim=1024,
                 keep_prob=keep_prob,name='dense_1')
dense_2=dense_2d(dense_1,output_dim=1024,
                 keep_prob=keep_prob,name='dense_2')

W_fc3 = tf.Variable(tf.truncated_normal([1024,10],stddev=0.1))
b_fc3 = tf.Variable(tf.constant(0.1,shape=[10]))
dense_3 = tf.matmul(dense_2, W_fc3) + b_fc3

# dense_3=dense_2d(dense_2,output_dim=10,
#                  act=tf.nn.softmax,
#                  keep_prob=1.0,name='dense_3')

'''
定义loss和优化器 tf.clip_by_value(y,1e-10,1.0)
然而自己写的loss会有数值问题，导致log里面计算的值是0，从而报一个Nan in summary histogram for:xxx的错
所以要这样处理一下：
从
y_input*tf.log(dense_3)
变成
y_input*tf.log(tf.clip_by_value(dense_3,1e-10,1.0))

虽然按照上面的方式改了，但是效果还是不行

现在做了如下改动
1、减少了pool的次数。
2、使用tf提供的cross-entropy-with-logits-v2来操作
3、才使得效果提升了
4、提升了学习率，从1e-4提升到1e-3。
在未提升学习率前，准确率会先从0.1提升至0.8，然后快速回落至0.1~0.3之间震荡
然后当提升学习率到1e-3之后，准确率就稳定了。

最终解释，什么样效果才好
https://www.zybuluo.com/w460461339/note/1393445
'''

# dense_3_sm=tf.nn.softmax(dense_3)
# cross_1=y_input*tf.log(tf.clip_by_value(dense_3_sm,1e-36,1.0))
# cross_1=y_input*tf.log(tf.nn.softmax(dense_3))
# loss=-tf.reduce_sum(cross_1)
                    
# print(loss)

cross_2=tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_input, logits=dense_3)
loss_2=tf.reduce_mean(cross_2)


trainer=tf.train.AdamOptimizer(learning_rate=1e-4).minimize(loss_2)

tf.summary.scalar('loss',loss_2)

1
64
64
128
128
256
256
256
12544


<tf.Tensor 'loss:0' shape=() dtype=string>

In [14]:
'''
计算精度
'''
temp_acc=tf.equal(tf.argmax(dense_3,1),tf.argmax(y_input,1))
accuracy=tf.reduce_mean(tf.cast(temp_acc,'float'))

tf.summary.scalar('accuracy',accuracy)

<tf.Tensor 'accuracy:0' shape=() dtype=string>

In [15]:
# 合并summary
merged = tf.summary.merge_all()

'''
train
'''
init=tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    
    # 写到磁盘
    train_writer = tf.summary.FileWriter(log_dir+'/train',sess.graph)
    test_writer = tf.summary.FileWriter(log_dir+'/test')
   
    for i in range(20000):
        batch_x,batch_y=mnist.train.next_batch(50)
        
        if i%100==0:
            summary_res,result=sess.run([merged,accuracy],feed_dict={x_input:batch_x,
                                                y_input:batch_y,
                                                keep_prob:1.0})
#             my_loss,tf_loss,cr_1,cr_2,dense3_res,dense3_sm_res=sess.run([loss,loss_2,cross_1,cross_2,dense_3,dense_3_sm],feed_dict={x_input:batch_x,
#                                                 y_input:batch_y,
#                                                 keep_prob:1.0})
#             print('my_loss:',my_loss,cr_1,'tf_loss:',tf_loss,cr_2)
            
#             print(batch_y,dense3_res,dense3_sm_res)
            
            test_writer.add_summary(summary_res, i)
            print(int(i/100),result)

        summary_train,_=sess.run([merged,trainer],feed_dict={x_input:batch_x,
                                    y_input:batch_y,
                                    keep_prob:0.5})
        train_writer.add_summary(summary_train, i)

    batch_test_x=mnist.test.images
    batch_test_y=mnist.test.labels
    all_test=sess.run(accuracy,feed_dict={x_input:batch_test_x,
                                          y_input:batch_test_y,
                                          keep_prob:1.0})
    print(all_test)

0 0.04
1 0.64
2 0.64
3 0.24
4 0.2
5 0.24
6 0.18
7 0.16
8 0.16
9 0.2
10 0.16
11 0.18
12 0.14
13 0.16
14 0.3
15 0.22
16 0.16
17 0.1
18 0.18
19 0.16
20 0.12
21 0.08
22 0.28
23 0.24
24 0.24
25 0.32
26 0.18
27 0.12
28 0.1
29 0.08
30 0.14
31 0.14
32 0.16
33 0.08
34 0.08
35 0.12
36 0.16
37 0.14
38 0.04
39 0.14


KeyboardInterrupt: 