In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials import input_data
import numpy as np

# Load the MNIST data set from the "MNIST_data" directory with one-hot labels.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# Mini-batch size, and the number of whole mini-batches in one training epoch.
batch_size = 100
n_batch = mnist.train.num_examples // batch_size

# Network inputs: 784 features per image and 10-way one-hot labels.
# The leading dimension is None so any batch size can be fed.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# Dropout keep probability; its value is supplied through feed_dict at run time.
keep_prob = tf.placeholder(dtype=tf.float32)

# The simplest possible model would be a single softmax layer
# (W: [784, 10], b: [10]) with every parameter initialised to zero, but
# all-zero initialisation is a poor choice.  Here weights are drawn from a
# truncated normal distribution (stddev=0.1) and biases start at 0.1.
#
# MNIST does not need a network this large; it is deliberately over-sized so
# that it overfits and the effect of dropout becomes visible.  Overfitting
# usually comes from a network that is too complex or from too little data.

# Hidden layer 1: 784 -> 2000 units, tanh activation.
W1 = tf.Variable(tf.truncated_normal([784, 2000], stddev=0.1))
b1 = tf.Variable(tf.zeros([2000]) + 0.1)
L1 = tf.nn.tanh(tf.matmul(x, W1) + b1)
# tf.nn.dropout takes a layer's output and the probability of keeping each
# unit active; the probability is fed through the keep_prob placeholder.
L1_drop = tf.nn.dropout(L1, keep_prob)

# Hidden layer 2: 2000 -> 2000 units.  Its input is L1_drop, not L1.
W2 = tf.Variable(tf.truncated_normal([2000, 2000], stddev=0.1))
b2 = tf.Variable(tf.zeros([2000]) + 0.1)
L2 = tf.nn.tanh(tf.matmul(L1_drop, W2) + b2)
L2_drop = tf.nn.dropout(L2, keep_prob)

# Hidden layer 3: 2000 -> 1000 units.
W3 = tf.Variable(tf.truncated_normal([2000, 1000], stddev=0.1))
b3 = tf.Variable(tf.zeros([1000]) + 0.1)
L3 = tf.nn.tanh(tf.matmul(L2_drop, W3) + b3)
L3_drop = tf.nn.dropout(L3, keep_prob)

# Output layer: 1000 -> 10 classes.  The un-normalised scores (logits) are
# built once and shared by the softmax prediction and the loss below.
W4 = tf.Variable(tf.truncated_normal([1000, 10], stddev=0.1))
b4 = tf.Variable(tf.zeros([10]) + 0.1)
logits = tf.matmul(L3_drop, W4) + b4
prediction = tf.nn.softmax(logits)

# Cross-entropy replaces the quadratic cost.  This op applies softmax
# internally, so it must receive the raw logits rather than `prediction`.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

# Plain gradient descent with a learning rate of 0.1.
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

init = tf.global_variables_initializer()

# A sample counts as correct when the arg-max of the predicted distribution
# equals the arg-max of the one-hot label; accuracy is the mean of those hits.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

def _evaluate_accuracy(session, dataset):
    """Return `accuracy` over all of `dataset` with dropout disabled."""
    # keep_prob = 1.0 keeps every unit active while measuring accuracy.
    eval_feed = {x: dataset.images, y: dataset.labels, keep_prob: 1.0}
    return session.run(accuracy, feed_dict=eval_feed)


with tf.Session() as sess:
    sess.run(init)
    for epoch in range(31):
        # One epoch: n_batch gradient steps over successive mini-batches.
        for batch in range(n_batch):
            batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # keep_prob = 1.0 trains without dropout; a value such as 0.7
            # would keep 70% of the units in each hidden layer active.
            step_feed = {x: batch_xs, y: batch_ys, keep_prob: 1.0}
            sess.run(train_step, feed_dict=step_feed)

        # Report accuracy on the test set and on the training set after
        # every epoch; a widening gap between the two indicates overfitting.
        acc_test = _evaluate_accuracy(sess, mnist.test)
        acc_train = _evaluate_accuracy(sess, mnist.train)
        print("Iter", str(epoch), "Testing Accuracy = ", str(acc_test), "Training Accuracy = ", str(acc_train))
        
        
        

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
('Iter', '0', 'Testing Accuracy = ', '0.9541', 'Training Accuracy = ', '0.9755273')
('Iter', '1', 'Testing Accuracy = ', '0.9667', 'Training Accuracy = ', '0.9939455')
('Iter', '2', 'Testing Accuracy = ', '0.9719', 'Training Accuracy = ', '0.9996')
('Iter', '3', 'Testing Accuracy = ', '0.971', 'Training Accuracy = ', '1.0')
('Iter', '4', 'Testing Accuracy = ', '0.973', 'Training Accuracy = ', '1.0')
