# 手写数字识别

In [33]:
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from matplotlib import pyplot as plt
%matplotlib inline

# Log verbosity: set TensorFlow's logger to the INFO level.
tf.logging.set_verbosity(tf.logging.INFO)

In [34]:
# Directory that contains the MNIST archive files.
url = "./"

mnist = input_data.read_data_sets(url)

# Print the image and label array shapes of every split, in the order
# train -> validation -> test.
for subset in (mnist.train, mnist.validation, mnist.test):
    print(subset.images.shape)
    print(subset.labels.shape)

Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz
(55000, 784)
(55000,)
(5000, 784)
(5000,)
(10000, 784)
(10000,)


In [35]:
# Graph inputs shared by the cells below:
#   x             - float32 batch of flattened images, 784 values per row
#   y             - int64 class label for each row of x
#   learning_rate - step size, fed with `lr` on every training step
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.int64, shape=[None])
learning_rate = tf.placeholder(tf.float32)

In [36]:
# Gaussian (truncated normal) initialization
def initialize(shape, stddev=0.1):
    """Return a truncated-normal random tensor to use as an initial value.

    Args:
        shape: Shape of the tensor to create, e.g. ``[784, 100]``.
        stddev: Standard deviation forwarded to ``tf.truncated_normal``.
            Defaults to 0.1.

    Returns:
        The tensor produced by ``tf.truncated_normal(shape, stddev=stddev)``.
    """
    # Forward the caller's stddev instead of a fixed constant so that the
    # parameter actually takes effect.
    return tf.truncated_normal(shape, stddev=stddev)

# Xavier initialization
def Xavier(node_in, node_out):
    """Return a float32 ``(node_in, node_out)`` array for weight initialization.

    Samples are drawn from a standard normal distribution and divided by
    ``sqrt(node_in)``; only the fan-in is used for the scale.

    Args:
        node_in: Number of input units (rows of the result).
        node_out: Number of output units (columns of the result).

    Returns:
        A ``numpy.ndarray`` of dtype float32 and shape ``(node_in, node_out)``.
    """
    samples = np.random.randn(node_in, node_out).astype(np.float32)
    # Keep the divisor in float32 too: dividing by the float64 scalar that
    # np.sqrt returns promotes the result to float64 under NumPy >= 2
    # promotion rules, which would no longer match the float32 placeholder.
    scale = np.float32(np.sqrt(node_in))
    return samples / scale

# MSRA (He) initialization
def MSRA(node_in, node_out):
    """Return a float32 ``(node_in, node_out)`` array for weight initialization.

    Samples are drawn from a standard normal distribution and divided by
    ``sqrt(node_in / 2)``.

    Args:
        node_in: Number of input units (rows of the result).
        node_out: Number of output units (columns of the result).

    Returns:
        A ``numpy.ndarray`` of dtype float32 and shape ``(node_in, node_out)``.
    """
    samples = np.random.randn(node_in, node_out).astype(np.float32)
    # 2.0 forces true division on every Python version; the float32 cast keeps
    # the result float32 regardless of NumPy's scalar promotion rules.
    scale = np.float32(np.sqrt(node_in / 2.0))
    return samples / scale

def _dense_layer(inputs, node_in, node_out, initial_way):
    """Create one affine layer and return ``(inputs @ W + b, W)``."""
    if initial_way is initialize:
        # The Gaussian initializer takes a shape list.
        W = tf.Variable(initial_way([node_in, node_out]))
        b = tf.Variable(initial_way([node_out]))
    else:
        # The other initializers take (node_in, node_out); the bias is drawn
        # with node_in=1, giving a (1, node_out) row that broadcasts over
        # the batch dimension.
        W = tf.Variable(initial_way(node_in, node_out))
        b = tf.Variable(initial_way(1, node_out))
    return tf.matmul(inputs, W) + b, W


def create_network(hidden1, initial_way, **kwags):
    """Build a fully connected ReLU network on the global placeholder ``x``.

    Args:
        hidden1: Number of units in the hidden layer connected to the
            784-wide input.
        initial_way: Initializer function. ``initialize`` is called with a
            shape list; any other callable is called as
            ``initial_way(node_in, node_out)``.
        **kwags: Additional hidden layers, one per keyword argument (for
            example ``hidden2=100``). Each value is that layer's unit count;
            layers are stacked in the iteration order of the keyword
            arguments.

    Returns:
        A tuple ``(logits, W)``: the un-activated 10-way output of the last
        layer and the weight variable of that output layer only.
    """
    layer_sizes = [hidden1] + list(kwags.values())

    output = x
    node_in = 784
    for node_out in layer_sizes:
        logits, _ = _dense_layer(output, node_in, node_out, initial_way)
        # Hidden layers are activated with ReLU.
        output = tf.nn.relu(logits)
        # Size the next layer from the layer just built, so hidden layers of
        # different widths chain together correctly.
        node_in = node_out

    # Output layer: 10 classes, returned without activation.
    return _dense_layer(output, node_in, 10, initial_way)




def train(batch_size, training_step, lr, hidden1, lambda_flag=None,
          lambda_value=0.1, initial_way=initialize, **kwags):
    """Build a network, train it with mini-batch gradient descent and print metrics.

    Args:
        batch_size: Number of training examples per step.
        training_step: Number of gradient-descent steps to run.
        lr: Learning rate fed to the ``learning_rate`` placeholder.
        hidden1: Unit count of the first hidden layer.
        lambda_flag: Regularization type: ``'l1'``, ``'l2'`` or ``None``
            (no regularization, the default).
        lambda_value: Regularization coefficient, used only when
            ``lambda_flag`` is set.
        initial_way: Initializer passed to ``create_network``; defaults to
            the Gaussian ``initialize``.
        **kwags: Extra hidden layers, forwarded to ``create_network``.

    Raises:
        ValueError: If ``lambda_flag`` is not ``None``, ``'l1'`` or ``'l2'``.

    Validation accuracy is printed every 100 steps, and train/test accuracy
    once training has finished. Nothing is returned.
    """
    # Reject unknown flags up front; otherwise no loss would be defined and
    # the failure would surface later as an unrelated-looking error.
    if lambda_flag not in (None, 'l1', 'l2'):
        raise ValueError(
            "lambda_flag must be None, 'l1' or 'l2', got %r" % (lambda_flag,))

    # W is the weight matrix of the output layer only, so the penalty below
    # covers just that layer.
    logits, W = create_network(hidden1, initial_way, **kwags)

    cross_entropy_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
    if lambda_flag is not None:
        regularizers = {
            'l1': tf.contrib.layers.l1_regularizer,
            'l2': tf.contrib.layers.l2_regularizer,
        }
        penalty = regularizers[lambda_flag](lambda_value)(W)
        # The contrib regularizers yield None when the scale is 0.0
        # (regularization disabled), so only add a real penalty tensor.
        if penalty is not None:
            cross_entropy_loss = cross_entropy_loss + penalty

    # Optimizer: plain gradient descent.
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(cross_entropy_loss)

    # The arg-max of the logits is the predicted class (softmax is monotonic,
    # so applying it first would not change the arg-max).
    correct_pred = tf.equal(tf.argmax(logits, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    with tf.Session() as sess:
        # NOTE(review): this initializes every variable in the default graph,
        # including those left over from earlier train() calls - confirm
        # whether the graph should be reset between experiments.
        sess.run(tf.global_variables_initializer())

        validate_data = {
            x: mnist.validation.images,
            y: mnist.validation.labels,
        }

        for i in range(training_step):
            xs, ys = mnist.train.next_batch(batch_size)
            _, loss = sess.run(
                [optimizer, cross_entropy_loss],
                feed_dict={x: xs, y: ys, learning_rate: lr},
            )

            # Report progress every 100 steps (step 0 is skipped).
            if i > 0 and i % 100 == 0:
                validate_accuracy = sess.run(accuracy, feed_dict=validate_data)
                print(
                    "after %d training steps, the loss is %g, the validation accuracy is %g"
                    % (i, loss, validate_accuracy)
                )

        print("the traning is finish!")

        train_data = {
            x: mnist.train.images,
            y: mnist.train.labels,
        }
        test_data = {
            x: mnist.test.images,
            y: mnist.test.labels,
        }
        acc_train = sess.run(accuracy, feed_dict=train_data)
        acc_test = sess.run(accuracy, feed_dict=test_data)
        print("the train accuracy is :", acc_train)
        print("the test accuracy is :", acc_test)
        

## 初始状态

In [37]:
# Baseline: one 100-unit hidden layer, 1000 steps, learning rate 0.3.
train(
    batch_size=32,
    training_step=1000,
    lr=0.3,
    hidden1=100,
)

after 100 training steps, the loss is 0.71379, the validation accuracy is 0.8512
after 200 training steps, the loss is 0.197124, the validation accuracy is 0.9144
after 300 training steps, the loss is 0.346137, the validation accuracy is 0.914
after 400 training steps, the loss is 0.237436, the validation accuracy is 0.9316
after 500 training steps, the loss is 0.368397, the validation accuracy is 0.938
after 600 training steps, the loss is 0.23627, the validation accuracy is 0.9378
after 700 training steps, the loss is 0.396076, the validation accuracy is 0.9454
after 800 training steps, the loss is 0.425638, the validation accuracy is 0.947
after 900 training steps, the loss is 0.0755001, the validation accuracy is 0.9576
the traning is finish!
the train accuracy is : 0.9520909
the test accuracy is : 0.9499


可以看到，在未进行参数调优之前，测试集准确率只有约95%（0.9499）。

## 下面增加训练次数至20000 ，并降低学习率至0.1

In [38]:
# Same single 100-unit hidden layer, but 20000 steps at learning rate 0.1.
train(
    batch_size=32,
    training_step=20000,
    lr=0.1,
    hidden1=100,
)

after 100 training steps, the loss is 0.519801, the validation accuracy is 0.8366
after 200 training steps, the loss is 0.570863, the validation accuracy is 0.8868
after 300 training steps, the loss is 0.479428, the validation accuracy is 0.893
after 400 training steps, the loss is 0.28, the validation accuracy is 0.9134
after 500 training steps, the loss is 0.378267, the validation accuracy is 0.9072
after 600 training steps, the loss is 0.449428, the validation accuracy is 0.9188
after 700 training steps, the loss is 0.234085, the validation accuracy is 0.919
after 800 training steps, the loss is 0.0979101, the validation accuracy is 0.926
after 900 training steps, the loss is 0.484325, the validation accuracy is 0.9328
after 1000 training steps, the loss is 0.0974513, the validation accuracy is 0.9314
after 1100 training steps, the loss is 0.15352, the validation accuracy is 0.9366
after 1200 training steps, the loss is 0.382811, the validation accuracy is 0.9344
after 1300 training

after 10000 training steps, the loss is 0.0326659, the validation accuracy is 0.9718
after 10100 training steps, the loss is 0.0918415, the validation accuracy is 0.9748
after 10200 training steps, the loss is 0.076035, the validation accuracy is 0.9748
after 10300 training steps, the loss is 0.156449, the validation accuracy is 0.9746
after 10400 training steps, the loss is 0.114482, the validation accuracy is 0.9724
after 10500 training steps, the loss is 0.0922276, the validation accuracy is 0.9752
after 10600 training steps, the loss is 0.0384158, the validation accuracy is 0.9758
after 10700 training steps, the loss is 0.0355638, the validation accuracy is 0.9744
after 10800 training steps, the loss is 0.0402294, the validation accuracy is 0.9728
after 10900 training steps, the loss is 0.0122917, the validation accuracy is 0.9732
after 11000 training steps, the loss is 0.0506499, the validation accuracy is 0.9744
after 11100 training steps, the loss is 0.180277, the validation acc

after 19700 training steps, the loss is 0.0420382, the validation accuracy is 0.9758
after 19800 training steps, the loss is 0.0077823, the validation accuracy is 0.9776
after 19900 training steps, the loss is 0.00349406, the validation accuracy is 0.9784
the traning is finish!
the train accuracy is : 0.99265456
the test accuracy is : 0.9746


可以看到，测试集准确率达到了约97.5%，比之前提高了约2.5个百分点。

## 下面单纯增加隐层数量 增加一层神经元个数为100

In [17]:
# Two hidden layers of 100 units each, 1000 steps, learning rate 0.3.
train(
    batch_size=32,
    training_step=1000,
    lr=0.3,
    hidden1=100,
    hidden2=100,
)

after 100 training steps, the loss is 0.50878, the validation accuracy is 0.7608
after 200 training steps, the loss is 0.859833, the validation accuracy is 0.8164
after 300 training steps, the loss is 0.080929, the validation accuracy is 0.921
after 400 training steps, the loss is 0.347211, the validation accuracy is 0.9296
after 500 training steps, the loss is 0.351842, the validation accuracy is 0.9358
after 600 training steps, the loss is 0.117844, the validation accuracy is 0.9378
after 700 training steps, the loss is 0.233173, the validation accuracy is 0.9324
after 800 training steps, the loss is 0.262243, the validation accuracy is 0.936
after 900 training steps, the loss is 0.190898, the validation accuracy is 0.9394
the traning is finish!
the train accuracy is : 0.948
the test accuracy is : 0.9431


仅仅增加隐层数量，效果并没有预期的好，准确率也只有94%左右

## 在增加隐层的基础上，增加训练次数并降低学习率

In [19]:
# Two hidden layers of 100 units each, 20000 steps, learning rate 0.1.
train(
    batch_size=32,
    training_step=20000,
    lr=0.1,
    hidden1=100,
    hidden2=100,
)

after 100 training steps, the loss is 0.484106, the validation accuracy is 0.8332
after 200 training steps, the loss is 0.241133, the validation accuracy is 0.8864
after 300 training steps, the loss is 0.245671, the validation accuracy is 0.8934
after 400 training steps, the loss is 0.409385, the validation accuracy is 0.916
after 500 training steps, the loss is 0.0724811, the validation accuracy is 0.9228
after 600 training steps, the loss is 0.252472, the validation accuracy is 0.9252
after 700 training steps, the loss is 0.155133, the validation accuracy is 0.9294
after 800 training steps, the loss is 0.149752, the validation accuracy is 0.9374
after 900 training steps, the loss is 0.128861, the validation accuracy is 0.9416
after 1000 training steps, the loss is 0.434686, the validation accuracy is 0.935
after 1100 training steps, the loss is 0.779429, the validation accuracy is 0.9066
after 1200 training steps, the loss is 0.15246, the validation accuracy is 0.9464
after 1300 trai

after 10000 training steps, the loss is 0.0162476, the validation accuracy is 0.975
after 10100 training steps, the loss is 0.0113536, the validation accuracy is 0.9732
after 10200 training steps, the loss is 0.0131036, the validation accuracy is 0.9762
after 10300 training steps, the loss is 0.0130197, the validation accuracy is 0.9754
after 10400 training steps, the loss is 0.0715552, the validation accuracy is 0.9772
after 10500 training steps, the loss is 0.000424226, the validation accuracy is 0.9774
after 10600 training steps, the loss is 0.0356783, the validation accuracy is 0.977
after 10700 training steps, the loss is 0.00535564, the validation accuracy is 0.9758
after 10800 training steps, the loss is 0.0167313, the validation accuracy is 0.9758
after 10900 training steps, the loss is 0.00348134, the validation accuracy is 0.9762
after 11000 training steps, the loss is 0.136834, the validation accuracy is 0.9746
after 11100 training steps, the loss is 0.124987, the validation

after 19700 training steps, the loss is 0.0254301, the validation accuracy is 0.9798
after 19800 training steps, the loss is 0.00265462, the validation accuracy is 0.9806
after 19900 training steps, the loss is 0.048022, the validation accuracy is 0.9764
the traning is finish!
the train accuracy is : 0.9976182
the test accuracy is : 0.9791


可以看到准确率达到了97.9%,接近98%，同时在10000次左右效果已经很好，下面为了减少训练时间，训练次数采用10000次

## 在上面基础上增加神经元数量

In [40]:
# Two hidden layers of 1000 units each, 10000 steps, learning rate 0.1.
train(
    batch_size=32,
    training_step=10000,
    lr=0.1,
    hidden1=1000,
    hidden2=1000,
)

after 100 training steps, the loss is 0.444278, the validation accuracy is 0.8916
after 200 training steps, the loss is 0.367151, the validation accuracy is 0.9238
after 300 training steps, the loss is 0.330017, the validation accuracy is 0.9302
after 400 training steps, the loss is 0.0342281, the validation accuracy is 0.9412
after 500 training steps, the loss is 0.058277, the validation accuracy is 0.9492
after 600 training steps, the loss is 0.180861, the validation accuracy is 0.9508
after 700 training steps, the loss is 0.111979, the validation accuracy is 0.9528
after 800 training steps, the loss is 0.128166, the validation accuracy is 0.957
after 900 training steps, the loss is 0.036341, the validation accuracy is 0.9598
after 1000 training steps, the loss is 0.234459, the validation accuracy is 0.958
after 1100 training steps, the loss is 0.0309909, the validation accuracy is 0.964
after 1200 training steps, the loss is 0.143733, the validation accuracy is 0.9676
after 1300 tra

after 9900 training steps, the loss is 0.000898366, the validation accuracy is 0.9832
the traning is finish!
the train accuracy is : 0.9999091
the test accuracy is : 0.9803


上面使用了10000次训练，两层神经元，每层神经元个数为1000个，训练时间较长，最终在测试集上的准确率可以达到98%！！训练集的准确率已经约为1.0

## 在上面的基础上增加正则项，首先为L1正则,不再增加隐层

In [42]:
# One 1000-unit hidden layer with L1 regularization (coefficient 0.01),
# 1000 steps, learning rate 0.1.
train(
    batch_size=32,
    training_step=1000,
    lr=0.1,
    hidden1=1000,
    lambda_flag='l1',
    lambda_value=0.01,
)

after 100 training steps, the loss is 1.96987, the validation accuracy is 0.8716
after 200 training steps, the loss is 1.06809, the validation accuracy is 0.8314
after 300 training steps, the loss is 1.05262, the validation accuracy is 0.8226
after 400 training steps, the loss is 1.07248, the validation accuracy is 0.851
after 500 training steps, the loss is 1.04777, the validation accuracy is 0.859
after 600 training steps, the loss is 0.992822, the validation accuracy is 0.8328
after 700 training steps, the loss is 1.13705, the validation accuracy is 0.8958
after 800 training steps, the loss is 0.767784, the validation accuracy is 0.8972
after 900 training steps, the loss is 0.70365, the validation accuracy is 0.9012
the traning is finish!
the train accuracy is : 0.88174546
the test accuracy is : 0.8917


效果并不是特别好！只有约89%的准确率

## 改为L2正则试一试

In [44]:
# One 1000-unit hidden layer with L2 regularization (coefficient 0.01),
# 1000 steps, learning rate 0.1.
train(
    batch_size=32,
    training_step=1000,
    lr=0.1,
    hidden1=1000,
    lambda_flag='l2',
    lambda_value=0.01,
)

after 100 training steps, the loss is 1.0163, the validation accuracy is 0.8728
after 200 training steps, the loss is 0.768788, the validation accuracy is 0.9044
after 300 training steps, the loss is 0.532562, the validation accuracy is 0.9216
after 400 training steps, the loss is 0.676842, the validation accuracy is 0.924
after 500 training steps, the loss is 0.501241, the validation accuracy is 0.9298
after 600 training steps, the loss is 0.30376, the validation accuracy is 0.925
after 700 training steps, the loss is 0.307959, the validation accuracy is 0.937
after 800 training steps, the loss is 0.382548, the validation accuracy is 0.9408
after 900 training steps, the loss is 0.34071, the validation accuracy is 0.9382
the traning is finish!
the train accuracy is : 0.94078183
the test accuracy is : 0.9415


L2正则比L1正则效果好很多，准确率为94%

## 改变初始化方式

In [46]:
# MSRA initialization, one 100-unit hidden layer, L2 regularization
# (coefficient 0.01), 1000 steps, learning rate 0.1.
train(
    batch_size=32,
    training_step=1000,
    lr=0.1,
    initial_way=MSRA,
    hidden1=100,
    lambda_flag='l2',
    lambda_value=0.01,
)

after 100 training steps, the loss is 0.88325, the validation accuracy is 0.852
after 200 training steps, the loss is 0.576833, the validation accuracy is 0.8774
after 300 training steps, the loss is 0.304504, the validation accuracy is 0.8956
after 400 training steps, the loss is 0.591831, the validation accuracy is 0.9078
after 500 training steps, the loss is 0.47441, the validation accuracy is 0.9102
after 600 training steps, the loss is 0.700314, the validation accuracy is 0.9036
after 700 training steps, the loss is 0.408681, the validation accuracy is 0.9116
after 800 training steps, the loss is 0.253816, the validation accuracy is 0.9102
after 900 training steps, the loss is 0.318185, the validation accuracy is 0.9202
the traning is finish!
the train accuracy is : 0.9107091
the test accuracy is : 0.9109


In [47]:
# Xavier initialization, one 100-unit hidden layer, L2 regularization
# (coefficient 0.01), 1000 steps, learning rate 0.1.
train(
    batch_size=32,
    training_step=1000,
    lr=0.1,
    initial_way=Xavier,
    hidden1=100,
    lambda_flag='l2',
    lambda_value=0.01,
)

after 100 training steps, the loss is 0.850773, the validation accuracy is 0.8588
after 200 training steps, the loss is 0.385894, the validation accuracy is 0.8858
after 300 training steps, the loss is 0.348053, the validation accuracy is 0.8932
after 400 training steps, the loss is 0.340069, the validation accuracy is 0.901
after 500 training steps, the loss is 0.593027, the validation accuracy is 0.9
after 600 training steps, the loss is 0.401246, the validation accuracy is 0.9138
after 700 training steps, the loss is 0.659674, the validation accuracy is 0.9118
after 800 training steps, the loss is 0.392728, the validation accuracy is 0.9166
after 900 training steps, the loss is 0.34493, the validation accuracy is 0.9152
the traning is finish!
the train accuracy is : 0.90572727
the test accuracy is : 0.911


可以看到效果也没有预期的好，甚至没有最初的模型准确率高。

## 综合以上尝试，考虑把模型复杂化，参数选择为训练20000步，lr=0.01，使用L2正则，初始化方式为高斯,使用双隐层，神经元个数为1000

In [48]:
# Two hidden layers of 1000 units each with L2 regularization
# (coefficient 0.01), 20000 steps, learning rate 0.01, default initializer.
train(
    batch_size=32,
    training_step=20000,
    lr=0.01,
    lambda_flag='l2',
    lambda_value=0.01,
    hidden1=1000,
    hidden2=1000,
)

after 100 training steps, the loss is 0.920323, the validation accuracy is 0.8296
after 200 training steps, the loss is 0.968853, the validation accuracy is 0.8774
after 300 training steps, the loss is 0.746169, the validation accuracy is 0.8988
after 400 training steps, the loss is 0.648315, the validation accuracy is 0.8982
after 500 training steps, the loss is 0.547964, the validation accuracy is 0.9106
after 600 training steps, the loss is 0.766745, the validation accuracy is 0.916
after 700 training steps, the loss is 0.792773, the validation accuracy is 0.9224
after 800 training steps, the loss is 0.681362, the validation accuracy is 0.9282
after 900 training steps, the loss is 0.417354, the validation accuracy is 0.9316
after 1000 training steps, the loss is 0.557507, the validation accuracy is 0.9332
after 1100 training steps, the loss is 0.362353, the validation accuracy is 0.9374
after 1200 training steps, the loss is 0.412011, the validation accuracy is 0.9394
after 1300 tra

after 10100 training steps, the loss is 0.271772, the validation accuracy is 0.9706
after 10200 training steps, the loss is 0.269033, the validation accuracy is 0.9686
after 10300 training steps, the loss is 0.19568, the validation accuracy is 0.9696
after 10400 training steps, the loss is 0.189362, the validation accuracy is 0.9676
after 10500 training steps, the loss is 0.110334, the validation accuracy is 0.9684
after 10600 training steps, the loss is 0.173203, the validation accuracy is 0.9702
after 10700 training steps, the loss is 0.145497, the validation accuracy is 0.9716
after 10800 training steps, the loss is 0.12446, the validation accuracy is 0.972
after 10900 training steps, the loss is 0.223523, the validation accuracy is 0.9694
after 11000 training steps, the loss is 0.205136, the validation accuracy is 0.9694
after 11100 training steps, the loss is 0.140499, the validation accuracy is 0.9702
after 11200 training steps, the loss is 0.183402, the validation accuracy is 0.

after 19900 training steps, the loss is 0.211558, the validation accuracy is 0.9728
the traning is finish!
the train accuracy is : 0.9829091
the test accuracy is : 0.9686


## 使模型变得复杂后，发现效果反而不好：耗时很长，测试集准确率也只有约96.9%

## 经过上面多种尝试，发现只需增加迭代次数、增加神经元数量并调整学习率，

## 就能取得比较好的效果。结合这一点，最终训练的最优模型如下

In [49]:
# Final configuration: two hidden layers of 1000 units each, 20000 steps,
# learning rate 0.1, no regularization.
train(
    batch_size=32,
    training_step=20000,
    lr=0.1,
    hidden1=1000,
    hidden2=1000,
)

after 100 training steps, the loss is 0.237222, the validation accuracy is 0.9012
after 200 training steps, the loss is 0.385639, the validation accuracy is 0.9278
after 300 training steps, the loss is 0.0987822, the validation accuracy is 0.9336
after 400 training steps, the loss is 0.101503, the validation accuracy is 0.9508
after 500 training steps, the loss is 0.118876, the validation accuracy is 0.9452
after 600 training steps, the loss is 0.108847, the validation accuracy is 0.951
after 700 training steps, the loss is 0.315249, the validation accuracy is 0.9522
after 800 training steps, the loss is 0.0699681, the validation accuracy is 0.9578
after 900 training steps, the loss is 0.146829, the validation accuracy is 0.957
after 1000 training steps, the loss is 0.104086, the validation accuracy is 0.9632
after 1100 training steps, the loss is 0.189242, the validation accuracy is 0.9602
after 1200 training steps, the loss is 0.0288254, the validation accuracy is 0.9654
after 1300 t

after 9900 training steps, the loss is 0.00112749, the validation accuracy is 0.9826
after 10000 training steps, the loss is 0.00191448, the validation accuracy is 0.982
after 10100 training steps, the loss is 0.00147388, the validation accuracy is 0.9826
after 10200 training steps, the loss is 0.00134426, the validation accuracy is 0.983
after 10300 training steps, the loss is 0.00171471, the validation accuracy is 0.9826
after 10400 training steps, the loss is 0.00302469, the validation accuracy is 0.9826
after 10500 training steps, the loss is 0.00163191, the validation accuracy is 0.9836
after 10600 training steps, the loss is 0.0109626, the validation accuracy is 0.9824
after 10700 training steps, the loss is 0.00312855, the validation accuracy is 0.9832
after 10800 training steps, the loss is 0.00252614, the validation accuracy is 0.9828
after 10900 training steps, the loss is 0.000801301, the validation accuracy is 0.9834
after 11000 training steps, the loss is 0.00166564, the v

after 19500 training steps, the loss is 0.00122844, the validation accuracy is 0.9836
after 19600 training steps, the loss is 0.00188233, the validation accuracy is 0.9834
after 19700 training steps, the loss is 0.000235774, the validation accuracy is 0.9834
after 19800 training steps, the loss is 0.000775239, the validation accuracy is 0.9834
after 19900 training steps, the loss is 0.00463729, the validation accuracy is 0.9842
the traning is finish!
the train accuracy is : 1.0
the test accuracy is : 0.9828


总结：
经过上面的训练可以发现：增加训练次数、增加神经元个数能够较好地提升模型性能；但如果模型过于复杂，性能反而会下降。

本次训练最终取得的测试集准确率约为98.3%（0.9828），参数为训练次数20000步，学习率0.1，双隐层结构，每个隐层神经元个数为1000个，batch_size=32。

通过上面添加正则项的实验可以看出，单纯对比的话，L2正则的效果要好于L1正则。但正则的作用主要是缩小训练集与测试集准确率之间的差距（抑制过拟合），对模型最终效果的提升并不是很大。
