# 自己代码实现

## 结论
* 神经网络大小[3072, 200,80, 62] epochs=500, mini_batch_size=1, eta=0.45,运行了39.0mins，training_acc: 82.1%，train_cost约0.097,test_acc:约56%,test_cost:0.327

In [104]:
import pandas as pd
import numpy as np
import random
from get_data import read_data, to_grayscale, normalise_images
import matplotlib.pyplot as plt
import time#计算代码运行时间


class dense(object):
    def __init__(self, x, neurons, activation):
        """
        全连接层的实现
        :param x: 当前层的输入
        :param neurons: 神经元数目
        :param activation: 神经元激活函数
        """
        self.x = x
        self.activation = activation
        # 初始化bias，shape=(neurons, 1)
        self.biases = np.random.randn(neurons, 1)
        # 初始化权重，shape=(x, neurons)
        self.weights = np.random.randn(len(self.x), neurons)
        self.a = np.zeros(neurons)#
        self.z = np.zeros(neurons)#

    def feedforward(self, x):
        """
        前向传播实现
        z=wx+b    a=activation(z)
        :param x: 当前层的输入
        :return: 当前层的激活值
        """
        self.x = x
        self.z = np.dot(self.weights.transpose(), self.x) + self.biases
        self.a = self.activation.activate(self.z)
        return self.a

    def backprop(self):
        """
        计算梯度
        :return:
        """
        grad = self.activation.prime(self.z)
        return grad

    def back(self, delta_w, delta_b):
        """
        更新weights和bias
        :param delta_w: 权重变化大小矩阵
        :param delta_b: 偏置变化大小矩阵
        :return:
        """
        self.weights = self.weights - delta_w
        self.biases = self.biases - delta_b


class sigmoid(object):

    @staticmethod
    def activate(z):
        """The sigmoid function."""
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def prime(z):
        """sigmoid函数的微分."""
        return sigmoid.activate(z) * (1 - sigmoid.activate(z))


class QuadraticCost(object):

    @staticmethod
    def fn(a, y):
        """Return the cost associated with an output ``a`` and desired output
        ``y``.

        """
        return 0.5 * np.linalg.norm(a - y) ** 2

    @staticmethod
    def delta(a, y):
        """Return the error delta from the output layer."""
        return (a - y)


class Network(object):
    def __init__(self, sizes, cost=QuadraticCost):
        """
        神经网络结构定义
        :param sizes: 神经网络的大小；（784, 30, 10）即表示输入层是784个节点；隐藏层是30个节点；输出是10个节点
        :param cost: 代价函数
        """
        self.num_layers = len(sizes)
        self.cost = cost
        self.sizes = sizes
        self.x = np.zeros(self.sizes[0])#输入层节点数
        self.fc_layers = []
        x = self.x
        #         print('self.x:',x.shape,x)
        # 创建隐藏层和输出层
        for neurons in self.sizes[1:]:
            self.fc_layers.append(dense(x, neurons, sigmoid))
            x = self.fc_layers[-1].a
#             print('x:',x)
#             print('self.fc_layers:',len(self.fc_layers),self.fc_layers[len(self.fc_layers)-1])



    def feedforward(self, a):
        """
        前向传播，计算网络的输出
        :param a: 网络输入
        :return: 网络输出
        """
        x = a
        for layer in self.fc_layers:
            x = layer.feedforward(x)
        return self.fc_layers[-1].a

    def SGD(self, training_data, epochs, mini_batch_size, eta,
            evaluation_data=None):
        """
        随机梯度下降
        :param training_data: 训练集
        :param epochs: 训练周期
        :param mini_batch_size: 批次大小
        :param eta: 学习率参数 ，真实学习率lr=eta/len(mini_batch_size)
        :param evaluation_data: 验证集
        :return:
        """
        train_accs = []
        train_costs = []

        test_accs = []
        test_costs = []
        if evaluation_data: n_data = len(evaluation_data)
        n = 4591#len(training_data)
        for j in range(epochs):
            # 数据随机化
            random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size] for k in range(0, n, mini_batch_size)]
#             print('mini_batches:',len(mini_batches),mini_batches[0])
            # 针对每一个批次进行梯度下降
            for mini_batch in mini_batches:
#                 print('mini_batch:',np.array(mini_batch).shape,mini_batch)
                self.update_mini_batch(mini_batch, eta)
                

            train_acc = self.evaluate(training_data, True) / n
            train_accs.append(train_acc)
            train_cost = self.total_cost(training_data)
            train_costs.append(train_cost)
            print("Epoch {} training, Acc: {}, cost: {}:".format(j, train_acc, train_cost))
            # 打印出正确分类的个数
            if evaluation_data:
                test_correct_count = self.evaluate(evaluation_data)
                test_acc = test_correct_count/n_data
                test_cost = self.total_cost(evaluation_data)#去掉, convert=True
                test_accs.append(test_acc)
                test_costs.append(test_cost)
                print("Epoch {} evaluate: {} / {}, Acc: {}, cost: {}".format(j, self.evaluate(evaluation_data), n_data, test_acc, test_cost))
            else:
                print("Epoch {} complete".format(j))
        return train_accs, train_costs, test_accs, test_costs

    def update_mini_batch(self, mini_batch, eta):
        """
        梯度下降，向后传播更新参数
        :param mini_batch: 批次数据
        :param eta: 学习率参数
        :return:
        """
        lr = eta / len(mini_batch)
        for x, y in mini_batch:
            # 前向传播计算每层的输出
            self.feedforward(x)
            # 计算输出层的梯度
            delta = self.cost.delta(self.fc_layers[-1].a, y) * self.fc_layers[-1].backprop()
            delta_w = np.dot(self.fc_layers[-2].a, delta.transpose())
            self.fc_layers[-1].back(lr * delta_w, eta / lr * delta)
            # 当网络结构多一个隐藏层
            if self.num_layers > 3:
                for layer in range(2, self.num_layers - 1):
                    delta = np.dot(self.fc_layers[-layer + 1].weights, delta) * self.fc_layers[
                        -layer].backprop()
                    delta_w = np.dot(self.fc_layers[-layer - 1].a, delta.transpose())
                    self.fc_layers[-layer].back(lr * delta_w, lr * delta)

            # 三层结构
            else:
                delta = np.dot(self.fc_layers[-1].weights, delta) * self.fc_layers[
                    -2].backprop()
                delta_w = np.dot(x, delta.transpose())
                self.fc_layers[-2].back(lr * delta_w, lr * delta)

    def evaluate(self, test_data, convert=False):
        """统计出来正确分类的个数;argmax表示数组中最大的值的位置；因为最终output的结果是0,1向量，只有一个值为1，也就是判别的种类"""
        if not convert:
            test_results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                            for (x, y) in test_data]
#             print('False:',len(test_results),len(test_results[0]),test_results)
        else:
            test_results = [(np.argmax(self.feedforward(x)), np.argmax(y))
                       for (x, y) in test_data]
#             print('True:',len(test_results),len(test_results[0]),test_results)
        return sum(int(x == y) for (x, y) in test_results)

    def total_cost(self, data, convert=False):
        """
        所有的损失值
        """
        cost = 0.0
        for x, y in data:
            a = self.feedforward(x)
            if convert: y = vectorized_result(y)
            cost += self.cost.fn(a, y) / len(data)
        return cost


def vectorized_result(j):
    """Return a 62-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere.  This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((62, 1))
    e[j] = 1.0
    return e


def main():

    #读取数据，并将数据转换为可输入BP神经网络的格式
    train_x, train_y = read_data('./data/BelgiumTSC_Training.zip', 'Training/')
#     print('training data shape:',train_x.shape,train_y.shape)#train_x大小为(4591, 32, 32, 3),train_y大小为4591*1
    #将训练集转换为可输入BP神经网络的格式
    training_inputs = [np.reshape(x, (3072, 1)) for x in train_x]
#     print(len(training_inputs),len(training_inputs[0]))
    training_results = [vectorized_result(y) for y in train_y]
#     print(len(training_results),len(training_results[0]))
    training_data= list(zip (training_inputs, training_results))
#     print(len(training_data),len(training_data[0]),training_data[0])
    #将测试集转换为可输入BP神经网络的格式
    test_x, test_y = read_data('./data/BelgiumTSC_Testing.zip', 'Testing/')
#     print('test data shape:',test_x.shape,test_y.shape)#test_x大小为(2534, 32, 32, 3),test_y大小为2534*1
    test_inputs = [np.reshape(x, (3072, 1)) for x in test_x]
#     print(len(test_inputs),len(test_inputs[0]))
    test_results = [vectorized_result(y) for y in test_y]
#     print(len(test_results),len(test_results[0]))
    test_data= list(zip (test_inputs, test_results))
#     print(len(test_data),len(test_data[0]),test_data[0])
    
    #神经网络模型
    Model=Network([3072, 200,80, 62], cost=QuadraticCost)#Model为神经网路模型，输入层是3072个节点；输出层是62个节点；隐藏层初始值≈sqrt(3072*62）个节点；
    train_accs, train_costs, test_accs, test_costs=Model.SGD(training_data=training_data, epochs=50, mini_batch_size=1, eta=0.45,evaluation_data=test_data)


if __name__ == '__main__':
    start = time.clock()
    main()
    end = time.clock()
    print('process time:',str((end - start)/60),'mins')




Epoch 0 training, Acc: 0.00326726203441516, cost: 0.9799287255404499:
Epoch 0 evaluate: 6 / 2534, Acc: 0.0023677979479084454, cost: 0.9764119156325322
Epoch 1 training, Acc: 0.00326726203441516, cost: 0.9788993080409958:
Epoch 1 evaluate: 6 / 2534, Acc: 0.0023677979479084454, cost: 0.9798358673589369
Epoch 2 training, Acc: 0.00326726203441516, cost: 0.9570850026703241:
Epoch 2 evaluate: 6 / 2534, Acc: 0.0023677979479084454, cost: 0.9468523744728647
Epoch 3 training, Acc: 0.19255064256153342, cost: 0.4577245344125543:
Epoch 3 evaluate: 680 / 2534, Acc: 0.26835043409629045, cost: 0.4308914431648413
Epoch 4 training, Acc: 0.21520365933347854, cost: 0.4471292628700074:
Epoch 4 evaluate: 672 / 2534, Acc: 0.26519337016574585, cost: 0.44268145710116513
Epoch 5 training, Acc: 0.21759965149204966, cost: 0.4364255936070286:
Epoch 5 evaluate: 667 / 2534, Acc: 0.2632202052091555, cost: 0.42343330339700286
Epoch 6 training, Acc: 0.25049008930516226, cost: 0.4223572398068448:
Epoch 6 evaluate: 852 /

## 测试结果

### 神经网络大小[3072, 436, 62]
* epochs=50, mini_batch_size=1, eta=0.1 计算很慢，training_data的准确率只有约2.1%
* epochs=200, mini_batch_size=10, eta=0.5,运行了6个小时，training_data的准确率最高只有约4.8%
* epochs=10, mini_batch_size=10, eta=1,运行了18mins，training_data的准确率最高只有约0.9%
* epochs=10, mini_batch_size=1, eta=1,运行了18mins，training_data的准确率最高只有约4.2%

### 神经网络大小[3072, 200, 62]
* epochs=10, mini_batch_size=1, eta=1,运行了7.8mins，training_data的准确率最高只有约1.8%

### 神经网络大小[3072, 600, 62]
* epochs=10, mini_batch_size=1, eta=1,运行了  mins，training_data的准确率最高只有约6.9%

### 神经网络大小[3072, 1024, 62]
* epochs=2, mini_batch_size=1, eta=1，运行了  mins，training_data的准确率最高约8.1%

### 神经网络大小[3072, 200,100, 62]
* epochs=20, mini_batch_size=1, eta=1，运行了1.2mins，training_data的准确率最高约5.6%
* epochs=20, mini_batch_size=1, eta=0.5,运行了1.2mins，training_data的准确率最高约47.0%
* epochs=20, mini_batch_size=1, eta=0.6,运行了1.2mins，training_data的准确率最高约42.1%
* epochs=50, mini_batch_size=1, eta=0.5,运行了3.2mins，training_data的准确率最高约53.4%

### 神经网络大小[3072, 200,80, 62]
* epochs=10, mini_batch_size=1, eta=0.5,运行了0.71mins，training_data的准确率最高约44.5%
* epochs=50, mini_batch_size=1, eta=0.5,运行了2.6mins，training_data的准确率最高约49.9%,train_cost约0.33, test_acc:50%,test_cost:0.35
* epochs=50, mini_batch_size=1, eta=0.45,运行了4.7mins，training_data的准确率最高约53.4%,train_cost约0.28, test_acc:45.3%,test_cost:0.36 
* epochs=200, mini_batch_size=1, eta=0.5,运行了10.5mins，training_data的准确率最高约70.6%
* epochs=500, mini_batch_size=1, eta=0.45,运行了39.0mins，training_acc: 82.1%，train_cost约0.097,test_acc:约56%,test_cost:0.327

### 神经网络大小[3072, 200,70, 62]
* epochs=50, mini_batch_size=1, eta=0.5,运行了2.7mins，training_data的准确率最高约63.0%，train_cost约0.26
* epochs=500, mini_batch_size=1, eta=0.45，运行了25.7mins，training_data的准确率最高约77.9%，train_cost约0.11 

### 神经网络大小[3072, 200,62, 62]
* epochs=50, mini_batch_size=1, eta=0.45,运行了3.7mins，training_acc:66.1%，train_cost约0.21,test_acc:约53.2%,test_cost:0.31 

### 神经网络大小[3072, 200,50, 62]
* epochs=50, mini_batch_size=1, eta=0.45,运行了3.4mins，training_acc:65.2%，train_cost约0.26,test_acc:约53.7%,test_cost:0.33

### 神经网络大小[3072, 200,30, 62]
* epochs=50, mini_batch_size=1, eta=0.45,运行了3.2mins，training_acc:61.2%，train_cost约0.26,test_acc:约49.6%,test_cost: 0.33

### 神经网络大小[3072, 250,80, 62]
* epochs=50, mini_batch_size=1, eta=0.5,运行了4.2mins，training_data的准确率最高约0.6%，train_cost约0.76

### 神经网络大小[3072, 180,80, 62]
* epochs=50, mini_batch_size=1, eta=0.5,运行了2.3mins，training_data的准确率最高约60.0%，train_cost约0.25

### 神经网络大小[3072, 150,80, 62]
* epochs=50, mini_batch_size=1, eta=0.5,运行了2.0mins，training_data的准确率最高约63.4%，train_cost约0.24
* epochs=500, mini_batch_size=1, eta=0.45，运行了27.6mins，training_acc: 76.6%,train_cost约0.118, test_acc:52.9%,test_cost:0.34

### 神经网络大小[3072, 120,80, 62]
* epochs=50, mini_batch_size=1, eta=0.5,运行了1.6mins，training_data的准确率最高约60.2%，train_cost约0.25 

### 神经网络大小[3072, 100,80, 62]
* epochs=50,mini_batch_size=1,eta=0.5,运行了1.4mins，training_data的准确率最高约51.2%，train_cost约0.31
* epochs=500,mini_batch_size=1,eta=0.45,运行了23.2mins，training_acc:73.1%，train_cost约0.135，test_acc:约52%, test_cost: 0.363

### 神经网络大小[3072,400,100,62]
* epochs=10, mini_batch_size=1, eta=0.7,运行了2.6mins，training_data的准确率最高约26.9%

### 神经网络大小[3072, 400, 200, 62]
* epochs=5, mini_batch_size=1, eta=0.5,运行了1.54mins，training_data的准确率最高约0.2%
* epochs=5, mini_batch_size=1, eta=1,运行了1.54mins，training_data的准确率最高约5.6%

### 神经网络大小[3072, 300, 200,100, 62]
* epochs=10, mini_batch_size=1, eta=0.3,运行了1.8mins，training_data的准确率最高约39%

### 神经网络大小[3072, 400, 200,100,62]
* epochs=10, mini_batch_size=1, eta=0.5,运行了3.3mins，training_data的准确率最高约28.4%
* epochs=50, mini_batch_size=1, eta=0.5,运行了16.4mins，training_data的准确率最高约32.7%

### 上面的代码两层神经网络（只有一个隐藏层），计算时间极长；

# 使用TensorFlow实现

## 结论
* 网络大小[3072,600,62],输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了28.6mins，train_acc:96.3% ,test_acc:70.4%
* 层数越多，中间层越大，越容易过拟合；但是Training_acc越高，一般情况Test_acc也会略高一点

In [102]:
import tensorflow as tf
import numpy as np
import random
from get_data import read_data, to_grayscale, normalise_images

#用于计时
start = time.clock()

#读取数据
train_x, train_y = read_data('./data/BelgiumTSC_Training.zip', 'Training/')
training_inputs = np.array([np.reshape(x, (3072, 1)) for x in train_x])
# print(training_inputs.shape)
training_results = np.array([vectorized_result(y) for y in train_y])
# print(training_results.shape,len([vectorized_result(y) for y in train_y]))
training_data= np.array(list((zip (training_inputs, training_results))))
test_x, test_y = read_data('./data/BelgiumTSC_Testing.zip', 'Testing/')
test_inputs =np.array([np.reshape(x, (3072, 1)) for x in test_x]) 
test_results = np.array([vectorized_result(y) for y in test_y])
test_data= np.array(list(zip (test_inputs, test_results)))

#none可以是任何size；也即，训练集的个数不确定；列数为3072
x = tf.placeholder(tf.float32, [None, 3072])
y = tf.placeholder(tf.float32, [None, 62])



# 权重和偏置；增加一层
# 初始值不能为0；否则无法改变权重
W1 = tf.Variable(tf.truncated_normal([3072, 800],  stddev=0.5, dtype=tf.float32))
B1 = tf.Variable(tf.zeros([800]))

W2 = tf.Variable(tf.truncated_normal([800,62], stddev=0.3, dtype=tf.float32))
B2 = tf.Variable(tf.zeros([62]))

# W3 = tf.Variable(tf.truncated_normal([60,62], stddev=0.1, dtype=tf.float32))
# B3 = tf.Variable(tf.zeros([62]))

# W4 = tf.Variable(tf.truncated_normal([50,62], stddev=0.5, dtype=tf.float32))
# B4 = tf.Variable(tf.zeros([62]))

#给定输入，计算输出
layer1 = tf.nn.sigmoid(tf.matmul(x, W1) + B1)
# layer2 = tf.nn.softmax(tf.matmul(layer1, W2) + B2)
# layer3 = tf.nn.softmax(tf.matmul(layer2, W3) + B3)
H = tf.nn.softmax(tf.matmul(layer1, W2) + B2)

# 损失函数，交叉熵
# cross_entropy = tf.reduce_mean(((y * tf.log(H)) +((1 - y) * tf.log(1.0 - H))) * -1)#sigmoid对应的损失函数
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(H), reduction_indices=[1]))#softmax对应的损失函数

#梯度下降，步长为0.1
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(cross_entropy)

# 初始化
init = tf.global_variables_initializer()
# n=len(training_data)
n=training_data.shape[0]
n_test=test_data.shape[0]
# print(n)
# mini_batch_size=1

with tf.Session() as s:
    s.run(init)
    # train
    for i in range(2000):
        #数据调整
        train_xs=np.array([data[0] for data in training_data]).reshape(n,3072)
        train_ys=np.array([data[1] for data in training_data]).reshape(n,62)
        s.run(train_step, feed_dict={x: train_xs, y: train_ys})

        if i%10 == 0:
            train_correct_prediction = tf.equal(tf.argmax(H, 1), tf.argmax(y, 1))
            train_accuracy = tf.reduce_mean(tf.cast(train_correct_prediction, "float"))#tf.cast:类型转换函数
#             train_xs=np.array([data[0] for data in test_data]).reshape(n_test,3072)
#             test_ys=np.array([data[1] for data in test_data]).reshape(n_test,62)
            print("Train_acc: ", i,s.run(train_accuracy, feed_dict={x:train_xs, y:train_ys}))            
        
            correct_prediction = tf.equal(tf.argmax(H, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))#tf.cast:类型转换函数
            test_xs=np.array([data[0] for data in test_data]).reshape(n_test,3072)
            test_ys=np.array([data[1] for data in test_data]).reshape(n_test,62)
            print("Test_acc: ", i,s.run(accuracy, feed_dict={x:test_xs, y:test_ys}))

#用于计时
end = time.clock()
print('process time:',str((end - start)/60),'mins')

Train_acc:  0 0.026791548
Test_acc:  0 0.044988163
Train_acc:  10 0.13613592
Test_acc:  10 0.10773481
Train_acc:  20 0.21607493
Test_acc:  20 0.17363852
Train_acc:  30 0.28839034
Test_acc:  30 0.22296764
Train_acc:  40 0.34001306
Test_acc:  40 0.2573007
Train_acc:  50 0.38989326
Test_acc:  50 0.28295186
Train_acc:  60 0.42038772
Test_acc:  60 0.3180742
Train_acc:  70 0.45698106
Test_acc:  70 0.34411997
Train_acc:  80 0.47636682
Test_acc:  80 0.3670087
Train_acc:  90 0.50315833
Test_acc:  90 0.37529597
Train_acc:  100 0.5223263
Test_acc:  100 0.39029202
Train_acc:  110 0.53953385
Test_acc:  110 0.40213102
Train_acc:  120 0.5597909
Test_acc:  120 0.41397
Train_acc:  130 0.5759094
Test_acc:  130 0.43133387
Train_acc:  140 0.59812677
Test_acc:  140 0.43725336
Train_acc:  150 0.60509694
Test_acc:  150 0.44672453
Train_acc:  160 0.62404704
Test_acc:  160 0.4558011
Train_acc:  170 0.6355914
Test_acc:  170 0.45777428
Train_acc:  180 0.643215
Test_acc:  180 0.46724546
Train_acc:  190 0.6595513


Test_acc:  1560 0.6578532
Train_acc:  1570 0.96297103
Test_acc:  1570 0.6582478
Train_acc:  1580 0.9631888
Test_acc:  1580 0.65745854
Train_acc:  1590 0.9636245
Test_acc:  1590 0.6578532
Train_acc:  1600 0.9636245
Test_acc:  1600 0.6582478
Train_acc:  1610 0.9644958
Test_acc:  1610 0.65982634
Train_acc:  1620 0.9647136
Test_acc:  1620 0.660221
Train_acc:  1630 0.96493137
Test_acc:  1630 0.660221
Train_acc:  1640 0.965367
Test_acc:  1640 0.6606156
Train_acc:  1650 0.965367
Test_acc:  1650 0.66101027
Train_acc:  1660 0.96602046
Test_acc:  1660 0.6606156
Train_acc:  1670 0.9664561
Test_acc:  1670 0.66101027
Train_acc:  1680 0.967763
Test_acc:  1680 0.66101027
Train_acc:  1690 0.96798086
Test_acc:  1690 0.66101027
Train_acc:  1700 0.9688521
Test_acc:  1700 0.66101027
Train_acc:  1710 0.96928775
Test_acc:  1710 0.66101027
Train_acc:  1720 0.96950555
Test_acc:  1720 0.6614049
Train_acc:  1730 0.96972334
Test_acc:  1730 0.6614049
Train_acc:  1740 0.96972334
Test_acc:  1740 0.6614049
Train_acc

## 测试结果
### [3072,400,80,62]
* test_acc:15%-16%

### [3072,400,80,62]
* alpha=0.2,test_acc:2.4%

### [3072,400,60,62]
* alpha=0.005,test_acc:2.7%
* alpha=0.05,test_acc:4.1%
* alpha=0.01,test_acc:16.6%
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了21.3mins，train_acc:15.3% ,test_acc:19.3%

### [3072,800,62]
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了39.5mins，train_acc:97.8% ,test_acc:66.8%

### [3072,600,62]
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了28.6mins，train_acc:96.3% ,test_acc:70.4%

### [3072,500,62]
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了30.4mins，train_acc:95.6% ,test_acc:69.9%

### [3072,400,62]
* 激活函数都是softmax,alpha=0.1,epochs=2000,运行了18.5mins，train_acc:33.5% ,test_acc:34.9%
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=200,运行了2.1mins，train_acc:55.7% ,test_acc:46.8%
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了24.3mins，train_acc:93.6% ,test_acc:69.1%

### [3072,200,62]
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=200,运行了2.1mins，train_acc:46.8% ,test_acc:41.1%
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了13mins，train_acc:87.0% ,test_acc:68.0%

### [3072,100,62]
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了11.7mins，train_acc:77.2% ,test_acc:62.9%

### [3072,120,62]
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了21.2mins，train_acc:80.5% ,test_acc:63.9%

### [3072,150,62]
* 输出层是softmax,其他激活函数是sigmoid,alpha=0.1,epochs=2000,运行了15.3mins，train_acc:81.1% ,test_acc:67.2%

### 同样的神经网络模型，tensorflow和自己编写的代码，准确率差别很大；