In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read both MNIST CSV files. header=None makes pandas treat the first line
# as data rather than as column names.
train_df, test_df = (
    pd.read_csv(csv_name, header=None)
    for csv_name in ("mnist_train.csv", "mnist_test.csv")
)
# Last expression of the cell: rendered by the notebook as a table.
train_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59997,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
# Display the test DataFrame loaded in the previous cell (notebook rich output).
test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9996,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9997,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9998,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
def sigmoid(x):
    """
    Logistic sigmoid, 1 / (1 + exp(-x)), evaluated element-wise.

    Computed as exp(-log(1 + exp(-x))) through np.logaddexp. The naive form
    overflows inside np.exp for large negative x and emits a RuntimeWarning;
    this form underflows to 0 instead.

    :param x: scalar or array-like numeric input
    :return: sigmoid of x, with the same shape as x
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))

def grad(x):
    """
    Derivative of the sigmoid written in terms of the sigmoid's OUTPUT.

    :param x: activation value(s), i.e. the result of sigmoid, not its input
    :return: x * (1 - x), element-wise
    """
    complement = 1 - x
    return complement * x

class NeuralNetwork:
    """
    Three-layer fully connected feed-forward neural network.

    Layers are input -> hidden -> output, with one weight matrix between each
    pair of layers and no bias terms. Training updates the weights one sample
    at a time.
    """

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate, active_function=sigmoid, gradient=grad, lambda_=0.1):
        """
        Store the hyper-parameters and randomly initialise both weight matrices.

        :param inputnodes: number of input-layer nodes
        :param hiddennodes: number of hidden-layer nodes
        :param outputnodes: number of output-layer nodes
        :param learningrate: step size applied to every weight update
        :param active_function: activation function used for both layers
        :param gradient: derivative of the activation function; it is called
            with the activation's output, not its input
        :param lambda_: L2 regularisation coefficient. It is only stored;
            no method of this class currently reads it.
        """
        self.inputnodes = inputnodes
        self.hiddennodes = hiddennodes
        self.outputnodes = outputnodes
        self.learningrate = learningrate
        self.active_function = active_function
        self.gradient = gradient
        self.lambda_ = lambda_

        # Weight matrices, drawn uniformly from [-0.5, 0.5).
        # Shape is (receiving layer size, sending layer size).
        self.weights_i_h = np.random.rand(self.hiddennodes, self.inputnodes) - 0.5
        self.weights_h_o = np.random.rand(self.outputnodes, self.hiddennodes) - 0.5

    def train_sgd(self, x, y):
        """
        Perform one gradient-descent update from a single sample.

        :param x: input features; flattened into a column vector
        :param y: integer class index, used to index the target vector
        """
        train_x = np.array(x).reshape(-1, 1)
        # Soft one-hot target: 0.01 everywhere, 0.99 at the true class.
        target = np.zeros((self.outputnodes, 1)) + 0.01
        target[y, 0] = 0.99

        # Forward pass.
        hiddeninputs = np.dot(self.weights_i_h, train_x)
        hiddenoutputs = self.active_function(hiddeninputs)

        outputinputs = np.dot(self.weights_h_o, hiddenoutputs)
        final_outputs = self.active_function(outputinputs)

        error = target - final_outputs

        # Propagate the output error back through the hidden->output weights.
        # This must happen before those weights are modified below.
        hidden_error = np.dot(self.weights_h_o.transpose(), error)

        # `error` is a column vector, so it broadcasts across the columns of
        # the outer product: row i of the update is scaled by error[i].
        self.weights_h_o += self.learningrate * error * np.dot(self.gradient(final_outputs), hiddenoutputs.transpose())

        self.weights_i_h += self.learningrate * hidden_error * np.dot(self.gradient(hiddenoutputs), train_x.transpose())

    def fit(self, train_x, targets):
        """
        Run one pass over the data, updating the weights sample by sample.

        :param train_x: 2-D array-like, one sample per row
        :param targets: class index for each row of train_x
        """
        train_x = np.array(train_x)
        for i in range(train_x.shape[0]):
            # The per-sample update method is `train_sgd`; the previous call
            # to `self.train` raised AttributeError on a fresh kernel.
            self.train_sgd(train_x[i], targets[i])

    def query(self, inputs):
        """
        Predict the class of a single sample.

        :param inputs: input features; flattened into a column vector
        :return: index of the output node with the largest activation
        """
        inputs = np.array(inputs).reshape(-1, 1)
        hidden_input = np.dot(self.weights_i_h, inputs)
        hidden_output = self.active_function(hidden_input)

        output_input = np.dot(self.weights_h_o, hidden_output)

        final_output = self.active_function(output_input)
        return np.argmax(final_output)

    def predict(self, inputs):
        """
        Predict classes for a batch of samples.

        :param inputs: iterable of samples, each accepted by `query`
        :return: list with one predicted class index per sample
        """
        return [self.query(x) for x in inputs]

    def __str__(self):
        """Return a human-readable summary of the layer sizes and learning rate."""
        return "NeuralNetwork: \ninput_nodes = {0}, hidden_nodes = {1}, \noutputnodes = {2}, learningrate = {3}".format(
            self.inputnodes, self.hiddennodes, self.outputnodes, self.learningrate
        )
if __name__ == '__main__':
    # Smoke test: build a tiny 3-3-3 network and print its summary.
    nn = NeuralNetwork(inputnodes=3, hiddennodes=3, outputnodes=3, learningrate=0.5)
    print(nn)

NeuralNetwork: 
input_nodes = 3, hidden_nodes = 3, 
outputnodes = 3, learningrate = 0.5


In [61]:
# Evaluate on the test data: scoring helper, data loading, then a first training run
def accuracy(y_true, y_pred):
    """
    Fraction of predictions that equal the true labels.

    Both inputs are flattened first, so a column vector and a flat vector of
    the same length are compared element by element instead of being
    broadcast against each other.

    :param y_true: array-like of true labels
    :param y_pred: array-like of predicted labels, same number of elements
    :return: accuracy in [0, 1] as a numpy float64
    :raises ValueError: if the inputs hold different numbers of elements
    :raises ZeroDivisionError: if the inputs are empty
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.size != y_pred.size:
        # Without this check, broadcastable sizes (e.g. 3 vs 1) would silently
        # produce a meaningless score.
        raise ValueError(
            "y_true and y_pred must have the same number of elements, got %d and %d"
            % (y_true.size, y_pred.size)
        )
    # Vectorised count instead of iterating the array with the builtin sum().
    n_correct = np.count_nonzero(y_true == y_pred)
    return np.float64(n_correct / y_true.size)

# Load the complete MNIST train and test sets (every training row is used)
def _split_and_scale(frame):
    """Split a label-first table into (scaled pixel matrix, label vector)."""
    data = np.array(frame)
    labels = data[:, 0]
    # Linear rescale: a raw value of 0 maps to 0.01 and 255 maps to 1.0.
    pixels = data[:, 1:] / 255 * 0.99 + 0.01
    return pixels, labels


def get_data(train_path="mnist_train.csv", test_path="mnist_test.csv"):
    """
    Load the train and test CSV files and prepare them for the network.

    Each file is read without a header row. Column 0 is taken as the label
    and the remaining columns as pixel values, which are rescaled with
    value / 255 * 0.99 + 0.01.

    :param train_path: path of the training CSV (defaults to the original
        hard-coded file name)
    :param test_path: path of the test CSV (defaults to the original
        hard-coded file name)
    :return: tuple (train_x, train_y, test_x, test_y) of numpy arrays
    """
    train_x, train_y = _split_and_scale(pd.read_csv(train_path, header=None))
    test_x, test_y = _split_and_scale(pd.read_csv(test_path, header=None))
    return train_x, train_y, test_x, test_y

# Load the prepared arrays once; later cells reuse these globals.
train_x, train_y, test_x, test_y = get_data()

# Baseline: 784 inputs (one per pixel column), 100 hidden nodes, 10 classes.
NN = NeuralNetwork(inputnodes=784, hiddennodes=100, outputnodes=10, learningrate=0.3)
NN.fit(train_x, train_y)  # a single pass over the training set
y_pred = NN.predict(test_x)
print("准确度%.2f%%" % (100 * accuracy(test_y, y_pred)))

准确度94.13%


In [63]:
hiddennodes = [512, 256, 128]
lrs = [0.1, 0.2, 0.3]
# Grid search over hidden-layer size x learning rate, one training pass each.
# NOTE(review): configurations are ranked by accuracy on the test set, so the
# reported "best" score is optimistic; a separate validation split would avoid this.
for node, lr in ((n_hidden, rate) for n_hidden in hiddennodes for rate in lrs):
    NN = NeuralNetwork(inputnodes=784, hiddennodes=node, outputnodes=10, learningrate=lr)
    NN.fit(train_x, train_y)
    y_pred = NN.predict(test_x)
    print("隐藏层节点数%d,学习率%f,准确度%.2f%%" % (node, lr, 100 * accuracy(test_y, y_pred)))

隐藏层节点数512,学习率0.100000,准确度95.98%
隐藏层节点数512,学习率0.200000,准确度95.67%
隐藏层节点数512,学习率0.300000,准确度95.24%
隐藏层节点数256,学习率0.100000,准确度95.94%
隐藏层节点数256,学习率0.200000,准确度96.00%
隐藏层节点数256,学习率0.300000,准确度94.84%
隐藏层节点数128,学习率0.100000,准确度95.54%
隐藏层节点数128,学习率0.200000,准确度95.22%
隐藏层节点数128,学习率0.300000,准确度94.26%


In [76]:
import pickle
# Best setting from the grid search: 256 hidden nodes, learning rate 0.2
# (96.00% accuracy after one pass).
NN = NeuralNetwork(inputnodes=784, hiddennodes=256, outputnodes=10, learningrate=0.2)
# Train for 10 passes; halve the learning rate at the start of every 3rd pass.
for e in range(1, 11):
    if not e % 3:
        NN.learningrate = NN.learningrate / 2
    NN.fit(train_x, train_y)
    y_pred = NN.predict(test_x)
    print("第%d次训练,准确度%.2f%%" % (e, 100 * accuracy(test_y, y_pred)))
    # Save a checkpoint after each pass. The model is pickled to bytes and
    # those bytes are pickled again into the file, so a reader must call
    # pickle.load followed by pickle.loads.
    with open('NN{}.pkl'.format(e), 'wb') as f:
        pickle.dump(pickle.dumps(NN), f)


第1次训练,准确度95.45%
第2次训练,准确度96.42%
第3次训练,准确度97.16%
第4次训练,准确度97.20%
第5次训练,准确度97.28%
第6次训练,准确度97.50%
第7次训练,准确度97.50%
第8次训练,准确度97.51%
第9次训练,准确度97.56%
第10次训练,准确度97.58%


In [77]:
# Load the best model: the checkpoint written after the 10th training pass.
# NOTE: unpickling can execute arbitrary code; only open checkpoint files
# produced by this notebook.
with open('NN10.pkl', 'rb') as f:
    b_data = pickle.load(f)  # outer layer: the bytes stored by pickle.dump
# Inner layer: the NeuralNetwork instance itself.
net_model = pickle.loads(b_data)
net_model

<__main__.NeuralNetwork at 0x251a6f2aa48>

In [78]:
# Print the restored model's summary (uses NeuralNetwork.__str__) to confirm
# that its hyper-parameters survived the save/load round trip.
print(net_model)

NeuralNetwork: 
input_nodes = 784, hidden_nodes = 256, 
outputnodes = 10, learningrate = 0.025
