In [304]:
import numpy as np
import pandas as pd
import scipy.special

In [305]:
class neuralNetwork:

    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        self.vih = np.random.normal(0.0, pow(self.inodes, -0.5),
                                    (self.hnodes, self.inodes+1)) # 初始化输入层和隐层之间的权重矩阵
        self.whj = np.random.normal(0.0, pow(self.hnodes, -0.5),
                                    (self.onodes, self.hnodes+1)) # 初始化隐层和输出层之间的权重矩阵
        self.lr = learningrate
        self.activation_function = lambda x: scipy.special.expit(x) # sigmoid激活函数

    def train(self, inputs_list, targets_list):
        inputs = inputs_list # 输入层输出
        targets = targets_list.reshape(-1, 1)

        hidden_inputs = np.dot(self.vih, inputs) # 隐藏层输入
        hidden_outputs = self.activation_function(hidden_inputs)
        hidden_outputs = np.concatenate((hidden_outputs, np.array([[1]])), axis=0) # 隐层输出
        final_inputs = np.dot(self.whj, hidden_outputs) # 输出层输入
        final_outputs = self.activation_function(final_inputs) # 输出层输出

        A_2 = np.diag((final_outputs * (1-final_outputs)).reshape(-1, )) # 计算A_2
        delta_2 = A_2 @ (final_outputs - targets) # 计算\mathbf{\delata}_2
        change_whj = delta_2 @ hidden_outputs.T
        self.whj = self.whj - self.lr * change_whj # 更新隐层和输出层之间的权重矩阵

        A_1 = np.diag((hidden_outputs[:-1] * (1-hidden_outputs[:-1])).reshape(-1, ))
        A_1 = np.concatenate((A_1, np.zeros((hidden_nodes, 1))), axis=1) # 计算A_1
        change_vih = A_1 @ self.whj.T @ delta_2 @ inputs.reshape(1, -1)
        self.vih = self.vih - self.lr * change_vih # 更新输入层和隐层之间的权重矩阵

    def query(self, inputs_list):
        inputs = inputs_list # 输入层输出
        hidden_inputs = np.dot(self.vih, inputs) # 隐藏层输入
        hidden_outputs = self.activation_function(hidden_inputs)
        hidden_outputs = np.concatenate((hidden_outputs, np.array([[1]])), axis=0) # 隐藏层输出
        final_inputs = np.dot(self.whj, hidden_outputs) # 输出层输入
        final_outputs = self.activation_function(final_inputs) # 输出层输出

        return final_outputs

In [306]:
input_nodes = 784 # 输入层神经元个数
hidden_nodes = 200 # 隐藏层神经元个数
output_nodes = 10 # 输出层神经元个数(对应10个标签)

learning_rate = 0.01 # 学习率

# create instance of neural network
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

In [307]:
training_data = pd.read_csv('mnist_train_6000.csv', header=None).values
epochs = 20 # 训练轮数
for e in range(epochs):
    for i in range(training_data.shape[1]):
        inputs = (training_data[i, 1:] / 255.0 * 0.99) + 0.01
        inputs = np.concatenate((inputs.reshape(-1, 1), np.array([[1]])), axis=0) # 带偏置

        # 独热编码
        targets = np.zeros(output_nodes) + 0.001
        targets[int(training_data[i, 0])] = 0.999

        n.train(inputs, targets)

In [308]:
testing_data = pd.read_csv('mnist_test_1000.csv', header=None).values
scorecard = []

for j in range(testing_data.shape[0]):
    correct_label = testing_data[j, 0]
    inputs = (testing_data[j, 1:] / 255.0 * 0.99) + 0.01
    inputs = np.concatenate((inputs.reshape(-1, 1), np.array([[1]])), axis=0)
    outputs = n.query(inputs)
    label = np.argmax(outputs)
    if label == correct_label:
        scorecard.append(1)
    else:
        scorecard.append(0)

scorecard_array = np.array(scorecard)
print ("performance = ", scorecard_array.sum() / scorecard_array.size)


performance =  0.798
