In [94]:
import numpy as np
# import cupy as np

In [95]:
# Load the dataset.
# np.load on an .npz archive returns a lazily-read NpzFile that holds the file
# open; the `with` block closes it once the four arrays have been read out.
with np.load('mnist/mnist.npz') as mnist:
    train_images, train_labels = mnist['x_train'], mnist['y_train']
    test_images, test_labels = mnist['x_test'], mnist['y_test']


In [96]:
# Convert the image data to float32 and scale it into the 0-1 range.
test_images = test_images.astype(np.float32) / 255.0
train_images = train_images.astype(np.float32) / 255.0

# Cast the labels to integers and one-hot encode them by indexing rows of a
# 10x10 identity matrix (one row per class).
test_labels = np.eye(10)[test_labels.astype(int)]
train_labels = np.eye(10)[train_labels.astype(int)]
print(train_images.shape, train_labels.shape)

(60000, 28, 28) (60000, 10)


In [97]:
# Flatten every image into a single row of 28*28 values.
train_images = np.reshape(train_images, (-1, 28 * 28))
test_images = np.reshape(test_images, (-1, 28 * 28))

In [98]:
class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Both layers use a sigmoid non-linearity. Training is plain gradient
    descent: ``forward`` caches the layer activations and ``backward`` updates
    the weights and biases in place.
    """

    def __init__(self, input_size, hidden_size, output_size,learning_rate = 0.01):
        """Initialise weights and biases.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of hidden units.
            output_size: Number of output units.
            learning_rate: Step size applied to every gradient update.
        """
        # Weights are drawn from a standard normal and scaled by sqrt(1 / fan_in).
        self.weights1 = np.random.randn(input_size, hidden_size) * np.sqrt(1 / input_size)
        self.weights2 = np.random.randn(hidden_size, output_size) * np.sqrt(1 / hidden_size)
        self.bias1 = np.zeros((1, hidden_size))
        self.bias2 = np.zeros((1, output_size))
        self.learning_rate = learning_rate

    def sigmoid(self, x):
        """Return the element-wise logistic sigmoid of ``x``."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``.

        ``x`` must already be an activation (``sigmoid(z)``), not the
        pre-activation ``z``.
        """
        return x * (1 - x)

    def forward(self, x):
        """Run a forward pass and cache the activations for ``backward``.

        Args:
            x: A single sample of shape ``(input_size,)`` or a batch of shape
                ``(n, input_size)``.

        Returns:
            The output activations. Because the biases have shape ``(1, k)``,
            a single 1-D sample yields an array of shape ``(1, output_size)``.
        """
        self.output1 = self.sigmoid(np.dot(x, self.weights1) + self.bias1)
        self.output2 = self.sigmoid(np.dot(self.output1, self.weights2) + self.bias2)
        return self.output2

    def backward(self, x, true, output):
        """Apply one gradient-descent step, updating weights and biases in place.

        Must be called after ``forward`` on the same ``x``: it reads the hidden
        activations cached in ``self.output1``.

        Args:
            x: The input passed to ``forward``; a single sample of shape
                ``(input_size,)`` or a batch of shape ``(n, input_size)``.
            true: Target values, broadcastable against ``output``.
            output: The value returned by ``forward`` for ``x``.
        """
        # Promote a single 1-D sample to a (1, input_size) row so the same
        # matrix products work for any input size and for batches.
        x = np.atleast_2d(x)

        # The output delta is (output - true) * sigmoid'(output), i.e. the
        # gradient of a squared-error objective through the output sigmoid;
        # cross_entropy_loss is not used here.
        error = output - true
        output2_derivative = error * self.sigmoid_derivative(output)

        # Propagate the delta back through the second layer. This must use
        # weights2 *before* it is updated below.
        error1 = output2_derivative.dot(self.weights2.T)
        output1_derivative = error1 * self.sigmoid_derivative(self.output1)

        # For a batch, the matrix products sum the gradients over the samples.
        self.weights2 -= self.learning_rate * self.output1.T.dot(output2_derivative)
        self.weights1 -= self.learning_rate * x.T.dot(output1_derivative)
        self.bias2 -= self.learning_rate * np.sum(output2_derivative, axis=0, keepdims=True)
        self.bias1 -= self.learning_rate * np.sum(output1_derivative, axis=0, keepdims=True)

    def cross_entropy_loss(self,output, true):
        """Return the per-sample cross-entropy between ``output`` and ``true``.

        Args:
            output: Predicted values, shape ``(n, classes)`` or ``(classes,)``.
            true: Target values with the same shape as ``output``.

        Returns:
            The loss summed over the last (class) axis.
        """
        # Small offset keeps log() finite when a prediction is exactly 0.
        delta = 1e-7
        return -np.sum(true * np.log(output + delta), axis=-1)

In [99]:
def evaluate_nn(nn, test_images, test_labels):
    """Print the classification accuracy of ``nn`` on the given samples.

    Each image is passed through ``nn.forward`` individually; a prediction
    counts as correct when the argmax of the output equals the argmax of the
    label.

    Args:
        nn: Object exposing ``forward(image)`` that returns per-class scores.
        test_images: Sequence of input samples.
        test_labels: Sequence of label vectors, paired with ``test_images``.
    """
    total = len(test_labels)
    if total == 0:
        # An empty evaluation set would otherwise raise ZeroDivisionError.
        print("Accuracy: n/a (no samples to evaluate)")
        return

    correct = sum(
        int(np.argmax(nn.forward(image)) == np.argmax(label))
        for image, label in zip(test_images, test_labels)
    )
    accuracy = correct / total
    print(f"Accuracy: {accuracy * 100}%")

In [100]:
def train_nn(nn, train_images, train_labels, epochs, eval_images=None, eval_labels=None):
    """Train ``nn`` one sample at a time and print a quick accuracy check per epoch.

    Every epoch iterates over the training pairs in the order given, calling
    ``nn.forward`` and then ``nn.backward`` for each sample. After the epoch,
    accuracy is reported on a 10-sample window of the evaluation data that
    slides forward by 10 each epoch, so the printed figure is only a coarse,
    noisy indicator.

    Args:
        nn: Network exposing ``forward(image)`` and ``backward(image, label, output)``.
        train_images: Sequence of training samples.
        train_labels: Sequence of training labels, paired with ``train_images``.
        epochs: Number of passes over the training data.
        eval_images: Samples used for the per-epoch check. Defaults to the
            notebook-level ``test_images``.
        eval_labels: Labels used for the per-epoch check. Defaults to the
            notebook-level ``test_labels``.
    """
    # Backward-compatible fallback: earlier callers relied on the notebook
    # globals, so they are only looked up when nothing is passed explicitly.
    if eval_images is None:
        eval_images = test_images
    if eval_labels is None:
        eval_labels = test_labels

    for epoch in range(epochs):
        for image, label in zip(train_images, train_labels):
            output = nn.forward(image)
            nn.backward(image, label, output)
        print(f"Epoch {epoch} complete")

        start = epoch * 10
        window_images = eval_images[start:start + 10]
        window_labels = eval_labels[start:start + 10]
        # Once the window slides past the end of the evaluation data the slice
        # is empty; skip the check rather than abort the training run.
        if len(window_labels) > 0:
            evaluate_nn(nn, window_images, window_labels)

In [101]:
# Seed NumPy's global RNG so the random weight initialisation (np.random.randn
# inside NeuralNetwork.__init__) is the same on every run.
np.random.seed(42)

# Create the neural network instance
input_size = 28 * 28
hidden_size = 64
output_size = 10
learning_rate=0.01
nn = NeuralNetwork(input_size, hidden_size, output_size, learning_rate)

# Train the neural network
train_nn(nn, train_images, train_labels, epochs=100)



Epoch 0 complete
Accuracy: 90.0%
Epoch 1 complete
Accuracy: 100.0%
Epoch 2 complete
Accuracy: 100.0%
Epoch 3 complete
Accuracy: 80.0%
Epoch 4 complete
Accuracy: 100.0%
Epoch 5 complete
Accuracy: 100.0%
Epoch 6 complete
Accuracy: 100.0%
Epoch 7 complete
Accuracy: 100.0%
Epoch 8 complete
Accuracy: 100.0%
Epoch 9 complete
Accuracy: 100.0%
Epoch 10 complete
Accuracy: 100.0%
Epoch 11 complete
Accuracy: 100.0%
Epoch 12 complete
Accuracy: 90.0%
Epoch 13 complete
Accuracy: 100.0%
Epoch 14 complete
Accuracy: 90.0%
Epoch 15 complete
Accuracy: 100.0%
Epoch 16 complete
Accuracy: 100.0%
Epoch 17 complete
Accuracy: 100.0%
Epoch 18 complete
Accuracy: 100.0%
Epoch 19 complete
Accuracy: 100.0%
Epoch 20 complete
Accuracy: 100.0%
Epoch 21 complete
Accuracy: 90.0%
Epoch 22 complete
Accuracy: 100.0%
Epoch 23 complete
Accuracy: 100.0%
Epoch 24 complete
Accuracy: 80.0%
Epoch 25 complete
Accuracy: 90.0%
Epoch 26 complete
Accuracy: 100.0%
Epoch 27 complete
Accuracy: 100.0%
Epoch 28 complete
Accuracy: 100.0%
Ep

In [104]:
# Evaluate the neural network on the test images and labels
evaluate_nn(nn, test_images, test_labels)



Accuracy: 97.37%


In [103]:
import json
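# Save the instance to a file
# NOTE: the NeuralNetwork class defined in this notebook has no to_json() method,
# so this sketch will not run until one is added.
# with open('nn.json', 'w') as f:
#     f.write(nn.to_json())

# Load the instance from a file
# NOTE: NeuralNetwork(**data) only works if the saved keys match the constructor
# parameters (input_size, hidden_size, output_size, learning_rate).
# with open('nn.json', 'r') as f:
#     data = json.load(f)
#     loaded_instance = NeuralNetwork(**data)
#     print(loaded_instance)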