In [1]:
import Ipynb_importer
import numpy as np
import gzip
import function

importing Jupyter notebook from function.ipynb


In [2]:
# 参数定义
learning_rate = 1e-3
num_epoch = 50
batch_size = 50
shuffle = True

In [3]:
# 训练图像、训练标签路径
train_images_filename = './data/train-images-idx3-ubyte.gz'
train_labels_filename = './data/train-labels-idx1-ubyte.gz'

In [4]:
# 加载训练图像
tarin_images_loader = function.load_mnist_images(train_images_filename)

# print(np.shape(tarin_images_loader))
# print(type(tarin_images_loader))

In [5]:
# 加载训练标签
tarin_labels_loader = function.load_mnist_labels(train_labels_filename)

# print(np.shape(tarin_labels_loader))
# print(type(tarin_labels_loader))

In [6]:
# 获得batch数据以及batch数量
train_images_batches, train_labels_batches = function.get_mini_batches(tarin_images_loader, tarin_labels_loader, batch_size, shuffle)
num_batch = np.shape(train_images_batches)[0]

# print(np.shape(train_images_batches))
# print(np.shape(train_labels_batches))
# print(type(train_images_batches))
# print(type(train_labels_batches))

In [7]:
# 初始化权重以及偏置，权重和偏置的范围为0~0.1
layer1_weights = np.random.normal(loc=0.0, scale=0.01, size=(28*28, 256))
layer2_weights = np.random.normal(loc=0.0, scale=0.01, size=(256, 128))
layer3_weights = np.random.normal(loc=0.0, scale=0.01, size=(128, 64))
layer4_weights = np.random.normal(loc=0.0, scale=0.01, size=(64, 10))
layer1_bias = np.zeros([1, 256])
layer2_bias = np.zeros([1, 128])
layer3_bias = np.zeros([1, 64])
layer4_bias = np.zeros([1, 10])

In [8]:
# 训练
for epoch in range(num_epoch):
    
    for batch in range(num_batch):
        
        # 获得 one batch的训练图像和标签
        train_images = train_images_batches[batch]
        train_labels = train_labels_batches[batch]

        # 将train_images展平
        train_images = np.reshape(train_images, (batch_size, -1))

        # 将train_labels转换成独热码形式
        train_labels = function.one_hot(train_labels, batch_size)
        
        # 前向传播
        layer1_z = np.dot(train_images, layer1_weights) + layer1_bias
        layer1_output = function.relu(layer1_z)
        layer2_z = np.dot(layer1_output, layer2_weights) + layer2_bias
        layer2_output = function.relu(layer2_z)
        layer3_z = np.dot(layer2_output, layer3_weights) + layer3_bias
        layer3_output = function.relu(layer3_z)
        layer4_z = np.dot(layer3_output, layer4_weights) + layer4_bias
        layer4_output = function.softmax(layer4_z)

        # 计算损失
        delta = 1e-7
        loss = -np.sum(train_labels * np.log(layer4_output + delta)) / batch_size


        # 反向传播
        dL_dZ4 = (layer4_output - train_labels) / batch_size
        dL_dW4 = np.dot(layer3_output.T, dL_dZ4)
        dL_dB4 = np.sum(dL_dZ4, axis=0, keepdims=True)
        
        dL_dY3 = np.dot(dL_dZ4, layer4_weights.T)
        dL_dZ3 = np.where(layer3_z>0, dL_dY3, 0)
        dL_dW3 = np.dot(layer2_output.T, dL_dZ3)
        dL_dB3 = np.sum(dL_dZ3, axis=0, keepdims=True)

        dL_dY2 = np.dot(dL_dZ3, layer3_weights.T)
        dL_dZ2 = np.where(layer2_z>0, dL_dY2, 0)
        dL_dW2 = np.dot(layer1_output.T, dL_dZ2)
        dL_dB2 = np.sum(dL_dZ2, axis=0, keepdims=True)

        dL_dY1 = np.dot(dL_dZ2, layer2_weights.T)
        dL_dZ1 = np.where(layer1_z>0, dL_dY1, 0)
        dL_dW1 = np.dot(train_images.T, dL_dZ1)
        dL_dB1 = np.sum(dL_dZ1, axis=0, keepdims=True)

        # 更新权重和偏置
        layer1_weights = layer1_weights - learning_rate * dL_dW1
        layer1_bias = layer1_bias - learning_rate * dL_dB1
        layer2_weights = layer2_weights - learning_rate * dL_dW2
        layer2_bias = layer2_bias - learning_rate * dL_dB2
        layer3_weights = layer3_weights - learning_rate * dL_dW3
        layer3_bias = layer3_bias - learning_rate * dL_dB3
        layer4_weights = layer4_weights - learning_rate * dL_dW4
        layer4_bias = layer4_bias - learning_rate * dL_dB4
    
    # 保存权重和偏置
    np.savez(f"./saved_weights_biases/epoch_{epoch}_weights_bias.npz", layer1_weights=layer1_weights, layer1_bias=layer1_bias,
                                                                       layer2_weights=layer2_weights, layer2_bias=layer2_bias,
                                                                       layer3_weights=layer3_weights, layer3_bias=layer3_bias,
                                                                       layer4_weights=layer4_weights, layer4_bias=layer4_bias)

    # 打印每个周期的训练损失
    print(f"Epoch {epoch + 1}/{num_epoch},\tLoss: {loss}")

Epoch 1/50,	Loss: 0.9710312748580695
Epoch 2/50,	Loss: 0.4056331798315276
Epoch 3/50,	Loss: 0.32253656785060514
Epoch 4/50,	Loss: 0.292258996958827
Epoch 5/50,	Loss: 0.2595936424328404
Epoch 6/50,	Loss: 0.22050401350508714
Epoch 7/50,	Loss: 0.18667641065524798
Epoch 8/50,	Loss: 0.15472900071301865
Epoch 9/50,	Loss: 0.1266383038912786
Epoch 10/50,	Loss: 0.10118879527037933
Epoch 11/50,	Loss: 0.08007857067811816
Epoch 12/50,	Loss: 0.05965905413754383
Epoch 13/50,	Loss: 0.04167216379125465
Epoch 14/50,	Loss: 0.030185042553703755
Epoch 15/50,	Loss: 0.023286788120944202
Epoch 16/50,	Loss: 0.018399494002690312
Epoch 17/50,	Loss: 0.015638236516398164
Epoch 18/50,	Loss: 0.013364717819748692
Epoch 19/50,	Loss: 0.01185567258930217
Epoch 20/50,	Loss: 0.01001339854293544
Epoch 21/50,	Loss: 0.009210935024416448
Epoch 22/50,	Loss: 0.00837596914923731
Epoch 23/50,	Loss: 0.007799943424917587
Epoch 24/50,	Loss: 0.007373712874078065
Epoch 25/50,	Loss: 0.006798810943144229
Epoch 26/50,	Loss: 0.0067438215