In [70]:
import numpy as np
from collections import OrderedDict

In [71]:
def softmax(x):
    """Row-wise softmax.

    Args:
        x: NumPy array of scores. A 1-D array is treated as a single row
            (note: ``ndim`` is an attribute, not a method).

    Returns:
        Array with the same shape as the (possibly promoted) input in which
        every row along axis 1 sums to 1. A 1-D input therefore yields a
        ``(1, n)`` result.
    """
    if x.ndim == 1:
        x = x[np.newaxis, :]

    # Subtracting the per-row maximum keeps the largest exponent at 0.
    shifted = x - x.max(axis=1, keepdims=True)
    exps = np.exp(shifted)

    return exps / exps.sum(axis=1, keepdims=True)

def cross_entropy_error(y, t):
    """Mean cross-entropy of predicted probabilities against index labels.

    Args:
        y: NumPy array of probabilities; a 1-D array is treated as one row.
        t: Column indices used to pick one entry of ``y`` per row
            (used directly as an integer index, so not one-hot).

    Returns:
        The negated mean of ``log(p + 1e-6)`` over the picked entries.
    """
    probs = y[np.newaxis, :] if y.ndim == 1 else y

    row_idx = np.arange(probs.shape[0])
    picked = probs[row_idx, t]

    # The small constant keeps log() finite when a picked probability is 0.
    return -np.mean(np.log(picked + 1e-6))

In [72]:
class Affine:
    """Fully connected layer computing ``x @ W + b``.

    Besides the usual 2-D ``(batch, features)`` input, the layer accepts a
    single 1-D sample and inputs with more than two dimensions (everything
    after the first axis is flattened). ``backward`` returns a gradient with
    the same shape as the array that was passed to ``forward``.

    Attributes:
        W, b: Parameter arrays, stored by reference (not copied).
        x: The 2-D view of the last forward input.
        original_x_shape: Shape of the last forward input, or ``None`` before
            the first forward pass.
        dW, db: Parameter gradients filled in by ``backward``.
    """

    def __init__(self, W, b) -> None:
        self.W = W
        self.b = b
        self.x = np.array([])
        self.original_x_shape = None
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``x @ W + b`` and cache the input for ``backward``.

        Args:
            x: Input array; 1-D (one sample), 2-D, or higher-dimensional.

        Returns:
            The layer output. For a 1-D input the result is 1-D as well.
        """
        x = np.asarray(x)
        self.original_x_shape = x.shape

        if x.ndim == 1:
            # One sample: promote to a batch of size 1.
            x = x.reshape(1, -1)
        elif x.ndim > 2:
            # e.g. image tensors: flatten everything but the batch axis.
            x = x.reshape(x.shape[0], -1)

        self.x = x
        out = np.dot(x, self.W) + self.b

        if len(self.original_x_shape) == 1:
            out = out.reshape(-1)

        return out

    def backward(self, dout):
        """Back-propagate ``dout`` and store ``dW`` / ``db``.

        Args:
            dout: Gradient with respect to the output of ``forward``.

        Returns:
            Gradient with respect to the forward input, reshaped to the
            shape that input originally had.
        """
        shape = self.original_x_shape
        single_sample = shape is not None and len(shape) == 1

        if single_sample:
            # Without this, summing over axis 0 would collapse db to a scalar.
            dout = np.asarray(dout).reshape(1, -1)

        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.x.T, dout)
        dx = np.dot(dout, self.W.T)

        if shape is not None and (single_sample or len(shape) > 2):
            dx = dx.reshape(shape)

        return dx  # do not forget to return the input gradient

class Relu:
    """ReLU activation: zeroes every element that is less than or equal to 0."""

    def __init__(self) -> None:
        # An activation has no parameters to learn or update; only the mask
        # from the most recent forward pass is kept.
        self.mask = np.array([])

    def forward(self, x):
        """Return ``x`` with non-positive entries replaced by 0.

        The boolean mask of replaced positions is stored in ``self.mask``.
        """
        non_positive = x <= 0
        self.mask = non_positive

        return np.where(non_positive, 0, x)

    def backward(self, dout):
        """Return ``dout`` with 0 wherever the forward input was <= 0."""
        return np.where(self.mask, 0, dout)

class SoftmaxWithLoss:
    """Softmax followed by cross-entropy loss, used as the final layer.

    Attributes:
        y: Softmax output of the last forward pass (2-D, one row per sample).
        t: Labels of the last forward pass, used as integer class indices.
    """

    def __init__(self) -> None:
        self.y = np.array([])
        self.t = np.array([])

    def forward(self, x, t):
        """Compute the loss for scores ``x`` and index labels ``t``.

        Args:
            x: Score array passed to ``softmax``.
            t: Class indices, one per sample.

        Returns:
            The value returned by ``cross_entropy_error`` for the batch.
        """
        self.y = softmax(x)
        self.t = t
        loss = cross_entropy_error(self.y, self.t)

        return loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        Args:
            dout: Upstream gradient of the loss value. It scales the result,
                so the default of 1 yields the plain loss gradient.

        Returns:
            ``dout * (y - onehot(t)) / batch_size`` with the shape of ``y``.
        """
        # Build the one-hot form of the index labels.
        t_onehot = np.zeros_like(self.y)
        t_onehot[np.arange(t_onehot.shape[0]), self.t] = 1

        # Dividing by the batch size matches the mean taken in the loss.
        batch_size = self.y.shape[0]
        dx = dout * (self.y - t_onehot) / batch_size

        return dx

In [73]:
class TwoLayersNet:
    """Two-layer network: Affine -> ReLU -> Affine, with a softmax loss.

    Attributes:
        params_key: Parameter names in a fixed order.
        params: Dict mapping each name to its array. The same array objects
            are handed to the Affine layers, so in-place updates of
            ``params`` are seen by the layers.
        layers: OrderedDict of the forward layers in execution order.
        lastlayer: The ``SoftmaxWithLoss`` layer producing the loss.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.1) -> None:
        self.params_key = ['W1', 'b1', 'W2', 'b2']

        # Weights are scaled standard-normal draws, biases start at zero.
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('Relu1', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
        ])

        self.lastlayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in ``self.layers`` and return the scores."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)

        return out

    def loss(self, x, t):
        """Return the last layer's loss for inputs ``x`` and labels ``t``."""
        return self.lastlayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose arg-max score equals ``t``."""
        predicted = np.argmax(self.predict(x), axis=1)

        return np.mean(predicted == t)

    def gradient(self, x, t):
        """Back-propagate the loss and return the parameter gradients.

        Returns:
            Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        # The forward pass caches the activations that backward needs.
        self.loss(x, t)

        dx = self.lastlayer.backward()
        for layer in reversed(list(self.layers.values())):
            dx = layer.backward(dx)

        first = self.layers['Affine1']
        second = self.layers['Affine2']

        # Read dW / db here: without the "d" the parameters themselves
        # would be returned instead of their gradients.
        return {
            'W1': first.dW,
            'b1': first.db,
            'W2': second.dW,
            'b2': second.db,
        }

In [74]:
from torchvision import datasets, transforms


def _dataset_to_arrays(dataset):
    """Convert an iterable of ``(image, label)`` pairs into NumPy arrays.

    Args:
        dataset: Iterable yielding ``(image, label)`` where ``image`` is a
            torch tensor and ``label`` is a scalar.

    Returns:
        Tuple ``(x, t)``: ``x`` has one flattened image per row, ``t`` holds
        the labels in the same order.
    """
    images = []
    labels = []
    for image, label in dataset:
        # Convert each tensor explicitly instead of relying on NumPy's
        # implicit handling of a list of tensors.
        images.append(image.numpy())
        labels.append(label)

    x = np.array(images)
    x = x.reshape(x.shape[0], -1)  # flatten everything but the sample axis
    t = np.array(labels)

    return x, t


# One transform is shared by both splits.
transform = transforms.Compose([
    transforms.ToTensor()
])

# download=False: the files are expected to exist under ../data already.
train_dataset = datasets.MNIST(
    root='../data',
    train=True,
    download=False,
    transform=transform
)

test_dataset = datasets.MNIST(
    root='../data',
    train=False,
    download=False,
    transform=transform
)

x_train, t_train = _dataset_to_arrays(train_dataset)
x_test, t_test = _dataset_to_arrays(test_dataset)

In [75]:
# Build the model first so that its random initialisation is drawn before
# any mini-batch indices.
network = TwoLayersNet(784, 100, 10)

# Optimisation settings.
iters_num = 10000
batch_size = 100
learning_rate = 0.1
train_size = x_train.shape[0]
iter_per_epoch = max(train_size // batch_size, 1)

# Histories: loss per iteration, accuracies per epoch.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Baseline metrics of the untrained network.
train_loss = network.loss(x_train, t_train)
train_acc = network.accuracy(x_train, t_train)
test_acc = network.accuracy(x_test, t_test)
print(f"train_loss: {train_loss}")
print(f"train_acc: {train_acc}")
print(f"test_acc: {test_acc}")

for i in range(iters_num):
    # Draw a mini-batch of random indices.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Compute the gradients.
    grad = network.gradient(x_batch, t_batch)

    # Update in place so the layers, which hold the same arrays, see it.
    for key in network.params_key:
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Only evaluate on the full data sets at the end of each epoch.
    if (i + 1) % iter_per_epoch != 0:
        continue

    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print(f"epoch {(i + 1) // iter_per_epoch} train_acc: {train_acc}")
    print(f"epoch {(i + 1) // iter_per_epoch} test_acc: {test_acc}")

train_loss: 2.4280055921240993
train_acc: 0.09896666666666666
test_acc: 0.0958
epoch 1 train_acc: 0.9147166666666666
epoch 1 test_acc: 0.915
epoch 2 train_acc: 0.93875
epoch 2 test_acc: 0.9374
epoch 3 train_acc: 0.94875
epoch 3 test_acc: 0.9475
epoch 4 train_acc: 0.95575
epoch 4 test_acc: 0.9528
epoch 5 train_acc: 0.9620166666666666
epoch 5 test_acc: 0.9578
epoch 6 train_acc: 0.9658
epoch 6 test_acc: 0.9635
epoch 7 train_acc: 0.9693666666666667
epoch 7 test_acc: 0.9659
epoch 8 train_acc: 0.97195
epoch 8 test_acc: 0.9667
epoch 9 train_acc: 0.97415
epoch 9 test_acc: 0.969
epoch 10 train_acc: 0.9765666666666667
epoch 10 test_acc: 0.9692
epoch 11 train_acc: 0.9783333333333334
epoch 11 test_acc: 0.9709
epoch 12 train_acc: 0.9798
epoch 12 test_acc: 0.9713
epoch 13 train_acc: 0.9813166666666666
epoch 13 test_acc: 0.9728
epoch 14 train_acc: 0.9827666666666667
epoch 14 test_acc: 0.9724
epoch 15 train_acc: 0.9837666666666667
epoch 15 test_acc: 0.9732
epoch 16 train_acc: 0.9851
epoch 16 test_acc: