## 准备数据

In [1]:
import os

# TF_CPP_MIN_LOG_LEVEL must be in the environment *before* TensorFlow is
# imported: the native library emits its start-up logs while it is being
# loaded, so assigning the variable after the import is too late for them.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # or any {'0', '1', '2'}

import numpy as np
import tensorflow as tf
from tensorflow.python import keras
from tensorflow.python.keras import layers, optimizers
from keras.datasets import mnist
from torch import optim

def mnist_dataset():
    """Load MNIST through ``mnist.load_data()`` and rescale the image arrays.

    :return: ``((x, y), (x_test, y_test))`` where both image arrays have been
             divided by 255.0 and the label arrays are passed through unchanged.
    """
    train_split, test_split = mnist.load_data()
    train_images, train_targets = train_split
    test_images, test_targets = test_split

    # normalize: true division by 255.0 (the result is a floating-point array)
    scaled_train = train_images / 255.0
    scaled_test = test_images / 255.0

    return (scaled_train, train_targets), (scaled_test, test_targets)

In [2]:
# Scratch check of zip(): pairs the i-th items of both lists into tuples.
print(list(zip([1, 2, 3, 4], ['a', 'b', 'c', 'd'])))

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')]


## 构建全连接层

In [3]:
class FullConnectionLayer:
    """Bias-free linear layer ``H = X @ W`` with a hand-written backward pass.

    The weight matrix is not owned by the layer; it is passed to ``forward``
    and remembered (together with the input) for the next ``backward`` call.
    """

    def __init__(self):
        # Operands of the most recent forward() call, keyed 'X' and 'W'.
        self.mem = {}

    def forward(self, X, W):
        """
        :param X: shape (m, d), input matrix of the forward pass
        :param W: shape (d, d'), weight matrix of the forward pass
        :return: shape (m, d'), output matrix of the forward pass
        """
        self.mem['X'], self.mem['W'] = X, W
        return np.matmul(X, W)

    def backward(self, grad_H):
        """
        :param grad_H: shape (m, d'), gradient of the loss w.r.t. H
        :return: grad_X: shape (m, d), gradient of the loss w.r.t. X
                 grad_W: shape (d, d'), gradient of the loss w.r.t. W
        """
        inputs, weights = self.mem['X'], self.mem['W']
        d_inputs = np.matmul(grad_H, weights.T)
        d_weights = np.matmul(inputs.T, grad_H)
        return d_inputs, d_weights
            

## 实现激活函数

In [4]:
class Relu:
    """Element-wise rectifier: keeps strictly positive entries, zeroes the rest."""

    def __init__(self):
        # Holds the input of the last forward() call under key 'x'.
        self.mem = {}

    def forward(self, x):
        """Cache ``x`` and return it with every entry that is not > 0 set to 0."""
        self.mem['x'] = x
        zeros = np.zeros_like(x)
        return np.where(x > 0, x, zeros)

    def backward(self, grad_y):
        """Pass ``grad_y`` through where the cached input was > 0, else 0."""
        was_positive = self.mem['x'] > 0
        blocked = np.zeros_like(grad_y)
        return np.where(was_positive, grad_y, blocked)
    
    
class Sigmoid:
    """Logistic activation ``1 / (1 + exp(-x))`` with a cached backward pass."""

    def __init__(self):
        # 'x'   -> input of the last forward() call
        # 'out' -> activation returned by the last forward() call
        self.mem = {}

    def forward(self, x):
        """Return the element-wise sigmoid of ``x`` and cache input and output.

        Only exponentials of non-positive numbers are evaluated, so large
        negative inputs no longer overflow ``exp``:
        for ``x >= 0`` the value is ``1 / (1 + exp(-x))`` and for ``x < 0`` the
        algebraically equal ``exp(x) / (1 + exp(x))``.

        :param x: array of pre-activations
        :return: array of the same shape with values in [0, 1]
        """
        self.mem['x'] = x
        non_negative = x >= 0
        z = np.exp(np.where(non_negative, -x, x))  # exp(-|x|), always in (0, 1]
        out = np.where(non_negative, 1 / (1 + z), z / (1 + z))
        self.mem['out'] = out
        return out

    def backward(self, grad_y):
        """Chain ``grad_y`` through the sigmoid using the cached activation.

        :param grad_y: gradient of the loss w.r.t. the output of forward()
        :return: gradient of the loss w.r.t. the input of forward()
        """
        # Reuse the stored activation instead of recomputing forward() twice.
        out = self.mem['out']
        return grad_y * (out * (1 - out))


class Softmax:
    """Row-wise softmax (normalised over axis 1) with an analytic backward pass."""

    def __init__(self):
        # Added to the partition sum before dividing.
        self.epsilon = 1e-12
        # 'out'   -> probabilities returned by the last forward() call
        # 'x_exp' -> exponentials of the max-shifted logits of that call
        self.mem = {}

    def forward(self, x):
        """Return ``softmax(x)`` computed independently for every row.

        Each row is shifted by its maximum before exponentiation. The shift
        cancels in the ratio, but it keeps ``exp`` from overflowing to ``inf``
        (which would turn the row into NaN) when the logits are large.

        :param x: shape (N, c), logits
        :return: shape (N, c), probabilities
        """
        shifted = x - np.max(x, axis=1, keepdims=True)
        x_exp = np.exp(shifted)
        partition = np.sum(x_exp, axis=1, keepdims=True)
        out = x_exp / (partition + self.epsilon)

        self.mem['out'] = out
        self.mem['x_exp'] = x_exp
        return out

    def backward(self, grad_y):
        """Chain ``grad_y`` through the softmax Jacobian.

        With ``s`` the cached output, the Jacobian is
        ``ds_i/dx_j = s_i * (delta_ij - s_j)``, hence
        ``grad_x = s * (grad_y - sum_i(grad_y_i * s_i))`` per row. This is the
        same quantity as multiplying by the explicit (N, c, c) Jacobian, without
        materialising it.

        :param grad_y: shape (N, c), gradient of the loss w.r.t. the output
        :return: shape (N, c), gradient of the loss w.r.t. the logits
        """
        s = self.mem['out']
        weighted_sum = np.sum(grad_y * s, axis=1, keepdims=True)  # (N, 1)
        return s * (grad_y - weighted_sum)
    
    
class CrossEntropy:
    """Cross-entropy between predicted probabilities ``p`` and targets ``y``.

    NOTE(review): forward() averages the per-row losses, whereas backward()
    returns ``-y / (p + epsilon)`` with no 1/N factor, i.e. the gradient of the
    *summed* loss. Callers that pick a step size must account for that.
    """

    def __init__(self):
        self.epsilon = 1e-12  # keeps log() and the division in backward() away from 0
        # Holds the probabilities of the last forward() call under key 'p'.
        self.mem = {}

    def forward(self, p, y):
        """Cache ``p`` and return the mean over rows of ``sum(-y * log(p + epsilon))``."""
        self.mem['p'] = p
        per_row = np.sum(-y * np.log(p + self.epsilon), axis=1)
        return np.mean(per_row)

    def backward(self, y):
        """Return ``-y / (p + epsilon)`` for the cached ``p`` (same shape as ``y``)."""
        cached_p = self.mem['p']
        inverse = 1 / (cached_p + self.epsilon)
        return -y * inverse

## 建立模型

In [15]:
class myModel:
    """Two-layer fully connected classifier built from the hand-written layers.

    forward():  flatten -> append a column of ones -> linear(W1) -> sigmoid
                -> linear(W2) -> softmax -> cross-entropy.
    backward(): walks the same chain in reverse and leaves the weight
                gradients in ``W1_grad`` / ``W2_grad``.
    """

    def __init__(self):
        # Weights drawn with np.random.normal. W1 has 28*28 + 1 rows: the extra
        # row multiplies the column of ones appended in forward().
        self.W1 = np.random.normal(size=[28*28+1, 100])
        self.W2 = np.random.normal(size=[100, 10])

        self.mul_h1, self.mul_h2 = FullConnectionLayer(), FullConnectionLayer()
        self.relu = Relu()  # constructed but not used by forward()/backward()
        self.softmax = Softmax()
        self.cross_en = CrossEntropy()
        self.sigmoid = Sigmoid()

    def forward(self, x, label):
        """Run the forward pass; results are stored on ``self`` (nothing is returned).

        :param x: input array, reshaped here to (-1, 28*28)
        :param label: targets handed to the cross-entropy layer
        """
        flat = x.reshape(-1, 28*28)
        ones_column = np.ones(shape=[flat.shape[0], 1])
        with_bias = np.concatenate([flat, ones_column], axis=1)

        self.h1 = self.mul_h1.forward(with_bias, self.W1)
        self.h1_sig = self.sigmoid.forward(self.h1)
        self.h2 = self.mul_h2.forward(self.h1_sig, self.W2)
        self.h2_soft = self.softmax.forward(self.h2)
        self.loss = self.cross_en.forward(self.h2_soft, label)

    def backward(self, label):
        """Back-propagate from the loss to both weight matrices.

        Relies on the values cached by the preceding forward() call.

        :param label: the targets that were passed to forward()
        """
        # loss -> softmax output -> logits
        self.loss_grad = self.cross_en.backward(label)
        self.h2_soft_grad = self.softmax.backward(self.loss_grad)
        # second linear layer
        self.h2_grad, self.W2_grad = self.mul_h2.backward(self.h2_soft_grad)
        # sigmoid, then first linear layer
        self.h1_sig_grad = self.sigmoid.backward(self.h2_grad)
        self.h1_grad, self.W1_grad = self.mul_h1.backward(self.h1_sig_grad)
    
# Module-level model instance; the training cell further down passes it to
# train_one_step() and test().
model = myModel()

# optimizer = optimizers.Adam()
# NOTE(review): this binds the torch `Adam` class itself (it is not called), and
# nothing in the visible notebook reads `optimizer` -- train_one_step() updates
# the weights by hand. Confirm whether it is still needed.
optimizer = optim.Adam


## 计算 loss 与准确率,定义训练与测试步骤

In [16]:
def compute_accuracy(prob, labels):
    """Fraction of rows whose arg-max in ``prob`` equals the arg-max in ``labels``.

    :param prob: 2-D array of per-class scores, one row per sample
    :param labels: 2-D array with the same layout (e.g. one-hot targets)
    :return: mean of the element-wise match between the two arg-max vectors
    """
    hits = np.argmax(prob, axis=1) == np.argmax(labels, axis=1)
    return np.mean(hits)

def train_one_step(model, x, y, lr=1e-5):
    """Run one forward/backward pass on ``(x, y)`` and apply a gradient-descent step.

    :param model: object exposing ``forward(x, y)``, ``backward(y)``, the weight
                  arrays ``W1``/``W2``, their gradients ``W1_grad``/``W2_grad``,
                  and the ``loss`` / ``h2_soft`` attributes set by ``forward``
    :param x: input batch passed to ``model.forward``
    :param y: targets passed to ``model.forward`` / ``model.backward``
    :param lr: step size multiplying the gradients; defaults to the previously
               hard-coded 1e-5, so existing calls behave as before
    :return: ``(loss, accuracy)`` measured on this batch *before* the update
    """
    model.forward(x, y)
    model.backward(y)
    # In-place updates: the weight arrays keep their identity.
    model.W1 -= lr * model.W1_grad
    model.W2 -= lr * model.W2_grad
    loss = model.loss
    accuracy = compute_accuracy(model.h2_soft, y)
    return loss, accuracy

def test(model, x, y):
    """Evaluate ``model`` on ``(x, y)`` without touching its weights.

    Runs only the forward pass, then reads the loss and the softmax output
    that ``forward`` stored on the model.

    :return: ``(loss, accuracy)``
    """
    model.forward(x, y)
    return model.loss, compute_accuracy(model.h2_soft, y)

## 实际训练

In [18]:
# Load the data and one-hot encode the labels into 10 columns.
train_data, test_data = mnist_dataset()
n_train = train_data[0].shape[0]
n_test = test_data[0].shape[0]

train_label = np.zeros(shape=[n_train, 10])
train_label[np.arange(n_train), np.array(train_data[1])] = 1.
test_label = np.zeros(shape=[n_test, 10])
test_label[np.arange(n_test), np.array(test_data[1])] = 1.

# Each "epoch" here is a single update computed on the whole training set.
for epoch in range(50):
    loss, accuracy = train_one_step(model, train_data[0], train_label)
    print('epoch', epoch, ': loss', loss, '; accuracy', accuracy)

# Final evaluation on the held-out split.
loss, accuracy = test(model, test_data[0], test_label)

print('test loss', loss, '; accuracy', accuracy)

epoch 0 : loss 8.09721174344117 ; accuracy 0.09778333333333333
epoch 1 : loss 6.659444934769633 ; accuracy 0.08828333333333334
epoch 2 : loss 5.962645124005233 ; accuracy 0.11105
epoch 3 : loss 5.527197651314062 ; accuracy 0.12231666666666667
epoch 4 : loss 5.1324274000737775 ; accuracy 0.13683333333333333
epoch 5 : loss 4.769154613241772 ; accuracy 0.15478333333333333
epoch 6 : loss 4.437422904564867 ; accuracy 0.1743
epoch 7 : loss 4.136701414758591 ; accuracy 0.1936
epoch 8 : loss 3.865710007068367 ; accuracy 0.21511666666666668
epoch 9 : loss 3.6226485227616685 ; accuracy 0.23751666666666665
epoch 10 : loss 3.4053944506301916 ; accuracy 0.2598
epoch 11 : loss 3.211614042195719 ; accuracy 0.283
epoch 12 : loss 3.0389000795509022 ; accuracy 0.3038666666666667
epoch 13 : loss 2.8849095241030294 ; accuracy 0.3249666666666667
epoch 14 : loss 2.747432689842433 ; accuracy 0.34578333333333333
epoch 15 : loss 2.6244175523891538 ; accuracy 0.36493333333333333
epoch 16 : loss 2.51400009739660