In [1]:
import numpy as np
import torchvision
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from tqdm import tqdm

In [2]:
def tanh(x):
    """Return the hyperbolic tangent of ``x`` (scalar or array), element-wise.

    Delegates to ``np.tanh`` instead of evaluating
    ``(e^x - e^-x) / (e^x + e^-x)`` by hand: the explicit ratio overflows to
    ``inf / inf = nan`` once ``|x|`` is large enough for ``np.exp`` to
    overflow, whereas ``np.tanh`` saturates cleanly at -1 / +1.
    """
    return np.tanh(x)

# Logistic sigmoid activation
def sigmoid(x):
    """Return ``1 / (1 + exp(-x))`` element-wise without overflowing.

    The naive formula calls ``np.exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``.  Here the exponential is only
    ever taken of a non-positive number, so it stays in ``(0, 1]``:

    * ``x >= 0``: ``1 / (1 + exp(-x))``
    * ``x <  0``: ``exp(x) / (1 + exp(x))``

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray shaped like ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # exp of a non-positive value: never overflows
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result into a NumPy scalar and leaves
    # n-d arrays untouched, matching what the plain formula returned.
    return out[()]
    
# First derivative of the sigmoid
def Dsigmoid(x):
    """Return the derivative of the sigmoid at ``x``: ``s * (1 - s)`` with ``s = sigmoid(x)``.

    The sigmoid is evaluated once and reused, rather than being computed
    twice for the same input.
    """
    s = sigmoid(x)
    return s * (1 - s)

In [3]:
# One-hot encoding of labels
def onehot(targets, num, num_classes=10):
    """Build a one-hot matrix in which row ``i`` encodes ``targets[i]``.

    Args:
        targets: sequence/array of integer class labels; the first ``num``
            entries are encoded.
        num: number of samples (rows) to encode.
        num_classes: width of each one-hot row; defaults to 10.

    Returns:
        Float ndarray of shape ``(num, num_classes)`` with a single 1 per row.

    Raises:
        IndexError: if ``targets`` holds fewer than ``num`` labels, or a label
            is not a valid column index.
    """
    result = np.zeros((num, num_classes))
    labels = np.asarray(targets)[:num]
    if labels.shape[0] != num:
        # Without this check a length-1 label array would silently broadcast
        # across every row instead of failing.
        raise IndexError(
            f"expected at least {num} targets, got {labels.shape[0]}"
        )
    if num:
        # One fancy-indexed assignment replaces the per-row Python loop.
        result[np.arange(num), labels] = 1
    return result

In [4]:
class NN(object):
    """Three-layer fully connected network with sigmoid activations.

    Layer widths are ``l0 -> l1 -> l2 -> l3`` where ``l0`` is the input
    dimension and ``l3`` the output dimension.  ``forward`` caches the
    activations and the output-layer error term; ``backward`` turns them
    into gradients and applies one plain gradient-descent step.
    """

    def __init__(self, l0, l1, l2, l3, batch_size=6, lr=0.4):
        """Create the parameters with small Gaussian initial values.

        Args:
            l0: input dimension.
            l1: width of the first hidden layer.
            l2: width of the second hidden layer.
            l3: output dimension.
            batch_size: mini-batch size the caller uses when slicing data.
            lr: gradient-descent learning rate.
        """
        self.lr = lr
        self.batch_size = batch_size
        # Layer 1 parameters: W1 is [l0, l1], b1 is [l1]
        self.W1 = np.random.randn(l0, l1) * 0.01
        self.b1 = np.random.randn(l1) * 0.01
        # Layer 2 parameters: W2 is [l1, l2], b2 is [l2]
        self.W2 = np.random.randn(l1, l2) * 0.01
        self.b2 = np.random.randn(l2) * 0.01
        # Layer 3 parameters: W3 is [l2, l3], b3 is [l3]
        self.W3 = np.random.randn(l2, l3) * 0.01
        self.b3 = np.random.randn(l3) * 0.01

    def forward(self, X, y):
        """Run a forward pass and cache what ``backward`` needs.

        Args:
            X: inputs, shape ``[n, l0]``.
            y: targets with the same shape as the output, ``[n, l3]``.

        Returns:
            ``(loss, a3)``: the summed squared error divided by the number of
            samples ``n`` in this call, and the output activations ``[n, l3]``.
        """
        # Normalise by the samples actually passed in (not a fixed constant);
        # the floor of 1 keeps an empty batch from dividing by zero.
        n = max(np.shape(X)[0], 1)

        self.X = X                                        # [n, l0]
        self.z1 = np.dot(X, self.W1) + self.b1            # [n, l0] x [l0, l1]
        self.a1 = sigmoid(self.z1)                        # [n, l1]

        self.z2 = np.dot(self.a1, self.W2) + self.b2      # [n, l1] x [l1, l2]
        self.a2 = sigmoid(self.z2)                        # [n, l2]

        self.z3 = np.dot(self.a2, self.W3) + self.b3      # [n, l2] x [l2, l3]
        self.a3 = sigmoid(self.z3)                        # [n, l3]

        err = self.a3 - y
        loss = np.sum(err * err) / n

        # Chain rule through the output sigmoid.  Its derivative is a * (1 - a),
        # so the cached activation is reused instead of re-evaluating sigmoid.
        # The factor 2 of the squared error is not included in the error term.
        self.d3 = err * (self.a3 * (1 - self.a3))         # [n, l3]
        return loss, self.a3

    def backward(self):
        """Back-propagate the cached error and update all weights in place.

        Must be called after ``forward``.  Gradients are averaged over the
        number of samples of the most recent ``forward`` call, so a final
        partial batch is scaled correctly.
        """
        n = max(np.shape(self.X)[0], 1)

        dW3 = np.dot(self.a2.T, self.d3) / n              # [l2, n] x [n, l3] = [l2, l3]
        db3 = np.sum(self.d3, axis=0) / n                 # [n, l3] -> [l3]

        d2 = np.dot(self.d3, self.W3.T) * (self.a2 * (1 - self.a2))   # [n, l2]
        dW2 = np.dot(self.a1.T, d2) / n                   # [l1, n] x [n, l2] = [l1, l2]
        db2 = np.sum(d2, axis=0) / n                      # [n, l2] -> [l2]

        d1 = np.dot(d2, self.W2.T) * (self.a1 * (1 - self.a1))        # [n, l1]
        dW1 = np.dot(self.X.T, d1) / n                    # [l0, n] x [n, l1] = [l0, l1]
        db1 = np.sum(d1, axis=0) / n                      # [n, l1] -> [l1]

        # All gradients above use the pre-update weights; apply the step last.
        self.W3 -= self.lr * dW3
        self.b3 -= self.lr * db3
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

In [5]:
# Module-level TensorBoard writer created with default arguments.
# train() logs the "Loss/train" scalar to it and closes it when training ends.
writer = SummaryWriter()

def train(train_data, epochs=10, weights_path="data.npz"):
    """Train a 784-200-30-10 network and save its weights after every epoch.

    Args:
        train_data: object exposing ``data`` (sliceable inputs, one row per
            sample) and ``targets`` (sliceable labels aligned with ``data``,
            in the shape ``NN.forward`` expects).
        epochs: number of passes over the data; defaults to 10.
        weights_path: ``.npz`` file rewritten after each epoch with the keys
            ``w1, b1, w2, b2, w3, b3``; defaults to ``"data.npz"``.

    Side effects:
        Logs the mean batch loss per epoch to the module-level ``writer``
        under ``"Loss/train"`` and closes that writer when training ends.
    """
    nn = NN(784, 200, 30, 10)
    print(f"{datetime.now()} Begin training...")

    # Use the real dataset length instead of assuming 60000 samples.
    num_samples = len(train_data.data)

    for epoch in tqdm(range(epochs)):
        loss_sum = 0
        num_batches = 0

        for start in range(0, num_samples, nn.batch_size):
            stop = start + nn.batch_size
            X = train_data.data[start:stop]
            Y = train_data.targets[start:stop]
            loss, _ = nn.forward(X, Y)
            loss_sum += loss
            num_batches += 1
            nn.backward()

        # Mean loss per batch; the floor of 1 covers an empty dataset.
        mean_loss = loss_sum / max(num_batches, 1)
        tqdm.write(
            f"Epoch: {epoch} Train loss: {mean_loss}",
            end=" ",
        )
        writer.add_scalar("Loss/train", mean_loss, epoch)

        # Checkpoint after every epoch so an interrupted run keeps its progress.
        np.savez(weights_path, w1=nn.W1, b1=nn.b1, w2=nn.W2, b2=nn.b2, w3=nn.W3, b3=nn.b3)
    writer.close()
    print(f"{datetime.now()} End training...")

In [11]:
def test(test_data, weights_path="data.npz"):
    """Load saved weights and print the classification accuracy on ``test_data``.

    Args:
        test_data: object exposing ``data`` (inputs, one row per sample),
            ``targets`` (integer class labels) and ``targets1`` (the one-hot
            version of those labels, passed to ``forward``).
        weights_path: ``.npz`` file holding the keys ``w1, b1, w2, b2, w3, b3``;
            defaults to ``"data.npz"``.
    """
    nn = NN(784, 200, 30, 10)
    # Load inside a context manager so the archive's file handle is closed;
    # each array is read from the archive when it is indexed here.
    with np.load(weights_path) as r:
        nn.W1 = r["w1"]
        nn.b1 = r["b1"]
        nn.W2 = r["w2"]
        nn.b2 = r["b2"]
        nn.W3 = r["w3"]
        nn.b3 = r["b3"]

    _, result = nn.forward(test_data.data, test_data.targets1)
    result = np.argmax(result, axis=1)
    # Fraction of correct predictions over however many samples were given,
    # rather than dividing by a hard-coded 10000.
    accuracy = np.mean(result == test_data.targets)
    print("Precison:", accuracy)

In [12]:
if __name__ == '__main__':

    # MNIST handwritten-digit dataset
    train_data = torchvision.datasets.MNIST(root='data/', train=True, download=True)
    test_data = torchvision.datasets.MNIST(root='data/', train=False)
    train_data.data = train_data.data.numpy()         # [N_train, 28, 28]
    train_data.targets = train_data.targets.numpy()   # [N_train]
    test_data.data = test_data.data.numpy()           # [N_test, 28, 28]
    test_data.targets = test_data.targets.numpy()     # [N_test]

    # Flatten every image to one row and scale pixel values by 1/255.
    # Sizes come from the arrays themselves instead of hard-coded 60000 / 10000.
    train_data.data = train_data.data.reshape(len(train_data.data), -1) / 255.
    test_data.data = test_data.data.reshape(len(test_data.data), -1) / 255.

    # One-hot encode the labels.  The training labels are replaced in place;
    # the test set keeps its integer labels in `targets` (used for scoring)
    # and stores the one-hot copy separately in `targets1`.
    train_data.targets = onehot(train_data.targets, len(train_data.targets))
    test_data.targets1 = onehot(test_data.targets, len(test_data.targets))

    train(train_data)
    test(test_data)

2021-09-23 10:43:55.504905 Begin training...


 10%|█         | 1/10 [00:08<01:14,  8.23s/it]

Epoch: 0 Train loss: 0.8875768293275492 

 20%|██        | 2/10 [00:16<01:05,  8.24s/it]

Epoch: 1 Train loss: 0.47149937666761277 

 30%|███       | 3/10 [00:24<00:57,  8.27s/it]

Epoch: 2 Train loss: 0.13782746233950394 

 40%|████      | 4/10 [00:32<00:49,  8.22s/it]

Epoch: 3 Train loss: 0.09607324276702973 

 50%|█████     | 5/10 [00:41<00:41,  8.21s/it]

Epoch: 4 Train loss: 0.07463461998733435 

 60%|██████    | 6/10 [00:48<00:32,  8.06s/it]

Epoch: 5 Train loss: 0.060987298314839004 

 70%|███████   | 7/10 [00:56<00:23,  7.96s/it]

Epoch: 6 Train loss: 0.05154206474473889 

 80%|████████  | 8/10 [01:04<00:15,  7.89s/it]

Epoch: 7 Train loss: 0.04446003767459459 

 90%|█████████ | 9/10 [01:12<00:07,  7.85s/it]

Epoch: 8 Train loss: 0.03892860199496097 

100%|██████████| 10/10 [01:19<00:00,  8.00s/it]

Epoch: 9 Train loss: 0.034385237049216894 2021-09-23 10:45:15.482111 End training...
Precison: 0.9723





In [13]:
# Re-run the evaluation: test() builds a fresh network and loads its weights from "data.npz".
test(test_data)

Precison: 0.9723


In [14]:
import matplotlib.pyplot as plt