In [5]:
import torch
import numpy as np
from matplotlib import pyplot as plt
from torch import nn

plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False

class MLP:

    def __init__(self):
        self.W1 = np.ones([4, 5])
        self.W2 = np.ones([4, 4])
        self.W3 = np.ones([3, 4])
        self.b1 = np.ones([4,1])
        self.b2 = np.ones([4, 1])
        self.b3 = np.ones([3, 1])
        self.alpha = 0.001  # 学习率
        self.epsilon = 10e-6  # 阈值
        self.epoch = 1000  # 最大迭代次数
        self.h1 = np.zeros([4, 100])
        self.h2 = np.zeros([4, 100])

    def Sigmoid(self, x):
        return 1 / (np.exp(-x) + 1)

    def Diff_sigmoid(self, x):
        y = self.Sigmoid(x)
        return y * (1 - y)

    def Softmax(self, input):
        exp = np.exp(input)
        return exp / np.sum(exp, axis=1).reshape(-1, 1)

    def CrossEntropy(self, out, label):  # label属于one-hot向量(nx3)，out是输出神经元的向量(nx3)，返回交叉熵
        log = np.log(out)
        return np.trace(-log @ label.T) / len(label)

    def FP(self, w1, w2, w3, b1, b2, b3,input):  # W1:(4x5) W2:(4x4) W3:(3x4)
        self.h1 = f.Sigmoid(w1 @ input.T + b1)  # h1每列代表一行x的隐层
        self.h2 = f.Sigmoid(w2 @ self.h1 + b2)  # h2每列代表h1的隐层
        return f.Softmax((w3 @ self.h2 + b3).T)  # 输出nx3

    def One_hot(self,label): #label:(nx1),返回nx3的one_hot矩阵
        n = len(label)
        t = np.zeros((n,3))
        for i in range(n):
            t[i,label[i]] = 1
        return t

    def GD(self,dw1,dw2,dw3,db1,db2,db3):
        self.W1 = self.W1 - self.alpha * dw1
        self.W2 = self.W2 - self.alpha * dw2
        self.W3 = self.W3 - self.alpha * dw3
        self.b1 = self.b1 - self.alpha * db1
        self.b2 = self.b2 - self.alpha * db2
        self.b3 = self.b3 - self.alpha * db3

    def BP(self,train,w1,w2,w3,b1,b2,b3,label):  #label:(3xn)
        yloss = (self.FP(w1,w2,w3,b1,b2,b3,train).T - label)/len(train)
        dw3 = yloss @ self.h2.T    #d3:(3x4)
        db3 = yloss @ np.ones([100,1])
        t = (w3.T @ yloss) * self.Diff_sigmoid(w2 @ self.h1)   #t:(4xn)
        dw2 = t @ self.h1.T  #d2:(4x4)
        db2 = t @ np.ones([100,1])
        tt = self.Diff_sigmoid(w1 @ train.T)
        dw1 = (t * tt) @ train    #d1:(4x5)
        db1 = (t * tt) @ np.ones([100,1])
        return dw1,dw2,dw3,db1,db2,db3

    def fit(self,train,label):  #train:(nx5),label:(nx1)
        Loss = []
        for i in range(self.epoch):
            loss = self.CrossEntropy(self.FP(self.W1,self.W2,self.W3,self.b1,self.b2,self.b3,train),self.One_hot(label))
            Loss.append(loss)
            if len(Loss) < 2:
                pass
            elif abs(Loss[i]- Loss[i-1]) < self.epsilon:
                break
            dw1,dw2,dw3,db1,db2,db3 = self.BP(train,self.W1,self.W2,self.W3,self.b1,self.b2,self.b3,self.One_hot(label).T)
            self.GD(dw1,dw2,dw3,db1,db2,db3)
        print('mydi:',dw1,dw2,dw3)
        print('mybi:',db1,db2,db3)
        #self.plot(Loss)
        print('myloss:',Loss[len(Loss)-1])
        return Loss

    def plot(self,L):
        x = range(1, len(L)+1)
        plt.title('损失函数随训练次数的变化')
        plt.xlabel('训练次数')
        plt.ylabel('损失函数')
        plt.plot(x,L)
        plt.show()
    
    def compare(self,x,y):
        train = torch.from_numpy(x)
        y = y.flatten()
        label = torch.from_numpy(y)
        w1 = torch.from_numpy(np.ones([4, 5]))
        w2 = torch.from_numpy(np.ones([4, 4]))
        w3 = torch.from_numpy(np.ones([3, 4]))
        b1 = torch.from_numpy(np.ones([4,1]))
        b2 = torch.from_numpy(np.ones([4, 1]))
        b3 = torch.from_numpy(np.ones([3, 1]))
        w1 = torch.autograd.Variable(w1,requires_grad=True)
        w2 = torch.autograd.Variable(w2,requires_grad=True)
        w3 = torch.autograd.Variable(w3,requires_grad=True)
        b3 = torch.autograd.Variable(b3,requires_grad=True)
        b2 = torch.autograd.Variable(b2,requires_grad=True)
        b1 = torch.autograd.Variable(b1,requires_grad=True)
        sig = nn.Sigmoid()
        ce = nn.CrossEntropyLoss()
        Loss = []
        for i in range(self.epoch):
            h1 = sig(torch.mm(w1,train.T)+b1)
            h2 = sig(torch.mm(w2,h1)+b2)
            y_hat = torch.mm(w3,h2).T +b3.T
            loss = ce(y_hat,label.long())
            Loss.append(float(loss))
            if len(Loss)< 2:
                pass
            elif abs(Loss[i]-Loss[i-1])< self.epsilon:
                break
            loss.backward()
            d1 = w1.grad
            d2 = w2.grad
            d3 = w3.grad
            db1 = b1.grad
            db2 = b2.grad
            db3 = b3.grad
            w1 = torch.autograd.Variable(w1 - 1*self.alpha * d1,requires_grad=True)
            w2 = torch.autograd.Variable(w2 - 1*self.alpha * d2,requires_grad=True)
            w3 = torch.autograd.Variable(w3 - 1*self.alpha * d3,requires_grad=True)
            b1 = torch.autograd.Variable(b1 - 1*self.alpha * db1,requires_grad=True)
            b2 = torch.autograd.Variable(b2 - 1*self.alpha * db2,requires_grad=True)
            b3 = torch.autograd.Variable(b3 - 1*self.alpha * db3,requires_grad=True)
        print('torch di:',d1,d2,d3)
        print('torch bi:',b1,b2,b3)
        #self.plot(Loss)
        print('torch_loss:',Loss[len(Loss)-1])
        return Loss

In [6]:
f=MLP()
train_data = np.random.random((100,5)) * 100
label = np.random.randint(3, size=(100,1))

In [7]:
f.fit(train_data,label)

mydi: [[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]] [[-2.24076587e-06 -2.24076587e-06 -2.24076587e-06 -2.24076587e-06]
 [-2.24076587e-06 -2.24076587e-06 -2.24076587e-06 -2.24076587e-06]
 [-2.24076587e-06 -2.24076587e-06 -2.24076587e-06 -2.24076587e-06]
 [-2.24076587e-06 -2.24076587e-06 -2.24076587e-06 -2.24076587e-06]] [[-0.03298118 -0.03298118 -0.03298118 -0.03298118]
 [ 0.02999268  0.02999268  0.02999268  0.02999268]
 [ 0.0029885   0.0029885   0.0029885   0.0029885 ]]
mybi: [[0.]
 [0.]
 [0.]
 [0.]] [[-2.24076587e-06]
 [-2.24076587e-06]
 [-2.24076587e-06]
 [-2.24076587e-06]] [[-0.03320341]
 [ 0.03019477]
 [ 0.00300864]]
myloss: 1.0979375989067373


[1.0986122886681096,
 1.0986000970247116,
 1.098587945553988,
 1.0985758341245444,
 1.0985637626054086,
 1.0985517308660322,
 1.0985397387762872,
 1.0985277862064646,
 1.0985158730272748,
 1.0985039991098455,
 1.0984921643257188,
 1.098480368546853,
 1.0984686116456173,
 1.0984568934947954,
 1.0984452139675798,
 1.0984335729375732,
 1.0984219702787847,
 1.0984104058656319,
 1.098398879572937,
 1.0983873912759266,
 1.0983759408502296,
 1.0983645281718777,
 1.0983531531173012,
 1.0983418155633307,
 1.0983305153871945,
 1.0983192524665173,
 1.0983080266793182,
 1.0982968379040117,
 1.0982856860194046,
 1.0982745709046948,
 1.0982634924394719,
 1.0982524505037132,
 1.0982414449777835,
 1.0982304757424366,
 1.0982195426788093,
 1.0982086456684232,
 1.098197784593185,
 1.0981869593353786,
 1.098176169777674,
 1.0981654158031164,
 1.0981546972951304,
 1.0981440141375187,
 1.0981333662144592,
 1.098122753410503,
 1.098112175610577,
 1.0981016326999775,
 1.098091124564375,
 1.0980806510898085,


In [8]:
f.compare(train_data,label)

torch di: tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]], dtype=torch.float64) tensor([[-8.4340e-07, -8.4340e-07, -8.4340e-07, -8.4340e-07],
        [-8.4340e-07, -8.4340e-07, -8.4340e-07, -8.4340e-07],
        [-8.4340e-07, -8.4340e-07, -8.4340e-07, -8.4340e-07],
        [-8.4340e-07, -8.4340e-07, -8.4340e-07, -8.4340e-07]],
       dtype=torch.float64) tensor([[-0.0330, -0.0330, -0.0330, -0.0330],
        [ 0.0300,  0.0300,  0.0300,  0.0300],
        [ 0.0030,  0.0030,  0.0030,  0.0030]], dtype=torch.float64)
torch bi: tensor([[1.],
        [1.],
        [1.],
        [1.]], dtype=torch.float64, requires_grad=True) tensor([[1.0000],
        [1.0000],
        [1.0000],
        [1.0000]], dtype=torch.float64, requires_grad=True) tensor([[1.0021],
        [0.9981],
        [0.9998]], dtype=torch.float64, requires_grad=True)
torch_loss: 1.0979375989077267


[1.0986122886681082,
 1.098600097024711,
 1.0985879455539875,
 1.0985758341245455,
 1.09856376260541,
 1.0985517308660322,
 1.098539738776286,
 1.098527786206465,
 1.0985158730272782,
 1.098503999109848,
 1.0984921643257237,
 1.0984803685468585,
 1.0984686116456261,
 1.0984568934948065,
 1.098445213967594,
 1.0984335729375903,
 1.0984219702788038,
 1.0984104058656563,
 1.0983988795729647,
 1.0983873912759605,
 1.0983759408502687,
 1.098364528171925,
 1.098353153117355,
 1.0983418155633915,
 1.0983305153872625,
 1.098319252466595,
 1.0983080266794054,
 1.0982968379041083,
 1.0982856860195132,
 1.0982745709048143,
 1.0982634924396049,
 1.0982524505038587,
 1.0982414449779427,
 1.0982304757426105,
 1.0982195426789985,
 1.098208645668632,
 1.0981977845934074,
 1.0981869593356208,
 1.0981761697779349,
 1.098165415803396,
 1.098154697295432,
 1.0981440141378442,
 1.0981333662148072,
 1.098122753410875,
 1.098112175610972,
 1.0981016327004005,
 1.0980911245648246,
 1.0980806510902843,
 1.0980