# 导入包及模块

In [62]:
import torch
import numpy as np
import torchvision
import torchvision.transforms as transforms

# 获取和读取数据

In [63]:
# Fetch the Fashion-MNIST train and test splits (downloaded on first use),
# converting every image with ToTensor.
datasetRoot = '~/Datasets/FashionMNIST'
toTensor = transforms.ToTensor()
mnist_train = torchvision.datasets.FashionMNIST(
    root=datasetRoot, train=True, download=True, transform=toTensor)
mnist_test = torchvision.datasets.FashionMNIST(
    root=datasetRoot, train=False, download=True, transform=toTensor)

# Wrap both splits in shuffled mini-batch loaders that share the same settings.
batchSize = 256
loaderOptions = dict(batch_size=batchSize, shuffle=True, num_workers=8)
trainIter = torch.utils.data.DataLoader(mnist_train, **loaderOptions)
testIter = torch.utils.data.DataLoader(mnist_test, **loaderOptions)

# 定义模型参数

In [64]:
# Layer sizes: 784 input features, one hidden layer of 256 units, 10 outputs.
numInput, numHidden, numOutput = 784, 256, 10

# Weights are drawn from N(0, 0.01) via NumPy; biases start at zero.
# Every parameter is a float tensor that tracks gradients.
w1 = torch.tensor(
    np.random.normal(0, 0.01, (numInput, numHidden)),
    dtype=torch.float, requires_grad=True)
b1 = torch.zeros(1, numHidden, dtype=torch.float, requires_grad=True)
w2 = torch.tensor(
    np.random.normal(0, 0.01, (numHidden, numOutput)),
    dtype=torch.float, requires_grad=True)
b2 = torch.zeros(1, numOutput, dtype=torch.float, requires_grad=True)

# 定义激活函数

In [65]:
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero.

    Args:
        x: Input tensor.

    Returns:
        The result of ``torch.max`` between ``x`` and a 0-dim float zero,
        i.e. negative entries are replaced by 0.
    """
    zero = torch.zeros((), dtype=torch.float)
    return torch.max(x, zero)

# Quick sanity check on a small matrix containing negative entries.
reluDemoInput = torch.tensor([[1,-1],[2,-3]],dtype=torch.float)
print(relu(reluDemoInput))

tensor([[1., 0.],
        [2., 0.]])


# 定义模型

In [66]:
def net(x):
    """Forward pass of the two-layer perceptron.

    Args:
        x: Input batch; it is reshaped to ``(-1, numInput)`` before use.

    Returns:
        The output-layer scores ``relu(x @ w1 + b1) @ w2 + b2``
        (no softmax is applied here).
    """
    flat = x.view(-1, numInput)
    hidden = relu(torch.mm(flat, w1) + b1)
    scores = torch.mm(hidden, w2) + b2
    return scores

# 定义损失函数

In [67]:
# Cross-entropy over raw scores; 'mean' (the library default) averages the
# per-sample losses of a batch into a single scalar.
loss = torch.nn.CrossEntropyLoss(reduction='mean')

# 定义优化算法

In [68]:
def sgd(params,lr,batchSize):
    """Apply one mini-batch stochastic gradient descent step in place.

    Each parameter is updated as ``param -= lr * param.grad / batchSize``.

    Args:
        params: Iterable of tensors whose ``.grad`` has been populated by a
            backward pass. Parameters whose ``.grad`` is ``None`` are skipped.
        lr: Learning rate.
        batchSize: Divisor applied to the gradient.

    Returns:
        None. The tensors in ``params`` are modified in place; their
        gradients are not reset here.
    """
    # Update under no_grad so the in-place step is not recorded by autograd,
    # instead of going through the legacy ``.data`` attribute.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # No gradient has been computed for this parameter; leave it as is.
                continue
            param -= lr*param.grad / batchSize

# 计算分类准确率

In [69]:
def accuracy(yP,y):
    """Return the fraction of rows of ``yP`` whose arg-max equals the label.

    Args:
        yP: Score tensor; the predicted class is the arg-max along dim 1.
        y: Tensor of true class indices, compared element-wise with the
            predictions.

    Returns:
        The mean of the correct/incorrect indicator as a Python float.
    """
    predicted = yP.argmax(dim=1)
    hits = (predicted == y).float()
    return hits.mean().item()

def evaluateAccuary(dataIter,net):
    """Return the fraction of samples from ``dataIter`` that ``net`` classifies correctly.

    Args:
        dataIter: Iterable yielding ``(x, y)`` batches, where ``y`` holds the
            true class indices.
        net: Callable mapping a batch ``x`` to a score tensor; the predicted
            class is the arg-max along dim 1.

    Returns:
        Number of correct predictions divided by the number of samples seen.

    Raises:
        ZeroDivisionError: If ``dataIter`` yields no samples.
    """
    accSum=0
    n=0
    # Evaluation never calls backward(), so disable autograd to avoid building
    # (and holding memory for) a computation graph on every batch.
    with torch.no_grad():
        for x,y in dataIter:
            accSum+=(net(x).argmax(dim=1)==y).float().sum().item()
            n+=y.shape[0]
    return accSum/n

# 训练模型

In [70]:
epochsNum=5
# PyTorch's CrossEntropyLoss averages over the batch by default, while the
# hand-written sgd divides the gradient by the batch size once more. The
# learning rate is therefore set large so that lr / batchSize is the
# effective step size.
lr=100
params=[w1,b1,w2,b2]

for epoch in range(epochsNum):
    train_l_sum=0.0
    n=0
    for x,y in trainIter:
        yP=net(x)
        # The loss is already a scalar (batch mean), so no extra .sum() is needed.
        l=loss(yP,y)
        l.backward()
        sgd(params,lr,batchSize)
        # Gradients accumulate across backward() calls; clear them after each step.
        for param in params:
            param.grad.zero_()
        # Accumulate a plain Python float: adding the loss tensor itself would
        # keep autograd history alive across the whole epoch.
        train_l_sum+=l.item()
        n+=y.shape[0]
    # NOTE(review): train_l_sum is a sum of per-batch *mean* losses, so dividing
    # by the sample count n prints roughly (mean loss / batch size), not the
    # per-sample loss. Kept as is so the numbers stay comparable with earlier runs.
    print(epoch,train_l_sum/n,evaluateAccuary(testIter,net))

0 0.0030507914225260417 0.7764
1 0.0018792550404866536 0.8157
2 0.0016602760314941407 0.8466
3 0.001525217056274414 0.8489
4 0.0014431463877360026 0.8365
