从零实现softmax回归

In [7]:
import torch
import torchvision
from torchvision import transforms
from torch.utils import data

In [4]:
def get_dataloader_workers():
    """Return the number of worker processes used by the data loaders."""
    worker_count = 4
    return worker_count

In [9]:
def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST and return (train_loader, test_loader).

    Args:
        batch_size: Number of examples per mini-batch for both loaders.
        resize: Optional target size passed to ``transforms.Resize``; when
            falsy, images are only converted to tensors.

    Returns:
        A tuple ``(train_loader, test_loader)`` of ``DataLoader`` objects.
        The training loader shuffles every epoch; the test loader keeps the
        dataset order so evaluation runs are reproducible.
    """
    steps = [transforms.ToTensor()]
    if resize:
        # Resize must run on the image before it is converted to a tensor.
        steps.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)

    num_workers = get_dataloader_workers()
    train_loader = data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=num_workers)
    # Shuffling brings no benefit at evaluation time, so keep a fixed order.
    test_loader = data.DataLoader(
        mnist_test, batch_size, shuffle=False, num_workers=num_workers)
    return train_loader, test_loader

加载训练和测试数据

In [10]:
# Mini-batch size handed to load_data_fashion_mnist.
batch_size = 256
# Build the training and test data loaders (this call downloads the dataset).
train_iter , test_iter = load_data_fashion_mnist(batch_size)

初始化模型参数

In [11]:
num_input = 784 # input images are 28*28 = 784 pixels
num_output = 10 # 10 classes

# Weights drawn from a normal distribution (mean 0, std 0.01), with gradients tracked.
W = torch.normal(0 , 0.01 , size= (num_input, num_output) , requires_grad= True)
# Biases start at zero, with gradients tracked.
b = torch.zeros(num_output , requires_grad= True)

In [12]:
# Softmax computation
def softmax(X):
    """Apply softmax along dimension 1 of ``X``.

    Args:
        X: Tensor with at least two dimensions; each slice along
            dimension 1 is normalized independently.

    Returns:
        A tensor of the same shape as ``X`` whose entries along dimension 1
        are non-negative and sum to 1.
    """
    # Softmax is unchanged by subtracting a per-row constant. Shifting by the
    # row maximum keeps every exponent <= 0, so exp() cannot overflow to inf
    # (which would otherwise yield inf / inf = NaN for large logits).
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition

In [13]:
def net(X):
    """Softmax regression model: flatten the batch, apply W and b, then softmax."""
    # Flatten each example into a row whose length matches W's first dimension.
    flat = X.reshape((-1, W.shape[0]))
    logits = torch.matmul(flat, W) + b
    return softmax(logits)

损失函数

交叉熵函数

In [14]:
def cross_entropy(y_hat, y):
    """Return the per-example cross-entropy loss.

    For each row ``i`` of ``y_hat``, picks the entry at column ``y[i]`` and
    returns its negative log.
    """
    rows = range(len(y_hat))
    # Pair every row index with its label to select one entry per row.
    picked = y_hat[rows, y]
    return -torch.log(picked)

In [15]:
def accuracy(y_hat, y):
    """Count the number of correct predictions and return it as a float.

    When ``y_hat`` has more than one dimension and more than one column, it
    is reduced to class indices with argmax over dimension 1; otherwise it is
    compared with ``y`` as given.
    """
    has_class_scores = y_hat.dim() > 1 and y_hat.shape[1] > 1
    predicted = y_hat.argmax(dim=1) if has_class_scores else y_hat
    # Cast to the label dtype so the comparison is between like types.
    hits = predicted.to(y.dtype) == y
    return float(hits.to(y.dtype).sum())

In [17]:
class Accumulator:
    """Keep running sums over ``n`` quantities."""

    def __init__(self, n):
        # One float slot per tracked quantity, all starting at zero.
        self.data = [0.0] * n

    def add(self, *args):
        """Add each argument to the slot at the same position."""
        updated = []
        # zip stops at the shorter sequence, as in the comprehension form.
        for total, value in zip(self.data, args):
            updated.append(total + float(value))
        self.data = updated

    def reset(self):
        """Set every slot back to zero, keeping the current number of slots."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        return self.data[idx]

训练

In [18]:
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: Callable model; if it is a ``torch.nn.Module`` it is switched to
            training mode first.
        train_iter: Iterable yielding ``(X, y)`` mini-batches.
        loss: Callable returning a loss tensor for ``(y_hat, y)``.
        updater: Either a ``torch.optim.Optimizer`` or a callable that is
            invoked with the batch size after the backward pass.

    Returns:
        A tuple ``(average loss per example, training accuracy)``.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed loss, number of correct predictions, number of examples.
    totals = Accumulator(3)
    for features, labels in train_iter:
        predictions = net(features)
        batch_loss = loss(predictions, labels)
        if uses_torch_optimizer:
            # Built-in optimizer: step on the mean loss.
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
        else:
            # Custom updater: backpropagate the summed loss and pass the
            # batch size to the updater.
            batch_loss.sum().backward()
            updater(features.shape[0])
        totals.add(
            float(batch_loss.sum()),
            accuracy(predictions, labels),
            labels.numel(),
        )
    loss_sum, num_correct, num_seen = totals[0], totals[1], totals[2]
    return loss_sum / num_seen, num_correct / num_seen