<a href="https://colab.research.google.com/github/Yuchen971/DiveIntoDeepLearning/blob/main/Linear_Neural_Networks/Softmax.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install d2l

In [2]:
%matplotlib inline
import torch
import torchvision
from torch.utils import data
from torchvision import transforms 
from d2l import torch as d2l

In [3]:
def get_fashion_mnist_labels(labels): 
  """返回Fashion-MNIST数据集的文本标签"""
  text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                  'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'] 
  return [text_labels[int(i)] for i in labels]

def get_data_loader_workers():
  return 4 # 使用四个进程

def load_data_fashion_mnist(batch_size, resize = None):
  '''
  load_data_fashion_mnist函数
  用于获取和读取Fashion-MNIST数据集
  返回训练集和验证集的数据迭代器
  '''
  # ToTensor PIL (H*W*C) in [0,255] 
  # -> torch.Tensor (C*H*W) in [0.0, 1.0]
  trans = [transforms.ToTensor()] # trans is a list
  if resize:
    # insert new transformer into the trans list
    trans.insert(0, transforms.Resize(resize))
  # 串联多个图片变换的操作, 也就是串联trans的list
  trans = transforms.Compose(trans)
  mnist_train = torchvision.datasets.FashionMNIST(
      root="../data", train=True, transform = trans, download=True
  )
  mnist_test = torchvision.datasets.FashionMNIST(
      root="../data", train=False, transform = trans, download=True
  )
  return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                          num_workers=get_data_loader_workers()),
          data.DataLoader(mnist_test, batch_size, shuffle=False,
                          num_workers = get_data_loader_workers()))


In [24]:
batch_size = 256
num_epochs = 10
lr = 0.1
train_iter, test_iter = load_data_fashion_mnist(batch_size)
num_inputs = 784
num_outputs = 10
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True) 
b = torch.zeros(num_outputs, requires_grad=True)

def softmax(X):
  X_exp = torch.exp(X)
  partition = X_exp.sum(1, keepdim=True)
  return X_exp / partition # 这里使用广播机制
  # ([256, 10]) / ([256, 1]) => ([256, 10])

# 这里X变为(256, 784) 其中256是batch_size, 784是图片拉长的维度
# 也就是每一行一个被拉长的图片, 784个像素点feature, 有256张
# W维度为(784, 10), 相乘得到的维度为 (256, 10), 也就是每一张图片的十种类别的概率
def net(X):
  return softmax(
      torch.matmul(X.reshape(-1, W.shape[0]), W) + b
  )

def cross_entropy(y_hat, y):
  '''
  y = torch.tensor([0, 2])
  y_hat = torch.tensor([[0.1, 0.3, 0.6],[0.3, 0.2, 0.5]])
  cross_entropy(y_hat, y)
  '''
  return -torch.log(y_hat[range(len(y_hat)), y])

def accuracy(y_hat, y):
  if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
    y_hat = y_hat.argmax(axis=1) # 每行中最大元素的索引来获得预测类别
  # 将y_hat的数据类型转换为与y的数据类型, 一个32, 一个64
  cmp = y_hat.type(y.dtype) == y 
  # 将 TRUE, FALSE -> 0, 1
  return float(cmp.type(y.dtype).sum()) #return 正确的个数



  cpuset_checked))


In [21]:
class Accumulator:
  '''对n个变量叠加'''
  def __init__(self, n):
    self.data = [0.0] * n # 初始化n个数
  def add(self, *args):
    # 元素相加
    self.data = [a + float(b) for a, b in zip(self.data, args)]
  def reset(self):
    self.data = [0.0] * len(self.data)
  def __getitem__(self, idx):
    return self.data[idx]

def evaluate_accuracy(net, data_iter):
  if isinstance(net, torch.nn.Module):
    net.eval() # 评估模式
  metric = Accumulator(2) # 初始化累加的两个参数
  with torch.no_grad():
    for X, y in data_iter:
      # y.numel(): return the length (总数)
      metric.add(accuracy(net(X), y), y.numel())
  return metric[0] / metric[1] # 正确的个数除以总数

In [22]:
def train_epoch_ch3(net, train_iter, loss, updater):
  '''训练模型一个迭代周期, updater: optimizer'''
  if isinstance(net, torch.nn.Module):
    net.train()
  # 训练损失总和、训练准确度总和、样本数初始化
  metric = Accumulator(3)
  for X, y in train_iter:
    # 计算梯度
    y_hat = net(X) # 预测值输出: ([256, 10])
    l = loss(y_hat, y) # cross entropy
    if isinstance(updater, torch.optim.Optimizer):
      # 使用PyTorch内置的优化器和损失函数
      updater.zero_grad()
      l.backward()
      updater.step()
    else:
      # 使用定制的优化器和损失函数
      l.sum().backward() # l是向量, 转换为标量, pytorch自动转换
      updater(X.shape[0]) # X.shape 也就是 batch_size, 因为拉长了
    metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
  # 返回训练损失和训练精度
  return metric[0] / metric [2], metric[1] / metric [2]

In [25]:
def sgd(params, lr, batch_size):
  with torch.no_grad(): # don't record grad, 此函数只负责更新param
    for param in params:
      param -= lr * param.grad / batch_size
      param.grad.zero_() # set grad to 0
def updater(batch_size):
  return sgd([W,b], lr, batch_size)
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
  for epoch in range(num_epochs):
    train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
    test_acc = evaluate_accuracy(net, test_iter)
    train_loss, train_acc = train_metrics
    print(f'train_loss = {train_loss}, train_acc = {train_acc}, test_acc = {test_acc}')

train_ch3(net,
          train_iter,
          test_iter,
          cross_entropy,
          num_epochs,
          updater)

  cpuset_checked))


train_loss = 0.7861420282363891, train_acc = 0.7485666666666667, test_acc = 0.7889
train_loss = 0.5708365582148234, train_acc = 0.81365, test_acc = 0.8062
train_loss = 0.5259884218215942, train_acc = 0.8245833333333333, test_acc = 0.8161
train_loss = 0.5013544778188069, train_acc = 0.8317166666666667, test_acc = 0.8066
train_loss = 0.4739455588658651, train_acc = 0.8403666666666667, test_acc = 0.8264
train_loss = 0.46448530502319335, train_acc = 0.8438, test_acc = 0.8023
train_loss = 0.4585993151982625, train_acc = 0.8444666666666667, test_acc = 0.8267
train_loss = 0.4526930679321289, train_acc = 0.8451666666666666, test_acc = 0.829
train_loss = 0.4478394233703613, train_acc = 0.8479, test_acc = 0.8356


In [30]:
def predict_ch3(net, test_iter, n = 6):
  for X, y in test_iter:
    break
  trues = get_fashion_mnist_labels(y)
  preds = get_fashion_mnist_labels(net(X).argmax(axis=1))
  titles = [true +'\n' + pred for true, pred in zip(trues, preds)] 
  print(trues)
  print(preds)
predict_ch3(net, test_iter)

  cpuset_checked))


['ankle boot', 'pullover', 'trouser', 'trouser', 'shirt', 'trouser', 'coat', 'shirt', 'sandal', 'sneaker', 'coat', 'sandal', 'sneaker', 'dress', 'coat', 'trouser', 'pullover', 'coat', 'bag', 't-shirt', 'pullover', 'sandal', 'sneaker', 'ankle boot', 'trouser', 'coat', 'shirt', 't-shirt', 'ankle boot', 'dress', 'bag', 'bag', 'dress', 'dress', 'bag', 't-shirt', 'sneaker', 'sandal', 'sneaker', 'ankle boot', 'shirt', 'trouser', 'dress', 'sneaker', 'shirt', 'sneaker', 'pullover', 'trouser', 'pullover', 'pullover', 'coat', 'coat', 'sandal', 'bag', 'pullover', 'pullover', 'bag', 'coat', 'bag', 't-shirt', 'sneaker', 'sneaker', 'bag', 'sandal', 'trouser', 'trouser', 'pullover', 'dress', 'ankle boot', 'bag', 'sneaker', 't-shirt', 'pullover', 'shirt', 'pullover', 'dress', 'trouser', 'pullover', 'bag', 'coat', 'trouser', 'bag', 'sandal', 'ankle boot', 'sandal', 't-shirt', 'dress', 'pullover', 't-shirt', 'shirt', 'sandal', 'dress', 'shirt', 'sneaker', 'trouser', 'bag', 't-shirt', 'trouser', 'coat', 