<a href="https://colab.research.google.com/github/fredyah/tudui-pytorch/blob/main/tudui_pytorch_WhatIs_nn_Optimizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import torch
from torch import nn
from torch.nn import L1Loss, MSELoss, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader
import torchvision
from torch.optim.lr_scheduler import StepLR

In [2]:
## 使用一個 nn.Module 做為 example

class Tudui(nn.Module):
  def __init__(self):
    super(Tudui, self).__init__()
    self.model1 = Sequential(
      Conv2d(3, 32, 5, padding=2),
      MaxPool2d(2),
      Conv2d(32, 32, 5, padding=2),
      MaxPool2d(2),
      Conv2d(32, 64, 5, padding=2),
      MaxPool2d(2),
      Flatten(),
      Linear(1024, 64),
      Linear(64, 10)
    )

  def forward(self, x):
    x = self.model1(x)
    return x

In [3]:
dataset = torchvision.datasets.CIFAR10(root="./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=1)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./dataset/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 79718273.16it/s]


Extracting ./dataset/cifar-10-python.tar.gz to ./dataset


In [None]:
tudui = Tudui()

optim = torch.optim.SGD(tudui.parameters(), lr=0.01)    ## 指定優化器為SGD, params 使用 tudui.parameters(), learning rate 設定為 0.01
scheduler = StepLR(optim, step_size=5, gamma=0.1)  ## 使用 StepLR 排程調整 optim 的 learning rate ，每5輪調整一次，也就是 * 0.1
## 梯度清零，還是一樣使用 optim 進行
## 參數調優，就需要由 optim 改為 scheduler 進行


loss_cross = CrossEntropyLoss()

for epoch in range(20):
  running_loss = 0.0
  for data in dataloader:
    imgs, targets = data
    outputs = tudui(imgs)
    #print(outputs)
    #print(targets)
    result_loss = loss_cross(outputs, targets)
    running_loss += result_loss
    #print(result_loss)
    optim.zero_grad()   ## 梯度清零 (這是因為 PyTorch 中的梯度是累加的（即每次調用 .backward() 時，計算的梯度會被加到已有的梯度中）。所以，在開始新的訓練步驟前，需要清除舊的梯度，避免累加錯誤的值)
    result_loss.backward()  ## 調用 loss 的反相傳播，找出每個節點的梯度，這些梯度將用來更新模型的權重，以使損失最小化。
    scheduler.step()   ## 對模行參數，進行調優。優化器根據計算出的梯度來調整模型的參數，使損失函數的值減少。常用的優化器包括 SGD（隨機梯度下降）、Adam 等
  print("Running Loss =", running_loss)



Running Loss = tensor(23058.5684, grad_fn=<AddBackward0>)
Running Loss = tensor(23058.5684, grad_fn=<AddBackward0>)
Running Loss = tensor(23058.5684, grad_fn=<AddBackward0>)
Running Loss = tensor(23058.5684, grad_fn=<AddBackward0>)
Running Loss = tensor(23058.5684, grad_fn=<AddBackward0>)
Running Loss = tensor(23058.5684, grad_fn=<AddBackward0>)
Running Loss = tensor(23058.5684, grad_fn=<AddBackward0>)
