# 1. 优化器


① 损失函数调用 backward 方法，就可以调用损失函数的反向传播方法，就可以求出我们需要调节的梯度，我们就可以利用我们的优化器就可以根据梯度对参数进行调整，达到整体误差降低的目的。

② 梯度要清零，如果梯度不清零会导致梯度累加。


# 2. 神经网络优化一轮


In [None]:
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(
    "./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True
)
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()  # 交叉熵
# loss = nn.MSELoss
tudui = Tudui()
# optim = torch.optim.SGD(tudui.parameters(),lr=0.01)   # 随机梯度下降优化器
optim = torch.optim.Adam(tudui.parameters(), lr=0.001)  # 随机梯度下降优化器
# 计算损失，梯度清零，反向传播，参数更新
for data in dataloader:
    imgs, targets = data
    outputs = tudui(imgs)
    result_loss = loss(outputs, targets)  # 计算实际输出与目标输出的差距
    optim.zero_grad()  # 梯度清零
    result_loss.backward()  # 反向传播，计算损失函数的梯度
    optim.step()  # 根据梯度，对网络的参数进行调优
    print(result_loss)  # 对数据只看了一遍，只看了一轮，所以loss下降不大

tensor(2.2990, grad_fn=<NllLossBackward0>)
tensor(2.3462, grad_fn=<NllLossBackward0>)
tensor(2.2930, grad_fn=<NllLossBackward0>)
tensor(2.2790, grad_fn=<NllLossBackward0>)
tensor(2.3023, grad_fn=<NllLossBackward0>)
tensor(2.2776, grad_fn=<NllLossBackward0>)
tensor(2.2654, grad_fn=<NllLossBackward0>)
tensor(2.2530, grad_fn=<NllLossBackward0>)
tensor(2.2054, grad_fn=<NllLossBackward0>)
tensor(2.1965, grad_fn=<NllLossBackward0>)
tensor(2.1876, grad_fn=<NllLossBackward0>)
tensor(2.2011, grad_fn=<NllLossBackward0>)
tensor(2.2157, grad_fn=<NllLossBackward0>)
tensor(2.2147, grad_fn=<NllLossBackward0>)
tensor(2.1963, grad_fn=<NllLossBackward0>)
tensor(2.1902, grad_fn=<NllLossBackward0>)
tensor(2.2693, grad_fn=<NllLossBackward0>)
tensor(2.1461, grad_fn=<NllLossBackward0>)
tensor(2.0569, grad_fn=<NllLossBackward0>)
tensor(2.1459, grad_fn=<NllLossBackward0>)
tensor(2.1481, grad_fn=<NllLossBackward0>)
tensor(2.0632, grad_fn=<NllLossBackward0>)
tensor(2.0564, grad_fn=<NllLossBackward0>)
tensor(2.11

# 3. 神经网络优化多轮


In [None]:
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(
    "./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True
)
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()  # 交叉熵
tudui = Tudui()
# optim = torch.optim.SGD(tudui.parameters(),lr=0.01)   # 随机梯度下降优化器
#
optim = torch.optim.Adam(tudui.parameters(), lr=0.001)  # 动量优化器,adam已经内置了动量
for epoch in range(10):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets)  # 计算实际输出与目标输出的差距
        optim.zero_grad()  # 梯度清零
        result_loss.backward()  # 反向传播，计算损失函数的梯度
        optim.step()  # 根据梯度，对网络的参数进行调优
        running_loss = running_loss + result_loss
    print(running_loss)  # 对这一轮所有误差的总和

tensor(285.9961, grad_fn=<AddBackward0>)
tensor(231.3197, grad_fn=<AddBackward0>)
tensor(204.8078, grad_fn=<AddBackward0>)
tensor(182.5047, grad_fn=<AddBackward0>)
tensor(163.2892, grad_fn=<AddBackward0>)
tensor(146.5211, grad_fn=<AddBackward0>)
tensor(127.8201, grad_fn=<AddBackward0>)
tensor(110.0710, grad_fn=<AddBackward0>)
tensor(95.1598, grad_fn=<AddBackward0>)
tensor(85.7083, grad_fn=<AddBackward0>)


# 4. 神经网络学习率优化


In [23]:
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(
    "./dataset", train=False, transform=torchvision.transforms.ToTensor(), download=True
)
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()  # 交叉熵
tudui = Tudui()
# optim = torch.optim.SGD(tudui.parameters(), lr=0.01)  # 随机梯度下降优化器
optim = torch.optim.Adam(tudui.parameters(), lr=0.001)  # 动量优化器

# todo 学习率衰减,每过 step_size 更新一次优化器，更新是学习率为原来的学习率的的 0.1 倍
scheduler = torch.optim.lr_scheduler.StepLR(optim, step_size=5, gamma=0.1)
print("dataset len: ",len(dataloader))
for epoch in range(10):
    running_loss = 0.0
    scheduler.step()#  学习率衰减,放在每轮epoch之后，如果在内层，则很快就收敛
    for data in dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        result_loss = loss(outputs, targets)  # 计算实际输出与目标输出的差距
        optim.zero_grad()  # 梯度清零
        result_loss.backward()  # 反向传播，计算损失函数的梯度
        optim.step()  # 根据梯度，对网络的参数进行调优
        
        # 学习率太小了，所以20个轮次后，相当于没走多少
        running_loss = running_loss + result_loss
    print(running_loss)  # 对这一轮所有误差的总和

dataset len:  156




KeyboardInterrupt: 