# 导入必要的库

In [19]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from torch import nn
from torch import optim
import torch
import os

# 配置信息

In [20]:
config = {
    "data_size": 150,
    "train_size": 120,
    "test_size": 30,
    "feature_number": 4,
    "num_class": 3,
    "batch_size": 30,
    "save_checkpoint_steps": 5,
    "keep_checkpoint_max": 1,
    "out_dir_no_opt": os.path.join(".", "model_iris", "no_opt"),
    "out_dir_sgd": os.path.join(".", "model_iris", "sgd"),
    "out_dir_momentum": os.path.join(".", "model_iris", "momentum"),
    "out_dir_adam": os.path.join(".", "model_iris", "adam"),
    "out_dir_prefix": "checkpoint_fashion_forward",
}

# 神经网络定义

In [21]:
class Net(nn.Module):
    def __init__(self) -> None:
        super().__init__()

        self.fc1 = nn.Linear(4, 5)
        self.fc2 = nn.Linear(5, 3)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        return x

# 定义自己编写的优化器兼容 PyTorch 接口

## 无优化

In [22]:
class NoOptimizer(optim.Optimizer):
    def __init__(self, params, default={}) -> None:
        super().__init__(params, default)
        self.param_groups = params

    def step(self):
        return

## SGD

In [23]:
class MySGD(optim.Optimizer):
    def __init__(self, params, lr, default={}) -> None:
        super().__init__(params, default)
        self.lr = lr

    def step(self):
        for param_group in self.param_groups:
            params = param_group["params"]
            for param in params:
                param.data -= self.lr * param.grad

## Momentum

In [24]:
class MyMomentum(optim.Optimizer):
    def __init__(self, params, lr, momentum, default={}) -> None:
        super().__init__(params, default)
        self.lr = lr
        self.momentum = momentum
        self.v = []
        for param_group in self.param_groups:
            params = param_group["params"]
            self.v.append([torch.zeros_like(param.data) for param in params])

    def step(self):
        for i, param_group in enumerate(self.param_groups):
            params = param_group["params"]
            v = self.v[i]
            for j, param in enumerate(params):
                v[j] = self.momentum * v[j] - self.lr * param.grad
                param.data += v[j]

## Adam

In [25]:
class MyAdam(optim.Optimizer):
    def __init__(self, params, lr, beta1, beta2, epsilon, default={}) -> None:
        super().__init__(params, default)
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.v = []
        self.m = []
        for param_group in self.param_groups:
            params = param_group["params"]
            self.v.append([torch.zeros_like(param.data) for param in params])
            self.m.append([torch.zeros_like(param.data) for param in params])

    def step(self):
        for i, param_group in enumerate(self.param_groups):
            params = param_group["params"]
            m = self.m[i]
            v = self.v[i]
            for j, param in enumerate(params):
                m[j] = self.beta1 * m[j] + (1 - self.beta1) * param.grad
                v[j] = self.beta2 * v[j] + (1 - self.beta2) * torch.square(param.grad)
                param.data -= self.lr * m[j].div(self.epsilon + torch.sqrt(v[j]))

# 定义训练和测试过程

In [26]:
def train(data, net, loss_fn, optimizer):
    X_train, y_train = data
    for batch in range(y_train.shape[0]):
        pred = net(X_train[batch])
        loss = loss_fn(pred, y_train[batch])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


def test(data, net, loss_fn):
    X_train, y_train = data
    loss = 0
    currect = 0
    with torch.no_grad():
        for batch in range(y_train.shape[0]):
            pred = net(X_train[batch])
            loss += loss_fn(pred, y_train[batch])
            if torch.argmax(pred) == y_train[batch]:
                currect += 1
    loss /= y_train.shape[0]
    currect /= y_train.shape[0]
    return loss, currect

# 数据集

In [27]:
iris_X, iris_y = datasets.load_iris(return_X_y=True)
X_training, X_test, y_training, y_test = train_test_split(
    iris_X, iris_y, test_size=config["test_size"], train_size=config["train_size"]
)
X_training = torch.tensor(X_training, dtype=torch.float)
X_test = torch.tensor(X_test, dtype=torch.float)
y_training = torch.tensor(y_training, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# 初始化网络和损失函数
初始化 7 个网络，用于对比分析不同优化器的效果。

In [28]:
net0 = Net()
net1 = Net()
net2 = Net()
net3 = Net()
net4 = Net()
net5 = Net()
net6 = Net()

loss_fn = nn.CrossEntropyLoss()

# 初始化优化器

In [29]:
no_optimizer = NoOptimizer(
    params=net0.parameters(),
)

my_sgd = MySGD(
    params=net1.parameters(),
    lr=0.05,
)

sgd = optim.SGD(
    params=net2.parameters(),
    lr=0.05,
    momentum=0.0,
)

my_momentum = MyMomentum(
    params=net3.parameters(),
    lr=0.01,
    momentum=0.9,
)

momentum = optim.SGD(
    params=net4.parameters(),
    lr=0.01,
    momentum=0.9,
)

my_adam = MyAdam(
    params=net5.parameters(),
    lr=0.001,
    beta1=0.9,
    beta2=0.99,
    epsilon=1e-8,
)

adam = optim.Adam(
    params=net6.parameters(),
    lr=0.001,
    betas=[
        0.9,
        0.99,
    ],
    eps=1e-8,
)

# 比较各优化器效果

In [30]:
epoch = 20

In [31]:
print("=========== No optimizer ===========")
for _ in range(epoch):
    train(
        data=(X_training, y_training),
        net=net0,
        loss_fn=loss_fn,
        optimizer=no_optimizer,
    )

    test_loss, test_accuracy = test(
        data=(X_test, y_test),
        net=net0,
        loss_fn=loss_fn,
    )

    print("Epoch {}:\tLoss is {},\taccuracy is {}".format(_, test_loss, test_accuracy))

Epoch 0:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 1:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 2:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 3:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 4:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 5:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 6:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 7:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 8:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 9:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 10:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 11:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 12:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 13:	Loss is 1.5873159170150757,	accuracy is 0.36666666666666664
Epoch 14:	Loss is 1.5873159170

In [32]:
print("=========== My SGD ===========")
for _ in range(epoch):
    train(
        data=(X_training, y_training),
        net=net1,
        loss_fn=loss_fn,
        optimizer=my_sgd,
    )

    test_loss, test_accuracy = test(
        data=(X_test, y_test),
        net=net1,
        loss_fn=loss_fn,
    )

    print("Epoch {}:\tLoss is {},\taccuracy is {}".format(_, test_loss, test_accuracy))

Epoch 0:	Loss is 0.38999757170677185,	accuracy is 0.7666666666666667
Epoch 1:	Loss is 0.3392419219017029,	accuracy is 0.8
Epoch 2:	Loss is 0.3282116651535034,	accuracy is 0.8
Epoch 3:	Loss is 0.32055795192718506,	accuracy is 0.8
Epoch 4:	Loss is 0.30110231041908264,	accuracy is 0.8333333333333334
Epoch 5:	Loss is 0.27049845457077026,	accuracy is 0.8666666666666667
Epoch 6:	Loss is 0.24279597401618958,	accuracy is 0.9
Epoch 7:	Loss is 0.22894611954689026,	accuracy is 0.9
Epoch 8:	Loss is 0.2229236215353012,	accuracy is 0.9
Epoch 9:	Loss is 0.2187321037054062,	accuracy is 0.9
Epoch 10:	Loss is 0.2145044207572937,	accuracy is 0.9
Epoch 11:	Loss is 0.21003161370754242,	accuracy is 0.9
Epoch 12:	Loss is 0.20577366650104523,	accuracy is 0.9333333333333333
Epoch 13:	Loss is 0.20219554007053375,	accuracy is 0.9333333333333333
Epoch 14:	Loss is 0.19947503507137299,	accuracy is 0.9333333333333333
Epoch 15:	Loss is 0.1976454257965088,	accuracy is 0.9333333333333333
Epoch 16:	Loss is 0.19654807448

In [33]:
print("=========== PyTorch SGD ===========")
for _ in range(epoch):
    train(
        data=(X_training, y_training),
        net=net2,
        loss_fn=loss_fn,
        optimizer=sgd,
    )

    test_loss, test_accuracy = test(
        data=(X_test, y_test),
        net=net2,
        loss_fn=loss_fn,
    )

    print("Epoch {}:\tLoss is {},\taccuracy is {}".format(_, test_loss, test_accuracy))

Epoch 0:	Loss is 0.3873150646686554,	accuracy is 0.7333333333333333
Epoch 1:	Loss is 0.35090160369873047,	accuracy is 0.7666666666666667
Epoch 2:	Loss is 0.3443080484867096,	accuracy is 0.8
Epoch 3:	Loss is 0.33716824650764465,	accuracy is 0.8
Epoch 4:	Loss is 0.31342145800590515,	accuracy is 0.8333333333333334
Epoch 5:	Loss is 0.2778344452381134,	accuracy is 0.8666666666666667
Epoch 6:	Loss is 0.24967060983181,	accuracy is 0.8666666666666667
Epoch 7:	Loss is 0.23536576330661774,	accuracy is 0.9
Epoch 8:	Loss is 0.22808484733104706,	accuracy is 0.9
Epoch 9:	Loss is 0.22280992567539215,	accuracy is 0.9
Epoch 10:	Loss is 0.21782471239566803,	accuracy is 0.9
Epoch 11:	Loss is 0.21269993484020233,	accuracy is 0.9
Epoch 12:	Loss is 0.20778529345989227,	accuracy is 0.9333333333333333
Epoch 13:	Loss is 0.203510582447052,	accuracy is 0.9333333333333333
Epoch 14:	Loss is 0.1999613493680954,	accuracy is 0.9333333333333333
Epoch 15:	Loss is 0.19714969396591187,	accuracy is 0.9333333333333333
Epoc

In [34]:
print("=========== My Momentum ===========")
for _ in range(epoch):
    train(
        data=(X_training, y_training),
        net=net3,
        loss_fn=loss_fn,
        optimizer=my_momentum,
    )

    test_loss, test_accuracy = test(
        data=(X_test, y_test),
        net=net3,
        loss_fn=loss_fn,
    )

    print("Epoch {}:\tLoss is {},\taccuracy is {}".format(_, test_loss, test_accuracy))

Epoch 0:	Loss is 0.3673272430896759,	accuracy is 0.7333333333333333
Epoch 1:	Loss is 0.3536987900733948,	accuracy is 0.8
Epoch 2:	Loss is 1.2589279413223267,	accuracy is 0.7
Epoch 3:	Loss is 1.68669855594635,	accuracy is 0.7
Epoch 4:	Loss is 0.3379596769809723,	accuracy is 0.8333333333333334
Epoch 5:	Loss is 2.735685110092163,	accuracy is 0.7
Epoch 6:	Loss is 0.22797778248786926,	accuracy is 0.9333333333333333
Epoch 7:	Loss is 0.22898425161838531,	accuracy is 0.9
Epoch 8:	Loss is 0.5694103240966797,	accuracy is 0.7
Epoch 9:	Loss is 4.207204818725586,	accuracy is 0.7
Epoch 10:	Loss is 0.16615407168865204,	accuracy is 0.9
Epoch 11:	Loss is 0.18605954945087433,	accuracy is 0.9333333333333333
Epoch 12:	Loss is 0.2255353182554245,	accuracy is 0.9333333333333333
Epoch 13:	Loss is 0.20037534832954407,	accuracy is 0.9
Epoch 14:	Loss is 2.0313522815704346,	accuracy is 0.8
Epoch 15:	Loss is 3.411961078643799,	accuracy is 0.7
Epoch 16:	Loss is 3.983588218688965,	accuracy is 0.7
Epoch 17:	Loss is 

In [35]:
print("=========== PyTorch Momentum ===========")
for _ in range(epoch):
    train(
        data=(X_training, y_training),
        net=net4,
        loss_fn=loss_fn,
        optimizer=momentum,
    )

    test_loss, test_accuracy = test(
        data=(X_test, y_test),
        net=net4,
        loss_fn=loss_fn,
    )

    print("Epoch {}:\tLoss is {},\taccuracy is {}".format(_, test_loss, test_accuracy))

Epoch 0:	Loss is 0.7532129883766174,	accuracy is 0.7
Epoch 1:	Loss is 1.0476608276367188,	accuracy is 0.7
Epoch 2:	Loss is 0.21839022636413574,	accuracy is 0.9333333333333333
Epoch 3:	Loss is 1.2472529411315918,	accuracy is 0.7
Epoch 4:	Loss is 0.5363070368766785,	accuracy is 0.7
Epoch 5:	Loss is 0.19136972725391388,	accuracy is 0.9333333333333333
Epoch 6:	Loss is 0.23327694833278656,	accuracy is 0.9333333333333333
Epoch 7:	Loss is 0.27730992436408997,	accuracy is 0.9
Epoch 8:	Loss is 0.18601396679878235,	accuracy is 0.9333333333333333
Epoch 9:	Loss is 0.23065759241580963,	accuracy is 0.9333333333333333
Epoch 10:	Loss is 5.4513702392578125,	accuracy is 0.7
Epoch 11:	Loss is 0.1767035275697708,	accuracy is 0.9333333333333333
Epoch 12:	Loss is 0.18380142748355865,	accuracy is 0.9
Epoch 13:	Loss is 0.17421826720237732,	accuracy is 0.9333333333333333
Epoch 14:	Loss is 0.34657809138298035,	accuracy is 0.9
Epoch 15:	Loss is 0.60945063829422,	accuracy is 0.8333333333333334
Epoch 16:	Loss is 0

In [36]:
print("=========== My Adam ===========")
for _ in range(epoch):
    train(
        data=(X_training, y_training),
        net=net5,
        loss_fn=loss_fn,
        optimizer=my_adam,
    )

    test_loss, test_accuracy = test(
        data=(X_test, y_test),
        net=net5,
        loss_fn=loss_fn,
    )

    print("Epoch {}:\tLoss is {},\taccuracy is {}".format(_, test_loss, test_accuracy))

Epoch 0:	Loss is 1.0053554773330688,	accuracy is 0.36666666666666664
Epoch 1:	Loss is 0.9487549662590027,	accuracy is 0.5
Epoch 2:	Loss is 0.8877555131912231,	accuracy is 0.7666666666666667
Epoch 3:	Loss is 0.8264539837837219,	accuracy is 0.8333333333333334
Epoch 4:	Loss is 0.7664218544960022,	accuracy is 0.8333333333333334
Epoch 5:	Loss is 0.7091507315635681,	accuracy is 0.8333333333333334
Epoch 6:	Loss is 0.6560317873954773,	accuracy is 0.8333333333333334
Epoch 7:	Loss is 0.6080805063247681,	accuracy is 0.9
Epoch 8:	Loss is 0.5657724738121033,	accuracy is 0.9
Epoch 9:	Loss is 0.5290507674217224,	accuracy is 0.9333333333333333
Epoch 10:	Loss is 0.4974624216556549,	accuracy is 0.9333333333333333
Epoch 11:	Loss is 0.4703356921672821,	accuracy is 0.9333333333333333
Epoch 12:	Loss is 0.4469342529773712,	accuracy is 0.9666666666666667
Epoch 13:	Loss is 0.42656150460243225,	accuracy is 0.9666666666666667
Epoch 14:	Loss is 0.40861520171165466,	accuracy is 0.9666666666666667
Epoch 15:	Loss is

In [37]:
print("=========== PyTorch Adam ===========")
for _ in range(epoch):
    train(
        data=(X_training, y_training),
        net=net6,
        loss_fn=loss_fn,
        optimizer=adam,
    )

    test_loss, test_accuracy = test(
        data=(X_test, y_test),
        net=net6,
        loss_fn=loss_fn,
    )

    print("Epoch {}:\tLoss is {},\taccuracy is {}".format(_, test_loss, test_accuracy))

Epoch 0:	Loss is 0.9706352949142456,	accuracy is 0.36666666666666664
Epoch 1:	Loss is 0.8454355597496033,	accuracy is 0.9666666666666667
Epoch 2:	Loss is 0.7459096908569336,	accuracy is 0.8333333333333334
Epoch 3:	Loss is 0.6582104563713074,	accuracy is 0.8333333333333334
Epoch 4:	Loss is 0.5847419500350952,	accuracy is 0.8666666666666667
Epoch 5:	Loss is 0.5261387825012207,	accuracy is 0.9
Epoch 6:	Loss is 0.48016396164894104,	accuracy is 0.9666666666666667
Epoch 7:	Loss is 0.44390174746513367,	accuracy is 0.9666666666666667
Epoch 8:	Loss is 0.41469794511795044,	accuracy is 0.9666666666666667
Epoch 9:	Loss is 0.39045223593711853,	accuracy is 0.9666666666666667
Epoch 10:	Loss is 0.3696424067020416,	accuracy is 0.9666666666666667
Epoch 11:	Loss is 0.35124650597572327,	accuracy is 0.9666666666666667
Epoch 12:	Loss is 0.33463895320892334,	accuracy is 0.9666666666666667
Epoch 13:	Loss is 0.319449782371521,	accuracy is 0.9666666666666667
Epoch 14:	Loss is 0.3054209351539612,	accuracy is 0.9