In [1]:
import torch
from torch import nn
from d2l import torch as d2l

from utlis import *
import models
import wandb


In [2]:
import wandb

def train_models(net, 
                 train_loader, 
                 test_loader, 
                 epochs, 
                 lr, 
                 net_type : str =  None,
                 use_wandb : bool = False,
                 device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')):
    # 设置优化器
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    # 设置损失函数
    loss = nn.CrossEntropyLoss()
    
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    net = net.to(device)
    
    epoch_list = []
    train_loss = []
    train_acc = []
    test_acc = []
    
    if use_wandb:
        wandb.watch(net, log="all")

    print('training on', device)
    
    
    for epoch in range(epochs):
    # 训练
        loss_sum = 0
        acc = 0
        net.train()
        for idx, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            predict = net(x)
            l = loss(predict, y)
            l.backward()
            optimizer.step()
            
            loss_sum += l.item() * x.size(0)
            acc += predict.max(dim=1)[1].eq(y).sum().item()
            if (idx + 1) % (len(train_loader) // 5) == 0 or idx == len(train_loader) - 1:
                if (idx + 1) != len(train_loader):
                    loss_now = loss_sum / (idx + 1) / x.size(0)
                    acc_now = acc / (idx + 1) / x.size(0)
                else:
                    loss_now = loss_sum / len(train_loader.dataset)
                    acc_now = acc / len(train_loader.dataset)
                
                if use_wandb:
                    wandb.log({"train_loss": loss_now, "train_acc": acc_now})
                train_acc.append(acc_now)
                train_loss.append(loss_now)
                epoch_list.append(epoch + (idx + 1) / len(train_loader))         
        # loss_sum /= len(train_loader.dataset)
        # acc /= len(train_loader.dataset)
        # train_loss.append(loss_sum)
        # train_acc.append(acc)
            
        # 测试
        loss_sum = 0
        acc = 0
        for idx, (x, y) in enumerate(test_loader):
            x, y = x.to(device), y.to(device)
            predict = net(x)
            l = loss(predict, y)
            
            acc += predict.max(dim=1)[1].eq(y).sum().item()
            
        acc /= len(test_loader.dataset)
        test_acc.append(acc)
        # print(epoch_list)
        
        # print('epoch: {}, train loss: {:.4f}, train acc: {:.4f}, test acc: {:.4f}'.format(epoch + 1, train_loss[-1], train_acc[-1], test_acc[-1]))
    # print(len(train_loss), len(train_acc), len(test_acc))
    
    epoch_test = list(range(1, int(epoch_list[-1]) + 1, 1))
    
    if net_type is not None: 
    
        write2csv('ch7_01.csv', epoch_list, net_type + '_epoch_train')
        write2csv('ch7_01.csv', train_loss, net_type + '_train_loss')
        write2csv('ch7_01.csv', train_acc, net_type + '_train_acc')
        write2csv('ch7_01_eval.csv', epoch_test, net_type + '_epoch_test')
        write2csv('ch7_01_eval.csv', test_acc, net_type + '_test_acc')
        print('record the data')
    else:
        print('net_type is None, don\'t record the data')
    
    # draw_loss_acc(train_loss, train_acc, test_acc, epoch_list)
    

In [13]:
print()

range(0, 5)


In [4]:
AlexNet = models.AlexNet()
LeNet = models.LeNet()
train_loader, test_loader = load_data_fashion_mnist(batch_size=128)
# print(train_loader.dataset)

In [5]:
train_models(LeNet, train_loader, test_loader, epochs=50, lr=0.9, net_type='LeNet')

epoch: 1, train loss: 2.3141, train acc: 0.1026, test acc: 0.1000
epoch: 2, train loss: 1.4155, train acc: 0.4433, test acc: 0.6476
epoch: 3, train loss: 0.7276, train acc: 0.7164, test acc: 0.7335
epoch: 4, train loss: 0.6053, train acc: 0.7628, test acc: 0.7469
epoch: 5, train loss: 0.5366, train acc: 0.7934, test acc: 0.7969
epoch: 6, train loss: 0.4911, train acc: 0.8137, test acc: 0.8145
epoch: 7, train loss: 0.4517, train acc: 0.8304, test acc: 0.8220
epoch: 8, train loss: 0.4288, train acc: 0.8398, test acc: 0.8190
epoch: 9, train loss: 0.4099, train acc: 0.8470, test acc: 0.8364
epoch: 10, train loss: 0.3936, train acc: 0.8521, test acc: 0.8209
epoch: 11, train loss: 0.3803, train acc: 0.8571, test acc: 0.8474
epoch: 12, train loss: 0.3692, train acc: 0.8619, test acc: 0.8516
epoch: 13, train loss: 0.3581, train acc: 0.8656, test acc: 0.8534
epoch: 14, train loss: 0.3486, train acc: 0.8683, test acc: 0.8515
epoch: 15, train loss: 0.3393, train acc: 0.8727, test acc: 0.8554
epoc

In [6]:
train_loader, test_loader = load_data_fashion_mnist(batch_size=128, resize=224)
train_models(AlexNet, train_loader, test_loader, epochs=50, lr=0.01, net_type='AlexNet')

epoch: 1, train loss: 1.3502, train acc: 0.4944, test acc: 0.7036
epoch: 2, train loss: 0.6580, train acc: 0.7535, test acc: 0.7621
epoch: 3, train loss: 0.5401, train acc: 0.7997, test acc: 0.8017
epoch: 4, train loss: 0.4760, train acc: 0.8236, test acc: 0.8251
epoch: 5, train loss: 0.4291, train acc: 0.8429, test acc: 0.8423
epoch: 6, train loss: 0.3985, train acc: 0.8557, test acc: 0.8423
epoch: 7, train loss: 0.3767, train acc: 0.8633, test acc: 0.8580
epoch: 8, train loss: 0.3581, train acc: 0.8690, test acc: 0.8651
epoch: 9, train loss: 0.3423, train acc: 0.8754, test acc: 0.8680
epoch: 10, train loss: 0.3295, train acc: 0.8797, test acc: 0.8722
epoch: 11, train loss: 0.3177, train acc: 0.8851, test acc: 0.8743
epoch: 12, train loss: 0.3066, train acc: 0.8881, test acc: 0.8799
epoch: 13, train loss: 0.2971, train acc: 0.8907, test acc: 0.8790
epoch: 14, train loss: 0.2900, train acc: 0.8942, test acc: 0.8854
epoch: 15, train loss: 0.2808, train acc: 0.8969, test acc: 0.8802
epoc

In [7]:
# 简化设计
train_loader, test_loader = load_data_fashion_mnist(batch_size=128)
AlexSimple = models.AlexNetSimple()
train_models(AlexSimple, train_loader, test_loader, epochs=50, lr=0.01, net_type='AlexSimple')


epoch: 1, train loss: 1.7878, train acc: 0.3581, test acc: 0.5738
epoch: 2, train loss: 0.9173, train acc: 0.6454, test acc: 0.6816
epoch: 3, train loss: 0.7681, train acc: 0.7049, test acc: 0.7151
epoch: 4, train loss: 0.6940, train acc: 0.7343, test acc: 0.7283
epoch: 5, train loss: 0.6403, train acc: 0.7560, test acc: 0.7579
epoch: 6, train loss: 0.6014, train acc: 0.7713, test acc: 0.7685
epoch: 7, train loss: 0.5725, train acc: 0.7826, test acc: 0.7765
epoch: 8, train loss: 0.5459, train acc: 0.7940, test acc: 0.7886
epoch: 9, train loss: 0.5278, train acc: 0.8009, test acc: 0.7987
epoch: 10, train loss: 0.5095, train acc: 0.8078, test acc: 0.7907
epoch: 11, train loss: 0.4938, train acc: 0.8153, test acc: 0.8056
epoch: 12, train loss: 0.4809, train acc: 0.8193, test acc: 0.8110
epoch: 13, train loss: 0.4653, train acc: 0.8261, test acc: 0.8170
epoch: 14, train loss: 0.4542, train acc: 0.8303, test acc: 0.8246
epoch: 15, train loss: 0.4415, train acc: 0.8367, test acc: 0.8246
epoc

In [8]:
train_models(AlexSimple, train_loader, test_loader, epochs=50, lr=0.1, net_type='AlexSimple_2')


epoch: 1, train loss: 0.9032, train acc: 0.6575, test acc: 0.7757
epoch: 2, train loss: 0.5027, train acc: 0.8099, test acc: 0.8304
epoch: 3, train loss: 0.4153, train acc: 0.8458, test acc: 0.8343
epoch: 4, train loss: 0.3654, train acc: 0.8644, test acc: 0.8669
epoch: 5, train loss: 0.3381, train acc: 0.8734, test acc: 0.8729
epoch: 6, train loss: 0.3147, train acc: 0.8835, test acc: 0.8777
epoch: 7, train loss: 0.2944, train acc: 0.8912, test acc: 0.8803
epoch: 8, train loss: 0.2849, train acc: 0.8941, test acc: 0.8840
epoch: 9, train loss: 0.2699, train acc: 0.8986, test acc: 0.8864
epoch: 10, train loss: 0.2590, train acc: 0.9033, test acc: 0.8928
epoch: 11, train loss: 0.2468, train acc: 0.9076, test acc: 0.8882
epoch: 12, train loss: 0.2379, train acc: 0.9101, test acc: 0.8881
epoch: 13, train loss: 0.2279, train acc: 0.9147, test acc: 0.8914
epoch: 14, train loss: 0.2186, train acc: 0.9169, test acc: 0.9020
epoch: 15, train loss: 0.2078, train acc: 0.9217, test acc: 0.9036
epoc

In [7]:
LeNetPro = models.LeNetPro()
train_loader, test_loader = load_data_fashion_mnist(batch_size=128)
# keep lr = 0.1
# use 0.9 被冻结
train_models(LeNetPro, train_loader, test_loader, epochs=50, lr=0.1, net_type='LeNetPro')

epoch: 1, train loss: 1.2333, train acc: 0.5367, test acc: 0.6837
epoch: 2, train loss: 0.7113, train acc: 0.7345, test acc: 0.7488
epoch: 3, train loss: 0.6213, train acc: 0.7701, test acc: 0.7684
epoch: 4, train loss: 0.5720, train acc: 0.7910, test acc: 0.7941
epoch: 5, train loss: 0.5327, train acc: 0.8081, test acc: 0.7993
epoch: 6, train loss: 0.5060, train acc: 0.8209, test acc: 0.8124
epoch: 7, train loss: 0.4841, train acc: 0.8292, test acc: 0.8263
epoch: 8, train loss: 0.4653, train acc: 0.8371, test acc: 0.8284
epoch: 9, train loss: 0.4459, train acc: 0.8429, test acc: 0.8349
epoch: 10, train loss: 0.4319, train acc: 0.8477, test acc: 0.8438
epoch: 11, train loss: 0.4195, train acc: 0.8529, test acc: 0.8472
epoch: 12, train loss: 0.4064, train acc: 0.8561, test acc: 0.8459
epoch: 13, train loss: 0.4000, train acc: 0.8606, test acc: 0.8525
epoch: 14, train loss: 0.3923, train acc: 0.8637, test acc: 0.8482
epoch: 15, train loss: 0.3835, train acc: 0.8652, test acc: 0.8587
epoc

In [10]:
LeNet = models.LeNet()
train_loader, test_loader = load_data_fashion_mnist(batch_size=128)
# keep lr = 0.1
train_models(LeNet, train_loader, test_loader, epochs=50, lr=0.1, net_type='LeNet_0.1_')

epoch: 1, train loss: 2.3074, train acc: 0.1014, test acc: 0.1000
epoch: 2, train loss: 2.2988, train acc: 0.1141, test acc: 0.1470
epoch: 3, train loss: 1.9039, train acc: 0.3284, test acc: 0.4779
epoch: 4, train loss: 1.1888, train acc: 0.5668, test acc: 0.5971
epoch: 5, train loss: 0.9870, train acc: 0.6279, test acc: 0.6315
epoch: 6, train loss: 0.8815, train acc: 0.6714, test acc: 0.6727
epoch: 7, train loss: 0.8192, train acc: 0.6967, test acc: 0.7047
epoch: 8, train loss: 0.7696, train acc: 0.7145, test acc: 0.7163
epoch: 9, train loss: 0.7240, train acc: 0.7287, test acc: 0.7116
epoch: 10, train loss: 0.6889, train acc: 0.7355, test acc: 0.7132
epoch: 11, train loss: 0.6653, train acc: 0.7419, test acc: 0.7401
epoch: 12, train loss: 0.6466, train acc: 0.7477, test acc: 0.7421
epoch: 13, train loss: 0.6305, train acc: 0.7551, test acc: 0.7578
epoch: 14, train loss: 0.6175, train acc: 0.7607, test acc: 0.7600
epoch: 15, train loss: 0.6051, train acc: 0.7665, test acc: 0.7670
epoc

In [11]:
LeNetPro = models.LeNetPro()
train_loader, test_loader = load_data_fashion_mnist(batch_size=128)
# keep lr = 0.1
# use 0.9 被冻结
train_models(LeNetPro, train_loader, test_loader, epochs=50, lr=0.3, net_type='LeNetPro_0.2')

epoch: 1, train loss: 0.9427, train acc: 0.6481, test acc: 0.7380
epoch: 2, train loss: 0.5876, train acc: 0.7853, test acc: 0.7970
epoch: 3, train loss: 0.5120, train acc: 0.8167, test acc: 0.8133
epoch: 4, train loss: 0.4669, train acc: 0.8337, test acc: 0.8323
epoch: 5, train loss: 0.4407, train acc: 0.8429, test acc: 0.8365
epoch: 6, train loss: 0.4236, train acc: 0.8514, test acc: 0.8467
epoch: 7, train loss: 0.4094, train acc: 0.8556, test acc: 0.8460
epoch: 8, train loss: 0.3926, train acc: 0.8595, test acc: 0.8560
epoch: 9, train loss: 0.3790, train acc: 0.8657, test acc: 0.8546
epoch: 10, train loss: 0.3704, train acc: 0.8683, test acc: 0.8606
epoch: 11, train loss: 0.3638, train acc: 0.8708, test acc: 0.8629
epoch: 12, train loss: 0.3554, train acc: 0.8739, test acc: 0.8660
epoch: 13, train loss: 0.3481, train acc: 0.8755, test acc: 0.8631
epoch: 14, train loss: 0.3413, train acc: 0.8768, test acc: 0.8621
epoch: 15, train loss: 0.3374, train acc: 0.8795, test acc: 0.8705
epoc

In [12]:
LeNet = models.LeNet()
train_loader, test_loader = load_data_fashion_mnist(batch_size=128)
# keep lr = 0.1
train_models(LeNet, train_loader, test_loader, epochs=50, lr=0.3, net_type='LeNet_0.2_')

epoch: 1, train loss: 2.3041, train acc: 0.1169, test acc: 0.1827
epoch: 2, train loss: 1.2234, train acc: 0.5425, test acc: 0.6406
epoch: 3, train loss: 0.8257, train acc: 0.6839, test acc: 0.6687
epoch: 4, train loss: 0.7074, train acc: 0.7251, test acc: 0.7195
epoch: 5, train loss: 0.6507, train acc: 0.7456, test acc: 0.7557
epoch: 6, train loss: 0.6152, train acc: 0.7607, test acc: 0.7572
epoch: 7, train loss: 0.5799, train acc: 0.7755, test acc: 0.7779
epoch: 8, train loss: 0.5560, train acc: 0.7860, test acc: 0.7948
epoch: 9, train loss: 0.5270, train acc: 0.7996, test acc: 0.7945
epoch: 10, train loss: 0.5069, train acc: 0.8074, test acc: 0.7898
epoch: 11, train loss: 0.4881, train acc: 0.8168, test acc: 0.8146
epoch: 12, train loss: 0.4700, train acc: 0.8240, test acc: 0.8226
epoch: 13, train loss: 0.4575, train acc: 0.8308, test acc: 0.8270
epoch: 14, train loss: 0.4436, train acc: 0.8358, test acc: 0.8110
epoch: 15, train loss: 0.4322, train acc: 0.8407, test acc: 0.8286
epoc

In [None]:
import torchvision.transforms as transforms


In [27]:
x, y = next(iter(train_loader))
x.shape
# print(len(x))

torch.Size([128, 1, 28, 28])

In [3]:
# use wandb to monitor the GPU training process
import wandb

wandb.init(project="CH7",
           name="AlexNet_size_64")

config = wandb.config  # config的初始化
config.batch_size = 64  
config.epochs = 50  
config.lr = 0.01   
config.use_cuda = True

train_loader, test_loader = load_data_fashion_mnist(batch_size=config.batch_size, resize=224)
AlexNet = models.AlexNet()
wandb.watch_called = False 

train_models(AlexNet, train_loader, test_loader, epochs=config.epochs, lr=config.lr, use_wandb=True)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mwangxinming[0m ([33mgogowang[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

epoch: 1, train loss: 1.0748, train acc: 0.6024, test acc: 0.7634
epoch: 2, train loss: 0.5389, train acc: 0.8007, test acc: 0.8100
epoch: 3, train loss: 0.4422, train acc: 0.8389, test acc: 0.8210
epoch: 4, train loss: 0.3879, train acc: 0.8582, test acc: 0.8594
epoch: 5, train loss: 0.3571, train acc: 0.8696, test acc: 0.8532
epoch: 6, train loss: 0.3337, train acc: 0.8771, test acc: 0.8695
epoch: 7, train loss: 0.3136, train acc: 0.8859, test acc: 0.8764
epoch: 8, train loss: 0.3006, train acc: 0.8892, test acc: 0.8784
epoch: 9, train loss: 0.2858, train acc: 0.8946, test acc: 0.8788
epoch: 10, train loss: 0.2740, train acc: 0.8993, test acc: 0.8879
epoch: 11, train loss: 0.2637, train acc: 0.9041, test acc: 0.8927
epoch: 12, train loss: 0.2528, train acc: 0.9077, test acc: 0.8864
epoch: 13, train loss: 0.2432, train acc: 0.9105, test acc: 0.8816
epoch: 14, train loss: 0.2358, train acc: 0.9125, test acc: 0.8974
epoch: 15, train loss: 0.2275, train acc: 0.9159, test acc: 0.9008
epoc

In [4]:
wandb.init(project="CH7",
           name="AlexNet_size_128")

config = wandb.config  # config的初始化
config.batch_size = 128  
config.epochs = 50  
config.lr = 0.01   
config.use_cuda = True

# train_loader, test_loader = load_data_fashion_mnist(batch_size=config.batch_size, resize=224)
# AlexNet = models.AlexNet()
wandb.watch_called = False 

train_models(AlexNet, train_loader, test_loader, epochs=config.epochs, lr=config.lr, use_wandb=True)

0,1
train_acc,▁▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
train_loss,█▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.98038
train_loss,0.05063


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

epoch: 1, train loss: 1.0093, train acc: 0.6275, test acc: 0.7633
epoch: 2, train loss: 0.5321, train acc: 0.8000, test acc: 0.8117
epoch: 3, train loss: 0.4397, train acc: 0.8378, test acc: 0.8442
epoch: 4, train loss: 0.3876, train acc: 0.8571, test acc: 0.8522
epoch: 5, train loss: 0.3549, train acc: 0.8695, test acc: 0.8663
epoch: 6, train loss: 0.3316, train acc: 0.8779, test acc: 0.8588
epoch: 7, train loss: 0.3144, train acc: 0.8838, test acc: 0.8789
epoch: 8, train loss: 0.2988, train acc: 0.8894, test acc: 0.8744
epoch: 9, train loss: 0.2859, train acc: 0.8938, test acc: 0.8829
epoch: 10, train loss: 0.2758, train acc: 0.8976, test acc: 0.8849
epoch: 11, train loss: 0.2638, train acc: 0.9020, test acc: 0.8850
epoch: 12, train loss: 0.2548, train acc: 0.9057, test acc: 0.8953
epoch: 13, train loss: 0.2450, train acc: 0.9093, test acc: 0.8888
epoch: 14, train loss: 0.2383, train acc: 0.9115, test acc: 0.9006
epoch: 15, train loss: 0.2294, train acc: 0.9150, test acc: 0.8950
epoc

In [5]:
wandb.init(project="CH7",
           name="AlexNet_size_256")

config = wandb.config  # config的初始化
config.batch_size = 256  
config.epochs = 50  
config.lr = 0.01   
config.use_cuda = True

# train_loader, test_loader = load_data_fashion_mnist(batch_size=config.batch_size, resize=224)
# AlexNet = models.AlexNet()
wandb.watch_called = False 

train_models(AlexNet, train_loader, test_loader, epochs=config.epochs, lr=config.lr, use_wandb=True)

VBox(children=(Label(value='0.001 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.057543…

0,1
train_acc,▁▄▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████████████
train_loss,█▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train_acc,0.9797
train_loss,0.05455


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

epoch: 1, train loss: 0.9569, train acc: 0.6430, test acc: 0.7544
epoch: 2, train loss: 0.5255, train acc: 0.8019, test acc: 0.8102
epoch: 3, train loss: 0.4324, train acc: 0.8411, test acc: 0.8345
epoch: 4, train loss: 0.3814, train acc: 0.8596, test acc: 0.8591
epoch: 5, train loss: 0.3502, train acc: 0.8716, test acc: 0.8612
epoch: 6, train loss: 0.3287, train acc: 0.8789, test acc: 0.8658
epoch: 7, train loss: 0.3111, train acc: 0.8851, test acc: 0.8735
epoch: 8, train loss: 0.2968, train acc: 0.8901, test acc: 0.8802
epoch: 9, train loss: 0.2845, train acc: 0.8953, test acc: 0.8759
epoch: 10, train loss: 0.2730, train acc: 0.8980, test acc: 0.8790
epoch: 11, train loss: 0.2625, train acc: 0.9018, test acc: 0.8896
epoch: 12, train loss: 0.2537, train acc: 0.9051, test acc: 0.8933
epoch: 13, train loss: 0.2441, train acc: 0.9095, test acc: 0.8943
epoch: 14, train loss: 0.2371, train acc: 0.9129, test acc: 0.8916
epoch: 15, train loss: 0.2293, train acc: 0.9141, test acc: 0.8993
epoc

## 7.2 使用块的网络（VGG）

In [1]:
import torch
from torch import nn
from utlis import *
import models
import wandb

def train_models(net, 
                 train_loader, 
                 test_loader, 
                 epochs, 
                 lr, 
                 net_type : str =  None,
                 use_wandb : bool = False,
                 device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')):
    # 设置优化器
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    # 设置损失函数
    loss = nn.CrossEntropyLoss()
    
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    net = net.to(device)
    
    epoch_list = []
    train_loss = []
    train_acc = []
    test_acc = []
    
    if use_wandb:
        wandb.watch(net, log="all")

    print('training on', device)
    
    
    for epoch in range(epochs):
    # 训练
        loss_sum = 0
        acc = 0
        net.train()
        for idx, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            predict = net(x)
            l = loss(predict, y)
            l.backward()
            optimizer.step()
            
            loss_sum += l.item() * x.size(0)
            acc += predict.max(dim=1)[1].eq(y).sum().item()
            if (idx + 1) % (len(train_loader) // 5) == 0 or idx == len(train_loader) - 1:
                if (idx + 1) != len(train_loader):
                    loss_now = loss_sum / (idx + 1) / x.size(0)
                    acc_now = acc / (idx + 1) / x.size(0)
                else:
                    loss_now = loss_sum / len(train_loader.dataset)
                    acc_now = acc / len(train_loader.dataset)
                
                if use_wandb:
                    wandb.log({"train_loss": loss_now, "train_acc": acc_now})
                train_acc.append(acc_now)
                train_loss.append(loss_now)
                epoch_list.append(epoch + (idx + 1) / len(train_loader))         
        # loss_sum /= len(train_loader.dataset)
        # acc /= len(train_loader.dataset)
        # train_loss.append(loss_sum)
        # train_acc.append(acc)
            
        # 测试
        loss_sum = 0
        acc = 0
        for idx, (x, y) in enumerate(test_loader):
            x, y = x.to(device), y.to(device)
            predict = net(x)
            l = loss(predict, y)
            
            acc += predict.max(dim=1)[1].eq(y).sum().item()
            
        acc /= len(test_loader.dataset)
        test_acc.append(acc)
        # print(epoch_list)
        
        print('epoch: {}, train loss: {:.4f}, train acc: {:.4f}, test acc: {:.4f}'.format(epoch + 1, train_loss[-1], train_acc[-1], test_acc[-1]))
    # print(len(train_loss), len(train_acc), len(test_acc))
    
    epoch_test = list(range(1, int(epoch_list[-1]) + 1, 1))
    
    if net_type is not None: 
    
        write2csv('ch7_02.csv', epoch_list, net_type + '_epoch_train')
        write2csv('ch7_02.csv', train_loss, net_type + '_train_loss')
        write2csv('ch7_02.csv', train_acc, net_type + '_train_acc')
        write2csv('ch7_02_eval.csv', epoch_test, net_type + '_epoch_test')
        write2csv('ch7_02_eval.csv', test_acc, net_type + '_test_acc')
        print('record the data')
    else:
        print('net_type is None, don\'t record the data')
    
    # draw_loss_acc(train_loss, train_acc, test_acc, epoch_list)
    

In [2]:

Vgg11 = models.vgg11(coef=4)
train_loader, test_loader = load_data_fashion_mnist(batch_size=64, resize=224)

train_models(Vgg11, train_loader, test_loader, epochs=20, lr=0.05, net_type='Vgg11_224')

training on cuda:0
epoch: 1, train loss: 0.7362, train acc: 0.7266, test acc: 0.8459
epoch: 2, train loss: 0.3385, train acc: 0.8759, test acc: 0.8769
epoch: 3, train loss: 0.2792, train acc: 0.8973, test acc: 0.8855
epoch: 4, train loss: 0.2437, train acc: 0.9105, test acc: 0.9026
epoch: 5, train loss: 0.2172, train acc: 0.9193, test acc: 0.9095
epoch: 6, train loss: 0.1975, train acc: 0.9273, test acc: 0.9114
epoch: 7, train loss: 0.1796, train acc: 0.9334, test acc: 0.9087
epoch: 8, train loss: 0.1611, train acc: 0.9413, test acc: 0.9153
epoch: 9, train loss: 0.1444, train acc: 0.9463, test acc: 0.9156
epoch: 10, train loss: 0.1291, train acc: 0.9519, test acc: 0.9095
epoch: 11, train loss: 0.1171, train acc: 0.9565, test acc: 0.9177
epoch: 12, train loss: 0.1025, train acc: 0.9625, test acc: 0.9191
epoch: 13, train loss: 0.0873, train acc: 0.9679, test acc: 0.9195
epoch: 14, train loss: 0.0766, train acc: 0.9723, test acc: 0.9223
epoch: 15, train loss: 0.0671, train acc: 0.9756, te

In [2]:
train_loader, test_loader = load_data_fashion_mnist(batch_size=64, resize=96)
Vgg11 = models.vgg11(coef=4, input_size=96)
train_models(Vgg11, train_loader, test_loader, epochs=20, lr=0.05, net_type='Vgg11_96')

training on cuda:0
epoch: 1, train loss: 1.1540, train acc: 0.5661, test acc: 0.8133
epoch: 2, train loss: 0.4084, train acc: 0.8477, test acc: 0.8560
epoch: 3, train loss: 0.3296, train acc: 0.8778, test acc: 0.8737
epoch: 4, train loss: 0.2906, train acc: 0.8930, test acc: 0.8790
epoch: 5, train loss: 0.2630, train acc: 0.9028, test acc: 0.8943
epoch: 6, train loss: 0.2408, train acc: 0.9103, test acc: 0.8961
epoch: 7, train loss: 0.2202, train acc: 0.9195, test acc: 0.8925
epoch: 8, train loss: 0.2042, train acc: 0.9244, test acc: 0.9055
epoch: 9, train loss: 0.1890, train acc: 0.9304, test acc: 0.8958
epoch: 10, train loss: 0.1747, train acc: 0.9344, test acc: 0.9142
epoch: 11, train loss: 0.1606, train acc: 0.9409, test acc: 0.9138
epoch: 12, train loss: 0.1492, train acc: 0.9443, test acc: 0.9045
epoch: 13, train loss: 0.1353, train acc: 0.9495, test acc: 0.9146
epoch: 14, train loss: 0.1253, train acc: 0.9527, test acc: 0.8762
epoch: 15, train loss: 0.1140, train acc: 0.9579, te

## 7.3 网络中的网络(NiN)

In [1]:
import torch
from torch import nn
from utlis import *
import models
import wandb

def train_models(net, 
                 train_loader, 
                 test_loader, 
                 epochs, 
                 lr, 
                 net_type : str =  None,
                 use_wandb : bool = False,
                 device=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')):
    # 设置优化器
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    # 设置损失函数
    loss = nn.CrossEntropyLoss()
    
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    net = net.to(device)
    
    epoch_list = []
    train_loss = []
    train_acc = []
    test_acc = []
    
    if use_wandb:
        wandb.watch(net, log="all")

    print('training on', device)
    
    
    for epoch in range(epochs):
    # 训练
        loss_sum = 0
        acc = 0
        net.train()
        for idx, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            predict = net(x)
            l = loss(predict, y)
            l.backward()
            optimizer.step()
            
            loss_sum += l.item() * x.size(0)
            acc += predict.max(dim=1)[1].eq(y).sum().item()
            if (idx + 1) % (len(train_loader) // 5) == 0 or idx == len(train_loader) - 1:
                if (idx + 1) != len(train_loader):
                    loss_now = loss_sum / (idx + 1) / x.size(0)
                    acc_now = acc / (idx + 1) / x.size(0)
                else:
                    loss_now = loss_sum / len(train_loader.dataset)
                    acc_now = acc / len(train_loader.dataset)
                
                if use_wandb:
                    wandb.log({"train_loss": loss_now, "train_acc": acc_now})
                train_acc.append(acc_now)
                train_loss.append(loss_now)
                epoch_list.append(epoch + (idx + 1) / len(train_loader))         
        # loss_sum /= len(train_loader.dataset)
        # acc /= len(train_loader.dataset)
        # train_loss.append(loss_sum)
        # train_acc.append(acc)
            
        # 测试
        loss_sum = 0
        acc = 0
        with torch.no_grad():
            net.eval()
            for idx, (x, y) in enumerate(test_loader):
                x, y = x.to(device), y.to(device)
                predict = net(x)
                l = loss(predict, y)
                
                acc += predict.max(dim=1)[1].eq(y).sum().item()
                
            acc /= len(test_loader.dataset)
            test_acc.append(acc)
        if use_wandb:
            wandb.log({"test_loss": acc})
        # print(epoch_list)
        if epoch / epochs > 0.5:
            print('epoch: {}, train loss: {:.4f}, train acc: {:.4f}, test acc: {:.4f}'.format(epoch + 1, train_loss[-1], train_acc[-1], test_acc[-1]))
    # print(len(train_loss), len(train_acc), len(test_acc))
    
    epoch_test = list(range(1, int(epoch_list[-1]) + 1, 1))
    
    if net_type is not None: 
    
        write2csv('ch7_03.csv', epoch_list, net_type + '_epoch_train')
        write2csv('ch7_03.csv', train_loss, net_type + '_train_loss')
        write2csv('ch7_03.csv', train_acc, net_type + '_train_acc')
        write2csv('ch7_03_eval.csv', epoch_test, net_type + '_epoch_test')
        write2csv('ch7_03_eval.csv', test_acc, net_type + '_test_acc')
        print('record the data')
    else:
        print('net_type is None, don\'t record the data')

In [2]:
hyper_0 = [0.1, 30, 128]
hyper_1 = [0.08, 30, 128]
hyper_2 = [0.2, 30, 128]
hyper_3 = [0.1, 30, 64]
hyper_4 = [0.2, 30, 64]
hyper_5 = [0.1, 30, 512]
hyper_params = [hyper_0, hyper_1, hyper_2, hyper_3, hyper_4, hyper_5]



In [3]:
for idx, hyper in enumerate(hyper_params):
    Nin_net = models.NiN()
    lr, epochs, batch_size = hyper
    train_loader, test_loader = load_data_fashion_mnist(batch_size=batch_size, resize=96)
    print('lr: {}, epochs: {}, batch_size: {}'.format(lr, epochs, batch_size))
    train_models(Nin_net, train_loader, test_loader, epochs=epochs, lr=lr, net_type='Nin_hp' + str(idx))

lr: 0.1, epochs: 30, batch_size: 128
training on cuda:0
epoch: 17, train loss: 0.4615, train acc: 0.8193, test acc: 0.8107
epoch: 18, train loss: 0.4544, train acc: 0.8215, test acc: 0.8074
epoch: 19, train loss: 0.4432, train acc: 0.8248, test acc: 0.8057
epoch: 20, train loss: 0.4375, train acc: 0.8282, test acc: 0.8115
epoch: 21, train loss: 0.4266, train acc: 0.8319, test acc: 0.8153
epoch: 22, train loss: 0.4248, train acc: 0.8322, test acc: 0.8100
epoch: 23, train loss: 0.4140, train acc: 0.8357, test acc: 0.8145
epoch: 24, train loss: 0.4096, train acc: 0.8365, test acc: 0.8180
epoch: 25, train loss: 0.4022, train acc: 0.8395, test acc: 0.7872
epoch: 26, train loss: 0.3946, train acc: 0.8420, test acc: 0.8091
epoch: 27, train loss: 0.3896, train acc: 0.8446, test acc: 0.8167
epoch: 28, train loss: 0.3857, train acc: 0.8444, test acc: 0.8190
epoch: 29, train loss: 0.3771, train acc: 0.8492, test acc: 0.8208
epoch: 30, train loss: 0.3744, train acc: 0.8487, test acc: 0.7982
record

In [8]:
Nin_net = models.NiN()
train_loader, test_loader = load_data_fashion_mnist(batch_size=64, resize=96)
print('lr: {}, epochs: {}, batch_size: {}'.format(0.1, 30, 64))
train_models(Nin_net, train_loader, test_loader, epochs=30, lr=0.1, net_type='Nin_hp' + str(3))

lr: 0.1, epochs: 30, batch_size: 64
training on cuda:0
epoch: 17, train loss: 0.3958, train acc: 0.8399, test acc: 0.8220
epoch: 18, train loss: 0.3895, train acc: 0.8420, test acc: 0.8024
epoch: 19, train loss: 0.3808, train acc: 0.8452, test acc: 0.8140
epoch: 20, train loss: 0.3781, train acc: 0.8472, test acc: 0.8186
epoch: 21, train loss: 0.3704, train acc: 0.8489, test acc: 0.7731
epoch: 22, train loss: 0.3593, train acc: 0.8533, test acc: 0.8185
epoch: 23, train loss: 0.3602, train acc: 0.8523, test acc: 0.8081
epoch: 24, train loss: 0.3460, train acc: 0.8577, test acc: 0.8131
epoch: 25, train loss: 0.3453, train acc: 0.8589, test acc: 0.8056
epoch: 26, train loss: 0.3424, train acc: 0.8587, test acc: 0.8220
epoch: 27, train loss: 0.3396, train acc: 0.8609, test acc: 0.8158
epoch: 28, train loss: 0.3317, train acc: 0.8635, test acc: 0.8222
epoch: 29, train loss: 0.3310, train acc: 0.8638, test acc: 0.8188
epoch: 30, train loss: 0.3258, train acc: 0.8650, test acc: 0.7884
record 

In [2]:
print('training NiN net')
wandb.init(project="CH7",
           name="NiN")
config = wandb.config  # config的初始化

config.batch_size = 64  
config.epochs = 30 
config.lr = 0.1   
config.use_cuda = True
wandb.watch_called = False 
train_loader, test_loader = load_data_fashion_mnist(batch_size=config.batch_size, resize=96)
Nin_net = models.NiN()

train_models(Nin_net, train_loader, test_loader, epochs=config.epochs, lr=config.lr, net_type='NiN', use_wandb=True)


training NiN net


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mwangxinming[0m ([33mgogowang[0m). Use [1m`wandb login --relogin`[0m to force relogin


training on cuda:0
epoch: 17, train loss: 0.6305, train acc: 0.7530, test acc: 0.7368
epoch: 18, train loss: 0.6209, train acc: 0.7552, test acc: 0.7392
epoch: 19, train loss: 0.6162, train acc: 0.7567, test acc: 0.7394
epoch: 20, train loss: 0.6084, train acc: 0.7582, test acc: 0.7205
epoch: 21, train loss: 0.6013, train acc: 0.7609, test acc: 0.7337
epoch: 22, train loss: 0.5966, train acc: 0.7611, test acc: 0.7331
epoch: 23, train loss: 0.5949, train acc: 0.7622, test acc: 0.7364
epoch: 24, train loss: 0.5882, train acc: 0.7647, test acc: 0.7416
epoch: 25, train loss: 0.5801, train acc: 0.7664, test acc: 0.7408
epoch: 26, train loss: 0.5759, train acc: 0.7667, test acc: 0.7231
epoch: 27, train loss: 0.5719, train acc: 0.7683, test acc: 0.7381
epoch: 28, train loss: 0.5695, train acc: 0.7692, test acc: 0.7244
epoch: 29, train loss: 0.5671, train acc: 0.7688, test acc: 0.7425
epoch: 30, train loss: 0.5577, train acc: 0.7729, test acc: 0.7398
record the data


In [3]:
print('training NiNSimple net')

wandb.init(project="CH7",
           name="NiN_simple")
config = wandb.config  # config的初始化
config.batch_size = 64  
config.epochs = 30 
config.lr = 0.1   
config.use_cuda = True
wandb.watch_called = False

train_loader, test_loader = load_data_fashion_mnist(batch_size=config.batch_size, resize=96)
Nin_net = models.NiN_Simple()
train_models(Nin_net, train_loader, test_loader, epochs=config.epochs, lr=config.lr, net_type='NiN_Simple', use_wandb=True)

training NiNSimple net


VBox(children=(Label(value='0.001 MB of 0.102 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.010771…

0,1
test_loss,▁▃▃▄▄▇▇▇▇▇██▇██████▇█████▇█▇██
train_acc,▁▂▅▆▆▆▆▆▇▇▇█████████████████████████████
train_loss,█▇▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
test_loss,0.7398
train_acc,0.77285
train_loss,0.5577


training on cuda:0
epoch: 17, train loss: 0.1570, train acc: 0.9412, test acc: 0.9187
epoch: 18, train loss: 0.1544, train acc: 0.9418, test acc: 0.9111
epoch: 19, train loss: 0.1435, train acc: 0.9468, test acc: 0.9166
epoch: 20, train loss: 0.1362, train acc: 0.9485, test acc: 0.9161
epoch: 21, train loss: 0.1349, train acc: 0.9500, test acc: 0.9153
epoch: 22, train loss: 0.1256, train acc: 0.9528, test acc: 0.9170
epoch: 23, train loss: 0.1210, train acc: 0.9544, test acc: 0.9131
epoch: 24, train loss: 0.1157, train acc: 0.9562, test acc: 0.9196
epoch: 25, train loss: 0.1072, train acc: 0.9601, test acc: 0.9183
epoch: 26, train loss: 0.1091, train acc: 0.9591, test acc: 0.9141
epoch: 27, train loss: 0.1015, train acc: 0.9613, test acc: 0.9120
epoch: 28, train loss: 0.0969, train acc: 0.9635, test acc: 0.9094
epoch: 29, train loss: 0.0923, train acc: 0.9646, test acc: 0.9139
epoch: 30, train loss: 0.0884, train acc: 0.9673, test acc: 0.9037
record the data
