## 0. 导入依赖包

In [1]:
import os
import cv2
import torch
import random
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


## 1. 数据处理

In [14]:
seed=0
num_workers=64
batch_size=2048
data_path = '/gemini/data-1/tiny-imagenet-200/'
out_path = '/gemini/code/dl-hw2/'

In [18]:
# 定义数据类处理文件
class RawData:

    __labels_t_path = '%s%s' % (data_path, 'wnids.txt')
    __train_data_path = '%s%s' % (data_path, 'train/')
    __val_data_path = '%s%s' % (data_path, 'val/')

    __labels_t = None
    __image_names = None

    __val_labels_t = None
    __val_labels = None
    __val_names = None

    @staticmethod
    def labels_t():
        if RawData.__labels_t is None:
            labels_t = []
            with open(RawData.__labels_t_path) as wnid:
                for line in wnid:
                    labels_t.append(line.strip('\n'))

            RawData.__labels_t = labels_t

        return RawData.__labels_t

    @staticmethod
    def image_names():
        if RawData.__image_names is None:
            image_names = []
            labels_t = RawData.labels_t()
            for label in labels_t:
                txt_path = RawData.__train_data_path + label + '/' + label + '_boxes.txt'
                image_name = []
                with open(txt_path) as txt:
                    for line in txt:
                        image_name.append(line.strip('\n').split('\t')[0])
                image_names.append(image_name)

            RawData.__image_names = image_names

        return RawData.__image_names

    @staticmethod
    def val_labels_t():
        if RawData.__val_labels_t is None:
            val_labels_t = []
            with open(RawData.__val_data_path + 'val_annotations.txt') as txt:
                for line in txt:
                    val_labels_t.append(line.strip('\n').split('\t')[1])

            RawData.__val_labels_t = val_labels_t

        return RawData.__val_labels_t

    @staticmethod
    def val_names():
        if RawData.__val_names is None:
            val_names = []
            with open(RawData.__val_data_path + 'val_annotations.txt') as txt:
                for line in txt:
                    val_names.append(line.strip('\n').split('\t')[0])

            RawData.__val_names = val_names

        return RawData.__val_names

    @staticmethod
    def val_labels():
        if RawData.__val_labels is None:
            val_labels = []
            val_labels_t = RawData.val_labels_t()
            labels_t = RawData.labels_t()
            for i in range(len(val_labels_t)):
                for i_t in range(len(labels_t)):
                    if val_labels_t[i] == labels_t[i_t]:
                        val_labels.append(i_t)
            val_labels = np.array(val_labels)

            RawData.__val_labels = val_labels

        return RawData.__val_labels


# 定义 Dataset 类
class Data(Dataset):

    def __init__(self, type_, transform):
        """
        type_: 选择训练集还是验证集
        """
        self.__train_data_path = '%s%s' % (data_path, 'train/')
        self.__val_data_path = '%s%s' % (data_path, 'val/')

        self.type = type_

        self.labels_t = RawData.labels_t()
        self.image_names = RawData.image_names()
        self.val_names = RawData.val_names()

        self.transform = transform

    def __getitem__(self, index):
        label = None
        image = None

        labels_t = self.labels_t
        image_names = self.image_names
        val_labels = RawData.val_labels()
        val_names = self.val_names

        if self.type == "train":
            label = index // 500  # 每个类别的图片 500 张
            remain = index % 500
            image_path = os.path.join(self.__train_data_path, labels_t[label], 'images', image_names[label][remain])
            image = cv2.imread(image_path)
            image = np.array(image).reshape(64, 64, 3)

        elif self.type == "val":
            label = val_labels[index]
            val_image_path = os.path.join(self.__val_data_path, 'images', val_names[index])
            image = np.array(cv2.imread(val_image_path)).reshape(64, 64, 3)

        return label, self.transform(image)

    def __len__(self):
        len_ = 0
        if self.type == "train":
            len_ = len(self.image_names) * len(self.image_names[0])
        elif self.type == "val":
            len_ = len(self.val_names)

        return len_

torch.manual_seed(seed)
np.random.seed(seed)
torch.cuda.manual_seed(seed)
random.seed(seed)
# 这里 ToTensor 会把 numpy 类型转换为 tensor 类型，并对数据归一化到 [0, 1]
train_dataset = Data(type_="train", transform=transforms.Compose([transforms.ToTensor()]))
test_dataset = Data(type_="val", transform=transforms.Compose([transforms.ToTensor()]))
# 从训练数据中手动划分训练集和验证集
train_dataset, val_dataset = random_split(train_dataset,
                                                    [int(len(train_dataset) * 0.8),
                                                     len(train_dataset) - int(len(train_dataset) * 0.8)],
                                                    generator=torch.Generator().manual_seed(0))




trainloader = DataLoader(dataset=train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   num_workers=num_workers,
                                   drop_last=True)
valloader = DataLoader(dataset=val_dataset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers,
                                 drop_last=True)
testloader = DataLoader(dataset=test_dataset,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=num_workers,
                                drop_last=False)

## 2. 模型搭建


In [4]:
class conv_norm_relu_drop(nn.Module):
    """卷积层
    """
    def __init__(self, in_channel, out_channel, dropout=False, normalize=False):
        super(conv_norm_relu_drop, self).__init__()
        self.conv = nn.Conv2d(in_channel, out_channel, 3, 1, 1)
        self.normalize = normalize
        if normalize:
            self.norm = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.dropout = dropout
        if dropout:
            self.drop = nn.Dropout(p=0.2)

    def forward(self, x):
        x = self.conv(x)
        if self.normalize:
            x = self.norm(x)
        x = self.relu(x)
        if self.dropout:
            x = self.drop(x)
        return x

class residual_block(nn.Module):
    """残差网络
    """

    def __init__(self, channel, dropout=False, normalize=False):
        super(residual_block, self).__init__()
        self.conv1 = conv_norm_relu_drop(channel,channel,dropout,normalize)
        self.conv2 = conv_norm_relu_drop(channel,channel,dropout,normalize)
        
    def forward(self, x):
        x_ = self.conv1(x)
        x_ = self.conv2(x_)
        return x + x_
        

class Net(nn.Module):
    """自定义卷积网络
    """

    def __init__(self, channels = [64, 128, 256, 512, 256, 128], dropout=True, normalize=True, residual=True):
        super(Net, self).__init__()

        # 构建网络结构
        self.layers = nn.Sequential()
        channels = [3] + channels  
        if residual:
            for i in range(len(channels) - 1):
                setattr(self.layers, f'conv{i + 1}', conv_norm_relu_drop(channels[i], channels[i + 1], dropout, normalize))
                setattr(self.layers, f'resi{i + 1}', residual_block(channels[i + 1], dropout, normalize))
                setattr(self.layers, f'pool{i + 1}', nn.MaxPool2d(2, 2))
        else:
            for i in range(len(channels) - 1):
                setattr(self.layers, f'conv{i + 1}_1', conv_norm_relu_drop(channels[i], channels[i + 1], dropout, normalize))
                setattr(self.layers, f'conv{i + 1}_2', conv_norm_relu_drop(channels[i + 1], channels[i + 1], dropout, normalize))
                setattr(self.layers, f'conv{i + 1}_3', conv_norm_relu_drop(channels[i + 1], channels[i + 1], dropout, normalize))
                setattr(self.layers, f'pool{i + 1}', nn.MaxPool2d(2, 2))

        # 添加全连接层
        self.fc1 = nn.Linear(channels[-1] * 2**(12-2*len(channels)+2), 200)


    def forward(self, x):
        x = self.layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x

## 3. 模型训练

In [11]:
def acc(labels, outputs, type_="top1"):
    acc = 0
    if type_ == "top1":
        pre_labels = np.argmax(outputs, axis=1)
        labels = labels.reshape(len(labels))
        acc = np.sum(pre_labels == labels) / len(pre_labels)

    return acc

def Training(device, net, trainloader, valloader, lr, lrd, optimizer,loss_func,scheduler,wait,epochs,test):

    loss_train = []
    loss_val = []
    acc_train = []
    acc_val = []
    for i in range(epochs):
        train_loss = 0.0
        train_acc = 0.0
        val_loss = 0.0
        val_acc = 0.0
        min_val_loss = np.inf
        min_val_loss_acc = 0.0
        delay = 0
        
        net.train()
        for labels,inputs in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = loss_func(outputs, labels)
            train_loss += loss.item()
            train_acc += acc(labels=labels.cpu().numpy(), outputs=outputs.detach().cpu().numpy())
            loss.backward()
            optimizer.step()
        train_loss = train_loss / len(trainloader)
        train_acc = train_acc / len(trainloader)
        loss_train.append(train_loss)
        acc_train.append(train_acc)


        net.eval()
        for labels,inputs in valloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = net(inputs)
            loss = loss_func(outputs, labels)
            val_loss += loss.item()
            val_acc += acc(labels=labels.cpu().numpy(), outputs=outputs.detach().cpu().numpy())
        val_loss = val_loss / len(valloader)
        val_acc = val_acc / len(valloader)
        loss_val.append(train_loss)
        acc_val.append(train_acc)


        print(f"Epoch {i}: train loss {train_loss:10.6f}, acc {train_acc:7.4f}, "f"val loss {val_loss:10.6f}, acc {val_acc:7.4f}, ")

        if lrd:
            scheduler.step(val_loss)
            
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            min_val_loss_acc = val_acc
            print(f"Update min_val_loss to {min_val_loss:10.6f}")
            delay = 0
        else:
            delay = delay + 1

        if delay > wait:
            break
    if test:
        test_acc = 0.0
        for labels, inputs in testloader:
            inputs = inputs.to(device)
            outputs = net(inputs)
            test_acc += acc(labels.numpy(), outputs.detach().cpu().numpy())
        test_acc = test_acc / len(testloader)
        print(test_acc)
    
    data = [train_loss,val_loss,train_acc,val_acc]
    curve = [loss_train, loss_val, acc_train, acc_val]
    return data, curve

## 4. 实验设置

In [15]:
def Test(lrd=True,dropout=True,normalize=True,residual=True,channels= [64, 128, 256, 512, 256, 128],test=False):

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    net = Net(channels, dropout, normalize, residual).to(device)
    lr = 0.001
    optimizer = optim.Adam(net.parameters(), lr=lr)
    loss_func = nn.CrossEntropyLoss()
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=1, verbose=True)
    wait = 4
    epochs = 20

    data, curves = Training(device, net, trainloader, valloader, lr, lrd, optimizer,loss_func,scheduler,wait,epochs,test)
    return data,curves

## 5. 调参分析

In [17]:
def plotter(title,p):
    fig, axs = plt.subplots(2, 2, figsize=(16, 9), dpi=300)
    x = range(len(p[0][1][0]))
    axs[0,0].set_title('loss_train')
    axs[0,1].set_title('loss_val')
    axs[1,0].set_title('acc_train')
    axs[1,1].set_title('acc_val')
    legend = []
    for i in range(len(title)):
        legend.extend([title[i]])
        axs[0,0].plot(x, p[i][1][0])
        axs[0,1].plot(x, p[i][1][1])
        axs[1,0].plot(x, p[i][1][2])
        axs[1,1].plot(x, p[i][1][3])
    axs[0,0].legend(legend)
    axs[0,1].legend(legend)
    axs[1,0].legend(legend)
    axs[1,1].legend(legend)
    plt.savefig(out_path+f"out.png")
    plt.show()
    with open(out_path+f"out.txt", "w") as file:
        for i in range(len(title)):
            file.write(title[i]+":"+'\t'.join(map(str, p[i][0]))+"\n")
    

p11 = Test(lrd=True,dropout=True,normalize=True,residual=True,channels= [64, 128, 256, 512, 256, 128])
p12 = Test(lrd=False,dropout=True,normalize=True,residual=True,channels= [64, 128, 256, 512, 256, 128])
p13 = Test(lrd=True,dropout=False,normalize=True,residual=True,channels= [64, 128, 256, 512, 256, 128])
p14 = Test(lrd=True,dropout=True,normalize=False,residual=True,channels= [64, 128, 256, 512, 256, 128])
p15 = Test(lrd=True,dropout=True,normalize=True,residual=False,channels= [64, 128, 256, 512, 256, 128])
p16 = Test(lrd=True,dropout=True,normalize=True,residual=True,channels= [64, 128, 256, 512, 256])
p17 = Test(lrd=True,dropout=True,normalize=True,residual=True,channels= [64, 128, 256, 512])
p18 = Test(lrd=True,dropout=False,normalize=False,residual=True,channels= [64, 128, 256, 512])
p19 = Test(lrd=True,dropout=False,normalize=True,residual=True,channels= [64, 128, 256, 512])
plotter(['std','no_lrd','no_drop','no_norm','no_res','depth-1','dept-2','no_drop_norm_depth-1','no_drop_depth-1'],[p11,p12,p13,p14,p15,p16,p17,p18,p19])

Epoch 0: train loss   5.156785, acc  0.0209, val loss   4.767892, acc  0.0409, 
Update min_val_loss to   4.767892
Epoch 1: train loss   4.242564, acc  0.1013, val loss   4.178027, acc  0.1161, 
Update min_val_loss to   4.178027
Epoch 2: train loss   3.497676, acc  0.2103, val loss   3.553761, acc  0.2113, 
Update min_val_loss to   3.553761
Epoch 3: train loss   3.042517, acc  0.2925, val loss   3.174985, acc  0.2724, 
Update min_val_loss to   3.174985
Epoch 4: train loss   2.657623, acc  0.3651, val loss   3.179908, acc  0.2820, 
Update min_val_loss to   3.179908
Epoch 5: train loss   2.343330, acc  0.4278, val loss   3.001223, acc  0.3164, 
Update min_val_loss to   3.001223
Epoch 6: train loss   2.039183, acc  0.4923, val loss   3.049166, acc  0.3226, 
Update min_val_loss to   3.049166
Epoch 7: train loss   1.735836, acc  0.5565, val loss   3.011952, acc  0.3481, 
Epoch 00008: reducing learning rate of group 0 to 1.0000e-04.
Update min_val_loss to   3.011952
Epoch 8: train loss   1.04

TypeError: cannot unpack non-iterable numpy.float64 object