In [2]:
import torch.nn as nn
import torch
import matplotlib.pyplot as plt
import pickle
import numpy as np
import torch.nn.functional as F
import os
import torchvision

In [3]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


# 1.数据读取

In [4]:
# 直接读取数据，没有数据增强
def get_data(size=64,batch_size=128):
    train_transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size=(size, size)),
        torchvision.transforms.ToTensor(),
        ]) # 是否需要Normalization? torchvision.transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
    train_dataset = torchvision.datasets.ImageFolder(root='./tiny-imagenet-200/train', transform=train_transform)
    val_dataset = torchvision.datasets.ImageFolder(root='./tiny-imagenet-200/val', transform=train_transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    print('Successfully load data!')
    return train_loader,test_loader

In [5]:
# 训练集的标签按照文件名的顺序开始 验证集的标签自行确定
train_loader,val_loader = get_data(batch_size=128) # 一般使用batch为128

Successfully load data!


In [60]:
# 验证集的标签需要额外获取
# 测试集顺序获取标签 从而获得验证集的标签
TrainList = os.listdir('./tiny-imagenet-200/train/')
ValList = os.listdir('./tiny-imagenet-200/val/images')
# 生成两个字典 获取验证集的标签
NameToLabel = {}
ValnameToLabel = {}
for i,trainPath in enumerate(TrainList):
    NameToLabel[trainPath] = i
val_label = torch.zeros(10000)
file_class = {}
with open('./tiny-imagenet-200/val/val_annotations.txt','r',encoding='utf-8')as f:
    labels = f.readlines()
    for label in labels:
        ValnameToLabel[label.split('\t')[0]] = NameToLabel[label.split('\t')[1]]
# 字典完成后获取标签
for i,path in enumerate(ValList):
    val_label[i] = ValnameToLabel[path]
print(val_label[:5])
val_label = val_label.to(device)

tensor([107., 139., 158.,  90., 138.])


#### 对于任务1中某些模型需要数据增强的方法提供支持

In [28]:
trainList = [
        torchvision.transforms.RandomHorizontalFlip(p=0.6),
        torchvision.transforms.RandomVerticalFlip(p=0.6),
        torchvision.transforms.RandomRotation(degrees=20),
        torchvision.transforms.ColorJitter(brightness=0, contrast=[0.9,1.08], saturation=0, hue=0)
    ]
valCope= torchvision.transforms.CenterCrop(56)
trainCope = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(size=56),
    torchvision.transforms.RandomChoice(trainList),
    ])
def getdata_task1_aug(X,mode='train'):
    if mode == 'train':
        X = trainCope(X)
    elif mode == 'val':
        X = valCope(X)
    else:
        print('error!')
    return X

#### 对于任务1中DenseNet需要数据增强的方法提供支持

In [29]:
dataList = [
    torchvision.transforms.RandomHorizontalFlip(p=0.5),
    torchvision.transforms.RandomVerticalFlip(p=0.2),
    torchvision.transforms.GaussianBlur(kernel_size=3,sigma=(0.1,2.0)),
    torchvision.transforms.RandomCrop(64, padding=10, pad_if_needed=False, fill=0, padding_mode='constant'),
    torchvision.transforms.RandomAffine(degrees=45, translate=(0.2,0.2), shear=16)
]
dataCope = torchvision.transforms.RandomApply(dataList, p=0.5)
def get_data_for_densenet(X):
    X = dataCope(X)
    return X

低配版Mask（本质上也是一种数据增强的方法）

In [4]:
import numpy as np
from typing import Any, Callable, Optional, Tuple
from torchvision.datasets.folder import DatasetFolder, default_loader, IMG_EXTENSIONS
import albumentations as A

class Transforms:
    def __init__(self, transforms: A.Compose):
        self.transforms = transforms

    def __call__(self, img, *args, **kwargs):
        return self.transforms(image=np.array(img))['image']

class CustomImageFolder(DatasetFolder):
    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        loader: Callable[[str], Any] = default_loader,
        is_valid_file: Optional[Callable[[str], bool]] = None,
    ):
        super().__init__(
            root,
            loader,
            IMG_EXTENSIONS if is_valid_file is None else None,
            transform=transform,
            target_transform=target_transform,
            is_valid_file=is_valid_file,
        )
        self.imgs = self.samples

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            try:
                sample = self.transform(sample)
            except Exception:
                sample = self.transform(image=np.array(sample))["image"]
        if self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target

    def __len__(self) -> int:
        return len(self.samples)

In [None]:
# 对于加了噪声和没加噪声的数据，要保证打乱后的顺序一致
trainset_masked = CustomImageFolder(root='./tiny-imagenet-200/train',transform=Transforms(transforms=A.Cutout(num_holes=12, max_h_size=16, max_w_size=16, fill_value=0, always_apply=False, p=1)))
trainset_nomask = CustomImageFolder(root='./tiny-imagenet-200/train',transform=Transforms(transforms=A.Resize(64,64)))
torch.manual_seed(0)
g = torch.Generator()
train_loader_masked = torch.utils.data.DataLoader(trainset_masked, batch_size=128, shuffle=True,generator=g)
torch.manual_seed(0)
g = torch.Generator()
train_loader_nomask = torch.utils.data.DataLoader(trainset_nomask, batch_size=128, shuffle=True,generator=g)

# 2.任务1

#### 首先尝试AlexNet，可以作为baseline

In [9]:
def train_task1(model,epoch=100,lr=0.001):
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),lr=lr,weight_decay=1e-4) # 学习率有待调整
    schedule = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=32)
    # optimizer = torch.optim.SGD(model.parameters(),momentum=0.9,lr=lr,weight_decay=1e-4)
    lossRec = []
    validRec = []

    for i in range(epoch):
        running_loss = 0.0
        for data in train_loader:
            imgs,target = data
            imgs = imgs.to(device)
            target = target.to(device)
            output = model(imgs)    
            loss = loss_function(output,target)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        lossRec.append(running_loss/len(train_loader))
        print('Epoch: [{}/{}],TrainLoss:{:.5f}'.format(i+1,epoch,running_loss/len(train_loader))) 

        with torch.no_grad():
            correct_num = 0
            all_num = 0
            Pred = torch.zeros(0)
            Pred = Pred.to(device)
            for data in val_loader:
                imgs,_ = data
                pred = model(imgs.to(device))
                prob = torch.softmax(pred,dim=1)
                y_pred = torch.argmax(prob,dim=1)
                Pred = torch.concat((Pred,y_pred),0)
            acc = torch.sum(Pred == val_label)/Pred.shape[0]
            print('TestAcc:{:.4f}'.format(acc))
            validRec.append(acc.cpu().item())

        schedule.step()
        torch.save(model,'./AlexNet/AlexNet_{}.pt'.format(i+1+7))

    t = range(1,len(lossRec)+1)
    plt.figure(figsize=(18,10))
    plt.subplot(121)
    plt.plot(t,lossRec)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.subplot(122)
    plt.plot(t,validRec)
    plt.xlabel('epochs')
    plt.ylabel('Acc')
    plt.suptitle("loss-epoch-acc")
    plt.savefig('./AlexNet/img')
    plt.show()

    with open('trainloss.txt','a',encoding='utf-8')as f:
        f.writelines(lossRec)
    with open('valloss.txt','a',encoding='utf-8')as f:
        f.writelines(validRec)
        
    return

In [38]:
# AlexNet的代码在Alexnet.py文件中，其他模型同理
from aleXnet import Alexnet

model = Alexnet(num_classes=200,init_weights=True)
# print(model)
model.to(device)
# train_task1(model,lr=0.001,epoch=20)

Alexnet(
  (features): Sequential(
    (0): Conv2d(3, 48, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(48, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace=True)
    (11): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace=True)
    (14): Conv2d

#### 尝试使用ResNet网络

In [43]:
def train_resnet(model,epoch=100,lr=0.1):
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(),momentum=0.9,lr=lr,weight_decay=2e-4)
    
    lossRec = []
    validRec = []

    for i in range(epoch):
        running_loss = 0.0
        for data in train_loader:
            imgs,target = data
            imgs = getdata_task1_aug(imgs.to(device),mode='train') # 采用新的数据增强方式
            target = target.to(device)
            output = model(imgs)    
            loss = loss_function(output,target)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        lossRec.append(running_loss/len(train_loader))
        print('Epoch: [{}/{}],TrainLoss:{:.5f}'.format(i+1,epoch,running_loss/len(train_loader))) 

        with torch.no_grad():
            correct_num = 0
            all_num = 0
            Pred = torch.zeros(0)
            Pred = Pred.to(device)
            for data in val_loader:
                imgs,_ = data
                imgs = getdata_task1_aug(imgs.to(device),mode='val')
                pred = model(imgs)
                prob = torch.softmax(pred,dim=1)
                y_pred = torch.argmax(prob,dim=1)
                Pred = torch.concat((Pred,y_pred),0)
            acc = torch.sum(Pred == val_label)/Pred.shape[0]
            print('TestAcc:{:.4f}'.format(acc))
            validRec.append(acc.cpu().item())
        torch.save(model,'./ResNet_New/ResNet_New_{}.pt'.format(i+1+65))

    with open('trainloss.txt','a',encoding='utf-8')as f:
        for loss in lossRec:
            f.write(str(loss)+'\n')
    with open('valloss.txt','a',encoding='utf-8')as f:
        for loss in validRec:
            f.write(str(loss)+'\n')

    t = range(1,len(lossRec)+1)
    plt.figure(figsize=(18,10))
    plt.subplot(121)
    plt.plot(t,lossRec)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.subplot(122)
    plt.plot(t,validRec)
    plt.xlabel('epochs')
    plt.ylabel('Acc')
    plt.suptitle("loss-epoch-acc")
    plt.savefig('./ResNet_New/img')
    plt.show()

    return

In [None]:
from ResNet import ResNet

model = torch.load('./ResNet_New/ResNet_New_65.pt') # ResNet(init_weights=True)
model.to(device)
train_resnet(model,epoch=10,lr=0.00005)


#### 使用Resnet

最后的正确率大约在55%附近，使用的trick和训练方法基本可以从代码中总结出来

在模型设计中取n=1

#### 尝试使用Inception-ResNet

In [10]:
def train_inception_resnet(model,epoch=100,lr=0.01):
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),alpha=0.9,eps=1.0,lr=lr,weight_decay=2e-4)
    # optimizer = torch.optim.Adam(model.parameters(),lr=lr,weight_decay=2e-4)
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9) # 每轮学习率乘以0.9
    
    lossRec = []
    validRec = []

    for i in range(epoch):
        running_loss = 0.0
        for data in train_loader:
            imgs,target = data
            imgs = getdata_task1_aug(imgs.to(device),mode='train') # 采用新的数据增强方式
            target = target.to(device)
            output = model(imgs)    
            loss = loss_function(output,target)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        lossRec.append(running_loss/len(train_loader))
        print('Epoch: [{}/{}],TrainLoss:{:.5f}'.format(i+1,epoch,running_loss/len(train_loader))) 

        with torch.no_grad():
            correct_num = 0
            all_num = 0
            Pred = torch.zeros(0)
            Pred = Pred.to(device)
            for data in val_loader:
                imgs,_ = data
                imgs = getdata_task1_aug(imgs.to(device),mode='val')
                pred = model(imgs)
                prob = torch.softmax(pred,dim=1)
                y_pred = torch.argmax(prob,dim=1)
                Pred = torch.concat((Pred,y_pred),0)
            acc = torch.sum(Pred == val_label)/Pred.shape[0]
            print('TestAcc:{:.4f}'.format(acc))
            validRec.append(acc.cpu().item())
        torch.save(model,'./Inception_Tiny_ResNet/Inception_Tiny_ResNet_{}.pt'.format(i+1))
        # scheduler.step()

    with open('trainloss.txt','a',encoding='utf-8')as f:
        for loss in lossRec:
            f.write(str(loss)+'\n')
    with open('valloss.txt','a',encoding='utf-8')as f:
        for loss in validRec:
            f.write(str(loss)+'\n')

    t = range(1,len(lossRec)+1)
    # plt.figure(figsize=(18,10))
    plt.subplot(121)
    plt.plot(t,lossRec)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.subplot(122)
    plt.plot(t,validRec)
    plt.xlabel('epochs')
    plt.ylabel('Acc')
    plt.suptitle("loss-epoch-acc")
    plt.savefig('./Inception_Tiny_ResNet/img')
    plt.show()
    return

In [3]:
from Inception_ResNet import Inception_ResNet

model = Inception_ResNet(init_weights=True)
# print(model)
model = model.to(device)
# train_inception_resnet(model,lr=0.1,epoch=20)

#### 使用n=1的Inception ResNet模型

首先取n=1，可以发现模型收敛非常慢，首先考虑学习率的初始值较小，可以试试调大，其次也可能是模型的结构比较简单，需要将n设置为更高的数值由此来提高模型的拟合能力

在训练了60轮后，可以发现后期的正确率迭代非常慢，基本在30+附近，这也说明模型的拟合能力是不够的...

In [10]:
from Inception_ResNet import Inception_ResNet2

model = Inception_ResNet2(init_weights=True)
model = model.to(device)
# train_inception_resnet(model,lr=0.1,epoch=60)

发现n=2以后得到的模型效果更差，可能是参数量过多造成了过拟合问题，因此还是采用n=1

n=1在调整了学习率以后依然出现卡在40+的情况，首先可能是训练轮数不够（这个看完60轮的训练图像再做判断）-> 结果确实是在44附近就收敛了

scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.9) # 每轮正确率乘以0.9

其次可能是训练的方法不对，原论文中的方法收敛太慢，与原论文给出的结果不一致

可以考虑用类似于ResNet训练的方法，此外也考虑到与ResNet相比参数可能有些过多了，可以调整一下参数量，将模型简化一些

尝试使用了SGD和Adam作为优化器，SGD训练误差为nan，Adam的误差也下降极慢，可见训练方法上还是RMProp比较合适。考虑可能是模型结构出现了问题。

接下来尝试两个方向：*手动调节学习率*、*简单化网络结构再重新训练*

手动调节学习率的方法有一定的提升，达到了48左右，只能尝试把模型改得更简单，还可以再尝试不使用数据增强

将模型结构变得更加简单以后发现效果也并不理想，大约再46左右。发现应该是在每一个残差块上都乘以0.1，而不是只有最后一块。于是重新构建网络，并且进行训练，模型明命名为Inception-ResNet-New.

但是这样train的模型依然存在过拟合的问题，与ResNet网络进行对比可以看出来。-> 可以试试加momentem?

尝试修改网络，将网络改得更加简单。

In [None]:
from Inception_ResNet import Inception_ResNet3

model = torch.load('./Inception_ResNet/Inception_ResNet_30.pt') #Inception_ResNet3(init_weights=True)
model = model.to(device)
train_inception_resnet(model,lr=0.01,epoch=10)

测算模型的参数量

In [19]:
def get_parameter_number(model):
    total_num = sum(p.numel() for p in model.parameters())
    trainable_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('Total:',total_num)
    print('trainable:',trainable_num)
    return


In [20]:
model = torch.load('./renwu2_3.pt')
get_parameter_number(model)

Total: 4310059
trainable: 4310059


### 接下来尝试使用DenseNet

In [13]:
def train_densenet(model,epoch=100,lr=1e-4,max_lr=6e-4,step_size=2000):
    loss_function = nn.CrossEntropyLoss()
    # optimizer = torch.optim.SGD(model.parameters(),momentum=0.9,lr=lr,weight_decay=2e-4)
    optimizer = torch.optim.Adam(model.parameters(),lr=lr,eps=1e-8)
    schedule = torch.optim.lr_scheduler.CyclicLR(optimizer=optimizer,base_lr=lr,max_lr=max_lr,step_size_up=step_size,mode='triangular2',cycle_momentum=False)

    lossRec = []
    validRec = []

    for i in range(epoch):
        running_loss = 0.0
        for data in train_loader:
            imgs,target = data
            # imgs = getdata_task1_aug(imgs.to(device),mode='train') # 采用新的数据增强方式
            imgs = get_data_for_densenet(imgs.to(device))
            target = target.to(device)
            output = model(imgs)    
            loss = loss_function(output,target)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            schedule.step()
        lossRec.append(running_loss/len(train_loader))
        print('Epoch: [{}/{}],TrainLoss:{:.5f}'.format(i+1,epoch,running_loss/len(train_loader))) 

        with torch.no_grad():
            correct_num = 0
            all_num = 0
            Pred = torch.zeros(0)
            Pred = Pred.to(device)
            for data in val_loader:
                imgs,_ = data
                imgs = imgs.to(device) #getdata_task1_aug(imgs.to(device),mode='val')
                pred = model(imgs)
                prob = torch.softmax(pred,dim=1)
                y_pred = torch.argmax(prob,dim=1)
                Pred = torch.concat((Pred,y_pred),0)
            acc = torch.sum(Pred == val_label)/Pred.shape[0]
            print('TestAcc:{:.4f}'.format(acc))
            validRec.append(acc.cpu().item())
        torch.save(model,'./DEnseNet/DenseNet_{}.pt'.format(i+1))

    with open('trainloss.txt','a',encoding='utf-8')as f:
        for loss in lossRec:
            f.write(str(loss)+'\n')
    with open('valloss.txt','a',encoding='utf-8')as f:
        for loss in validRec:
            f.write(str(loss)+'\n')

    t = range(1,len(lossRec)+1)
    plt.figure(figsize=(18,10))
    plt.subplot(121)
    plt.plot(t,lossRec)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.subplot(122)
    plt.plot(t,validRec)
    plt.xlabel('epochs')
    plt.ylabel('Acc')
    plt.suptitle("loss-epoch-acc")
    plt.savefig('./DEnseNet/img')
    plt.show()

    return

In [None]:
from DenseNet import DenseNet

model = DenseNet(init_weights=True)
model = model.to(device)
train_densenet(model,lr=1e-4,max_lr=6e-4,step_size=4687,epoch=24)

DenseNet的效果比较好，可以达到60%左右，且前期的收敛速度较快。将这一结果作为任务的最终结果。

In [32]:
# 用来进行误差的分析
Fen = {}
LabelToName = {value:key for key,value in NameToLabel.items()}
# print(LabelToName)
# print(Score)
LLables = []
with open('./tiny-imagenet-200/val/val_annotations.txt','r',encoding='utf-8')as f:
    labels = f.readlines()
    for label in labels:
        # ValnameToLabel[label.split('\t')[0]] = NameToLabel[label.split('\t')[1]]    
        LLables.append(label.split('\t')[1])
print(len(LLables))

10000


In [38]:
model = torch.load('./renwu1.pt') # /AutoEncoder/AutoEncoder_70.pt
model = model.to(device)

with torch.no_grad():
    correct_num = 0
    all_num = 0
    Pred = torch.zeros(0)
    Pred = Pred.to(device)
    for data in val_loader:
        imgs,_ = data
        imgs = imgs.to(device) #getdata_task1_aug(imgs.to(device),mode='val')
        pred = model(imgs) # mode='pred'
        prob = torch.softmax(pred,dim=1)
        y_pred = torch.argmax(prob,dim=1)
        Pred = torch.concat((Pred,y_pred),0)
    print(Pred == val_label)
# print(acc)

tensor([ True, False, False,  ..., False,  True,  True], device='cuda:0')


In [56]:
Score = {key:0 for key in LabelToName.values()}
Index = (Pred==val_label).long()
for i in range(Index.shape[0]):
    if Index[i] == 1:
        Score[LLables[i]] += 1
print(sum(Score.values()))
Score = sorted(Score.items(),key=lambda x:x[1])
print(Score[0:5])
print(Score[-5:])

6277
[('n06596364', 21), ('n02132136', 23), ('n02403003', 23), ('n02917067', 23), ('n04371430', 24)]
[('n04540053', 38), ('n02123394', 39), ('n04074963', 39), ('n02415577', 40), ('n03733131', 40)]
