### 使用torch.utils.data.Dataset
- 通过继承 torch.utils.data.Dataset 实现用户自定义读取数据集，需要实现__init__ __getitem__和__len__方法。
    - 在__init__中，需要初始化文件路径或文件名列表，以方便后面在__getitem__中读取。在这里，返回了所有图片样本的路径self.all_image_paths以及对应的标签self.all_image_labels，并对mean和std值进行了reshape。
    - 在__getitem__中，需要根据索引读取数据，并对数据进行预处理，返回数据对，例如（图片，标签）对。在这里，将一张图片调整为224×224尺寸并进行归一化，根据torch的输入图片通道要求(C,H,W)进行了转置，返回了(img, label)对。
    - 在__len__中，需要返回整个数据集的数量。

In [1]:
# 数据处理
import os
import torch

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import pickle

# Augmentation
import albumentations
from albumentations.pytorch.transforms import ToTensorV2
import cv2


import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from d2l import torch as d2l
# 我们要做的第一件事就是拆分训练集与数据集
# 标签含有字符串类型，需要通过preprocessing进行转换

lr, num_epochs, batch_size = 1e-3, 50, 64
seed = 666

os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = True
root = './data/train.csv'
def label_split(root):
    root = pd.read_csv(root)
    image = root['image'].tolist()
    label = root['label'].to_list()
    le = preprocessing.LabelEncoder()
    targets = le.fit_transform(label)
    with open('./labels.pkl','wb') as file:
        pickle.dump(dict(zip(targets,label)),file)
    image_train,image_test, label_train, label_test =train_test_split(image,targets,test_size=0.1, random_state=seed)
    return image_train,image_test, label_train, label_test

In [2]:
transform_train = albumentations.Compose([
            albumentations.Resize(320, 320), # 变换尺寸
            albumentations.HorizontalFlip(p=0.5), # 水平翻转
            albumentations.VerticalFlip(p=0.5), # 垂直翻转
            albumentations.Rotate(limit=180, p=0.7), # 随机旋转
            albumentations.RandomBrightnessContrast(), # 随机条件亮度和对比度
            # 随机放射变换
            albumentations.ShiftScaleRotate(
                shift_limit=0.25, scale_limit=0.1, rotate_limit=0
            ),
            # 图像标准化
            albumentations.Normalize(
                [0.485, 0.456, 0.406], [0.229, 0.224, 0.225],
                max_pixel_value=255.0, always_apply=True
            ),
            ToTensorV2(p=1.0),
        ]
    )

transform_test = albumentations.Compose([
            albumentations.Resize(320, 320),
            albumentations.Normalize(
                [0.485, 0.456, 0.406], [0.229, 0.224, 0.225],
                max_pixel_value=255.0, always_apply=True
            ),
            ToTensorV2(p=1.0)
        ]
    )



class Mydataset(Dataset):
    def __init__(self,image,label, transform):
        """
        定义自己的数据集合
        将图片转换为Tensor,归一化至[0,1]
        transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])  # 标准化至[-1,1]
        imgs 因为所有图片的绝对路径，这里路径都储存在了train.csv中
        所以这里路径应当根据csv来提供
        """
        self.imgs = [os.path.join('./data',x) for x in image]
        self.label=label
        self.transform = transform

    def __getitem__(self, index):
        image_filepath = self.imgs[index]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            image = self.transform(image=image)["image"]
        label = torch.as_tensor(self.label[index])
        return image,label

    def __len__(self):
        return len(self.imgs)

In [3]:
image_train,image_test, label_train, label_test = label_split(root)
train_iter = Mydataset(image_train,label_train,transform= transform_train)
test_iter = Mydataset(image_test,label_test,transform= transform_test)

In [4]:
train_iter = DataLoader(train_iter,batch_size=batch_size,shuffle=False,num_workers=0)
test_iter = DataLoader(test_iter,batch_size=batch_size,shuffle=False,num_workers=0)

In [5]:
# 第一步，定义残差块
class Residual(nn.Module):
    def __init__(self,input_channels,num_channels,use_1x1conv=True, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,kernel_size=3, padding=1)
        # 是否对输出使用1X1卷积层
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)
        self.relu = nn.ReLU(inplace=True) 
    
    def forward(self,X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        # 这里是核心,输出残差
        # 这里需要注意的是，设计Resnet，Y的shape需要和X保持一致
        Y += X
        return F.relu(Y)

# ResNet模型
# ResNet 的前两层跟之前介绍的GoogLeNet 中的⼀样：在输出通道数为64、步幅为2 的7X7 卷积层后，接
# 步幅为2 的3X3 的最⼤汇聚层。不同之处在于ResNet 每个卷积层后增加了批量归⼀化层。
b1 = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

# 这种写法之前比较少见，通过数组的形式添加层，在直接*args添加进Sequential
def resnet_block(input_channels, num_channels, num_residuals,first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(input_channels, num_channels,use_1x1conv=True, strides=2))
        else:
            blk.append(Residual(num_channels, num_channels))
    return blk

b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
b3 = nn.Sequential(*resnet_block(64, 128, 2))
b4 = nn.Sequential(*resnet_block(128, 256, 2))
b5 = nn.Sequential(*resnet_block(256, 512, 2))
net = nn.Sequential(
    b1, 
    b2, 
    b3, 
    b4, 
    b5, 
    nn.AdaptiveAvgPool2d((1,1)),
    nn.Flatten(), nn.Linear(512, 256))



In [6]:
X = torch.rand(size=(1, 3, 224, 224))
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__,'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 128, 28, 28])
Sequential output shape:	 torch.Size([1, 256, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 512, 1, 1])
Flatten output shape:	 torch.Size([1, 512])
Linear output shape:	 torch.Size([1, 256])


In [7]:
device =d2l.try_gpu()
def init_weights(m):
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        nn.init.xavier_uniform_(m.weight)
net.apply(init_weights)
print('training on', device)
net.to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss()

training on cuda:0


In [8]:
for epoch in range(num_epochs):
    net.train()
    for i, (X, y) in enumerate(train_iter):
        optimizer.zero_grad()
        X, y = X.to(device), y.to(device)
        y_hat = net(X)
        l = loss(y_hat, y)
        l.backward()
        optimizer.step()
    s,i,acc = torch.tensor(0.0,device=device),0,torch.tensor(0.0,device=device)
    for a, b in test_iter:
        i +=1
        a, b = a.to(device), b.to(device)
        _, predicted = torch.max(net(a), 1)
        acc += sum(predicted == b)/len(b)*100
    print(f'第{epoch+1}轮精确度为{acc.tolist()/i:.2f}%')

第1轮精确度为7.51%
第2轮精确度为17.79%
第3轮精确度为28.97%
第4轮精确度为39.50%
第5轮精确度为52.65%
第6轮精确度为56.50%
第7轮精确度为63.45%
第8轮精确度为67.53%
第9轮精确度为68.85%
第10轮精确度为73.24%
第11轮精确度为74.74%
第12轮精确度为74.96%
第13轮精确度为79.05%
第14轮精确度为79.15%
第15轮精确度为79.90%
第16轮精确度为80.77%
第17轮精确度为80.58%
第18轮精确度为81.58%
第19轮精确度为70.09%
第20轮精确度为83.62%
第21轮精确度为83.95%
第22轮精确度为83.86%
第23轮精确度为85.08%
第24轮精确度为86.26%
第25轮精确度为85.34%
第26轮精确度为84.77%
第27轮精确度为88.17%
第28轮精确度为86.82%
第29轮精确度为87.17%
第30轮精确度为87.74%
第31轮精确度为87.87%
第32轮精确度为87.63%
第33轮精确度为88.12%
第34轮精确度为89.32%
第35轮精确度为87.93%
第36轮精确度为88.94%
第37轮精确度为89.03%
第38轮精确度为88.38%
第39轮精确度为89.02%
第40轮精确度为89.75%
第41轮精确度为90.16%
第42轮精确度为89.45%
第43轮精确度为89.19%
第44轮精确度为90.49%
第45轮精确度为89.21%
第46轮精确度为90.13%
第47轮精确度为90.77%
第48轮精确度为90.07%
第49轮精确度为90.51%
第50轮精确度为91.34%


In [9]:
torch.save(net.state_dict(), 'resnet.params')