# Pipeline ResNet-D
### В пайплайне используется конструктор ResNet-D сетей, взятый из [статьи](https://arxiv.org/pdf/1812.01187.pdf)
- Accuracy на тестовом датасете составляет 0.92

In [None]:
import torch
import torchvision
from torch import nn
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings('ignore')

In [None]:
# hyper params
batch_size = 64  # mini-batch size used by the training DataLoader
num_epoch = 10  # total number of training epochs
learning_rate = 0.025 # 0.1 * batch_size / 256
sheduler_type = 'step' # 'cos' or 'step' (the scheduler cell compares against the literal 'cos')
sheduler_cycle = 2  # divisor for the scheduler period: (num_epoch - warmup_epoch) // sheduler_cycle
warmup_epoch = 5  # number of warm-up epochs used when configuring the warm-up scheduler
optimizer_type = 'SGD' # 'SGD' or 'Adam'
resnet_layers = [3,4,6,3]  # residual blocks per stage, passed to ResNet_like(layers=...)
bottleneck = False  # True -> BottleneckBlock, False -> NormalBlock
num_classes = 10  # size of the classifier output
label_smoothing = 0.05  # smoothing factor handed to SmoothCrossEntropyLoss
save_best_model = True  # save the model when the epoch's validation accuracy equals the best recorded one
save_model_dir = './models/'  # path prefix for saved model files

In [None]:
# Build the run identifier used for the metrics CSV and the checkpoint file.
# The nominal depth counts 3 convolutions per bottleneck block (2 per basic
# block) plus 2. Using plain truthiness instead of `== True` / `== False`
# guarantees that `model_name` is always defined, whatever value the flag has.
convs_per_block = 3 if bottleneck else 2
network_depth = sum(resnet_layers) * convs_per_block + 2
scheduler_cycle_len = (num_epoch - warmup_epoch) // sheduler_cycle
model_name = (
    f'ResNet{network_depth}_{optimizer_type}_lr{learning_rate}'
    f'_b{batch_size}_{sheduler_type}_sc{scheduler_cycle_len}'
)
model_name

In [None]:
# Read the Imagenette label table and count the rows of each split using the
# `is_valid` column (False -> training row, True -> validation row).
labels_df = pd.read_csv('../imagenette/imagenette2-320/noisy_imagenette.csv')
is_validation = labels_df['is_valid']
train_img_qty = int((is_validation == False).sum())
val_img_qty = int((is_validation == True).sum())
train_img_qty, val_img_qty

### Создаем DataLoader, попутно предобрабатывая данные
- Загрузку датасета можно найти в [ResNet_constructor.ipynb](./ResNet_constructor.ipynb)
- Предварительный просмотр данных можно найти в [ResNet_constructor.ipynb](./ResNet_constructor.ipynb)

In [None]:
from torchvision import transforms, datasets

# Channel-wise normalisation shared by the training and evaluation pipelines.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: resize, random crop to 224x224, random horizontal flip.
# `RandomResizedCrop` replaces the deprecated alias `RandomSizedCrop`, which
# newer torchvision releases no longer provide. Colour jitter is deliberately
# not applied.
train_transform = transforms.Compose([
    transforms.Resize((260, 260)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(.5),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: deterministic resize straight to 224x224 (no crop).
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

trainset = datasets.ImageFolder(root='../imagenette/imagenette2-320/train/', transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)
testset = datasets.ImageFolder(root='../imagenette/imagenette2-320/val/', transform=test_transform)
# No batch_size is passed here, so the DataLoader default applies.
testloader = torch.utils.data.DataLoader(testset, shuffle=False)

In [None]:
# Number of mini-batches the training DataLoader yields per epoch; used below
# to size the warm-up learning-rate schedule.
batch_per_epoch = len(trainloader)
batch_per_epoch

## Создаем конструктор ResNet-like сетей.

### Конструктор представляет собой класс, который ожидает на вход следующие параметры:

 - layers - список с количеством стандартных блоков по слоям
 - num_classes - количество классов
 - bottleneck - определяет использование стандартных блоков или 'bottleneck' блоков 

(!) Конструктор ожидает на вход изображение с разрешением 224х224х3

Примеры стандартных сетей:
ResNet-18: 
model = ResNet_like(layers=[2,2,2,2], num_classes=10, bottleneck=False)

ResNet-34: 
model = ResNet_like(layers=[3,4,6,3], num_classes=10, bottleneck=False)

ResNet-50:
model = ResNet_like(layers=[3,4,6,3], num_classes=10, bottleneck=True)

ResNet-101:
model = ResNet_like(layers=[3,4,23,3], num_classes=10, bottleneck=True)

ResNet-152:
model = ResNet_like(layers=[3,8,36,3], num_classes=10, bottleneck=True)

Выносим в функции сверточные слои для уменьшения количества букв в коде

In [None]:
def conv1x1(in_channels, out_channels, stride=1):
    """Build a 1x1 (pointwise) 2-D convolution with no padding."""
    return nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=1,
        stride=stride,
        padding=0,
    )

def conv3x3(in_channels, out_channels, stride=1,padding=1):
    """Build a 3x3 2-D convolution; the default padding of 1 keeps the spatial size at stride 1."""
    return nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=3,
        stride=stride,
        padding=padding,
    )

Задаем базовые блоки через классы.
Класс NormalBlock собирает стандартный ResNet блок с skipconnection'ом
Класс BottleneckBlock собирает Bottleneck ResNet блок с skipconnection'ом

Каждый класс ожидает параметры:
 - num_layer - порядковый номер слоя, в котором будет использоваться данный блок. В стандартной ResNet архитектуре блоки используются со второго слоя.
 - downsample - определяет тип downsampling'а.
     - 0 - downsampling не используется
     - 1 - downsampling используется в блоке, где уменьшается разрешение и увеличивается кол-во каналов
     - -1 - downsampling используется в блоке, где разрешение не уменьшается, но увеличивается кол-во каналов (обычно последний слой)

In [None]:
class NormalBlock(nn.Module):
    """Basic residual block: 3x3 conv -> BN -> ReLU -> 3x3 conv -> BN, plus a skip.

    The block always outputs ``16 * 2**num_layer`` channels.

    Args:
        num_layer: index of the stage the block belongs to (the network
            builder starts numbering stages at 2).
        downsample: shortcut mode.
            ``0``  - identity shortcut; the input already has the output width.
            ``1``  - first block of a stage that halves the resolution: the
                     main path uses a stride-2 convolution and the shortcut is
                     AvgPool(2) -> 1x1 conv -> BatchNorm.
            ``-1`` - first block of a stage that keeps the resolution: the
                     shortcut is a single 1x1 convolution.

    Raises:
        ValueError: if ``downsample`` is not one of -1, 0, 1.
    """

    def __init__(
        self,
        num_layer,
        downsample = 0,

    ):
        super(NormalBlock, self).__init__()
        if downsample not in (-1, 0, 1):
            # Previously an unknown mode produced a block without `conv1`
            # that only failed later, inside forward().
            raise ValueError(
                f"downsample must be -1, 0 or 1, got {downsample!r}")

        self.use_downsample = downsample
        self.out_channels = 16*(2**num_layer)

        # Input width. Identity blocks receive the stage's own width. The
        # first block of stage 2 is fed by the stem, whose width already
        # equals the stage-2 width. This now holds for downsample == -1 too,
        # which used to leave `in_channels` unset. The first block of any
        # later stage receives the previous stage's (half) width.
        if downsample == 0 or num_layer <= 2:
            self.in_channels = self.out_channels
        else:
            self.in_channels = self.out_channels // 2

        # Shortcut projection; created before conv1 so the module
        # registration order is unchanged.
        if downsample == 1:
            self.downsample = nn.Sequential(
                nn.AvgPool2d(kernel_size=2,stride=2),
                conv1x1(self.in_channels, self.out_channels, stride=1),
                nn.BatchNorm2d(self.out_channels))
        elif downsample == -1:
            self.downsample = conv1x1(self.in_channels, self.out_channels, stride=1)

        # Only the resolution-halving variant strides the main path.
        main_stride = 2 if downsample == 1 else 1
        self.conv1 = conv3x3(self.in_channels, self.out_channels, stride=main_stride)
        self.in_channels = self.out_channels
        self.bn1 = nn.BatchNorm2d(self.out_channels)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(self.in_channels, self.out_channels)
        self.bn2 = nn.BatchNorm2d(self.out_channels)

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        # Project the input only when the shortcut has to change its shape.
        skip = x if self.use_downsample == 0 else self.downsample(x)
        out += skip
        out = self.relu(out)

        return out

In [None]:
class BottleneckBlock(nn.Module):
    """Bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand (x4), plus a skip.

    The inner width is ``16 * 2**num_layer`` and the block outputs four times
    that many channels.

    Args:
        num_layer: index of the stage the block belongs to (the network
            builder starts numbering stages at 2).
        downsample: shortcut mode.
            ``0``  - identity shortcut; the input already has the output width.
            ``1``  - first block of a stage that halves the resolution: the
                     3x3 convolution uses stride 2 and the shortcut is
                     AvgPool(2) -> 1x1 conv -> BatchNorm.
            ``-1`` - first block of a stage that keeps the resolution: the
                     shortcut is 1x1 conv -> BatchNorm.

    Raises:
        ValueError: if ``downsample`` is not one of -1, 0, 1.
    """

    def __init__(
        self,
        num_layer,
        downsample = 0

    ):
        super(BottleneckBlock, self).__init__()
        if downsample not in (-1, 0, 1):
            # Previously an unknown mode produced a block without `conv1`
            # that only failed later, inside forward().
            raise ValueError(
                f"downsample must be -1, 0 or 1, got {downsample!r}")

        self.use_downsample = downsample
        width = 16*(2**num_layer)
        expanded = width*4

        # Input width. Identity blocks receive the stage's expanded width.
        # The first block of stage 2 is fed by the stem, whose width equals
        # `width`. This now holds for downsample == -1 too, which used to
        # leave the input width unset. The first block of any later stage
        # receives the previous stage's expanded width, (width / 2) * 4.
        if downsample == 0:
            block_input = expanded
        elif num_layer <= 2:
            block_input = width
        else:
            block_input = width*2

        # Shortcut projection; created before the main path so the module
        # registration order is unchanged.
        if downsample == 1:
            self.downsample = nn.Sequential(
                nn.AvgPool2d(kernel_size=2,stride=2),
                conv1x1(block_input, expanded, stride=1),
                nn.BatchNorm2d(expanded))
        elif downsample == -1:
            self.downsample = nn.Sequential(
                conv1x1(block_input, expanded, stride=1),
                nn.BatchNorm2d(expanded))

        # Main path; only the resolution-halving variant strides the 3x3 conv.
        self.conv1 = conv1x1(block_input, width)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = conv3x3(width, width, stride=2 if downsample == 1 else 1)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = conv1x1(width, expanded)
        self.bn3 = nn.BatchNorm2d(expanded)

        # After construction both attributes hold the block's output width,
        # exactly as before.
        self.out_channels = expanded
        self.in_channels = expanded
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the bottleneck block to ``x`` and return the activated sum."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)

        # Project the input only when the shortcut has to change its shape.
        skip = x if self.use_downsample == 0 else self.downsample(x)
        out += skip
        out = self.relu(out)

        return out

### Класс конструктор ResNet подобных архитектур

In [None]:
class ResNet_like(nn.Module):
    """Configurable ResNet-style classifier built from residual stages.

    Layout: a stem of three 3x3 convolutions (3 -> 32 -> 32 -> 64 channels,
    the first two with stride 2), then one stage per entry of ``layers``,
    global average pooling and a linear classifier.

    Args:
        layers: number of residual blocks in each stage, e.g. ``[3, 4, 6, 3]``.
        num_classes: number of outputs of the final linear layer.
        bottleneck: truthy -> stages use ``BottleneckBlock``; falsy -> stages
            use ``NormalBlock``.
    """

    def __init__(self,
                 layers,
                 num_classes,
                 bottleneck,

                 ):

        super(ResNet_like, self).__init__()
        # NOTE(review): the stem convolutions have no BatchNorm/ReLU between
        # them - confirm this is intentional before changing it, since it
        # affects every trained checkpoint.
        self.first = nn.Sequential(
            conv3x3(3, 32, stride=2),
            conv3x3(32, 32, stride=2),
            conv3x3(32, 64))

        # Choose the block type once. The two construction loops used to be
        # duplicated, and a non-bool flag matched neither `== True` nor
        # `== False`, silently producing an empty body.
        block_type = BottleneckBlock if bottleneck else NormalBlock
        last_stage = len(layers) - 1

        self.body = nn.Sequential()
        for num, layer in enumerate(layers):
            for block in range(layer):
                if block != 0:
                    downsample = 0    # identity shortcut inside a stage
                elif num < last_stage:
                    downsample = 1    # stage entry that halves the resolution
                else:
                    downsample = -1   # entry of the last stage keeps the resolution
                self.body.add_module(name='block_%d_%d'%(num+2,block+1),
                                     module=block_type(num+2, downsample))

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        # Width of the last stage: 16 * 2**(len(layers) + 1), times 4 when
        # bottleneck blocks expand their output.
        self.linear_input = 32*(2**(len(layers)))
        if bottleneck:
            self.linear_input *= 4
        self.linear = nn.Linear(self.linear_input, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.first(x)
        x = self.body(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten the pooled features per sample
        x = self.linear(x)

        return x

In [None]:
from torch.nn.modules.loss import _WeightedLoss

class SmoothCrossEntropyLoss(_WeightedLoss):
    """Cross-entropy computed against label-smoothed one-hot targets.

    The true class receives probability ``1 - smoothing``; every other class
    receives ``smoothing / (n_classes - 1)``.
    """

    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.weight = weight
        self.reduction = reduction
        self.smoothing = smoothing

    def k_one_hot(self, targets:torch.Tensor, n_classes:int, smoothing=0.0):
        """Expand a 1-D tensor of class indices into smoothed one-hot rows."""
        with torch.no_grad():
            off_value = smoothing / (n_classes - 1)
            smoothed = torch.full((targets.size(0), n_classes), off_value,
                                  device=targets.device)
            # Overwrite the entry of the true class in every row.
            smoothed.scatter_(1, targets.data.unsqueeze(1), 1. - smoothing)
        return smoothed

    def reduce_loss(self, loss):
        """Reduce per-sample losses according to ``self.reduction``."""
        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss

    def forward(self, inputs, targets):
        """Return the smoothed cross-entropy of logits ``inputs`` against index ``targets``."""
        assert 0 <= self.smoothing < 1

        n_classes = inputs.size(-1)
        soft_targets = self.k_one_hot(targets, n_classes, self.smoothing)
        log_probs = torch.nn.functional.log_softmax(inputs, -1)

        # Optional per-class weights rescale the log-probabilities.
        if self.weight is not None:
            log_probs = log_probs * self.weight.unsqueeze(0)

        per_sample = -(soft_targets * log_probs).sum(dim=-1)
        return self.reduce_loss(per_sample)

Инициализируем модель через конструктор

In [None]:
# Instantiate the network, the label-smoothed loss and the optimizer selected
# by `optimizer_type`.
model = ResNet_like(layers=resnet_layers, num_classes=num_classes, bottleneck=bottleneck)
criterion = SmoothCrossEntropyLoss(smoothing=label_smoothing)
if optimizer_type == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.0001, momentum=0.9)
elif optimizer_type == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.95, 0.99), eps=1e-06, weight_decay=0.0001, amsgrad=False)
else:
    # Fail here instead of with a NameError on `optimizer` in a later cell.
    raise ValueError(
        f"Unsupported optimizer_type {optimizer_type!r}; expected 'SGD' or 'Adam'")

In [None]:
# Period (in epochs) of the main schedule; clamped to at least one epoch so a
# short run cannot produce a zero-length cycle.
scheduler_period = max(1, (num_epoch - warmup_epoch) // sheduler_cycle)

# Main schedule, stepped once per epoch after the warm-up phase.
if sheduler_type == 'step':
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_period, gamma=0.1)
elif sheduler_type in ('cos', 'cosine'):
    # 'cosine' is accepted as an alias because the hyper-parameter comment
    # advertises that spelling.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, scheduler_period, eta_min=0)
else:
    # Fail here instead of with a NameError on `scheduler` during training.
    raise ValueError(
        f"Unsupported sheduler_type {sheduler_type!r}; expected 'step' or 'cos'")

# Warm-up schedule, stepped once per batch: a single rising ramp from
# learning_rate / (number of warm-up batches) up to learning_rate. It is
# created after the main scheduler, as before, and is identical for both
# schedule types.
if warmup_epoch > 0:
    scheduler_warmup = torch.optim.lr_scheduler.CyclicLR(
        optimizer,
        base_lr=learning_rate/(batch_per_epoch*warmup_epoch),
        max_lr=learning_rate,
        step_size_up=((batch_per_epoch+1)*warmup_epoch), # should be batch_per_epoch + 1
        step_size_down=0,
        cycle_momentum=False,
    )

Ячейка используется для запуска реализации ResNet в библиотеке PyTorch для сравнения с конструктором.

In [None]:
# from torchvision.models import resnet34
# model = resnet34(num_classes=10)
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.0001, momentum=0.9)
# # optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.95, 0.99), eps=1e-06, weight_decay=0.0001, amsgrad=False)
# # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=40, gamma=0.1)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epoch, eta_min=0)

Загружаем модель на видеокарту.

In [None]:
# Prefer GPU #4 (the card this notebook was run on), fall back to the first
# GPU when the host exposes fewer devices, and to the CPU when CUDA is
# unavailable. The hard-coded "cuda:4" used to fail on smaller machines.
preferred_gpu = 4
if torch.cuda.is_available():
    gpu_index = preferred_gpu if preferred_gpu < torch.cuda.device_count() else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")
print(device)
model.to(device)

In [None]:
# Empty per-epoch metrics table and the CSV path it is written to.
cols_name = [
    'epoch',
    'time',
    'current_lr',
    'loss',
    'accuracy_train',
    'accuracy_val',
]
metrics_frame = pd.DataFrame(columns=cols_name)
metrics_frame_file = f'./metrics/{model_name}.csv'
metrics_frame_file

## Основной тренировочный цикл и подсчет метрик.
    Основная метрика accuracy (топ1). Очень не хотелось бы получать ошибку на топ5 accuracy при 10 классах.

In [None]:
import os


def _count_correct(net, loader, target_device):
    """Return ``(correct, total)`` top-1 prediction counts of ``net`` over ``loader``."""
    correct, total = 0, 0
    with torch.no_grad():
        for images, targets in loader:
            images, targets = images.to(target_device), targets.to(target_device)
            predicted = net(images).argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return correct, total


def _random_subset_loader(dataset, population, sample_size):
    """Return a loader over ``sample_size`` indices drawn (with replacement) from ``[0, population)``."""
    indices = np.random.randint(0, high=population, size=sample_size)
    subset = torch.utils.data.Subset(dataset, indices)
    return torch.utils.data.DataLoader(subset, batch_size=batch_size, shuffle=False)


# Make sure the output locations exist before the first write.
os.makedirs(os.path.dirname(metrics_frame_file) or '.', exist_ok=True)
if save_best_model:
    os.makedirs(save_model_dir, exist_ok=True)

for epoch in range(num_epoch):  # loop over the dataset multiple times
    # The warm-up boundary follows the `warmup_epoch` hyper-parameter (it
    # used to be hard-coded to 5, which also broke warmup_epoch == 0).
    in_warmup = epoch < warmup_epoch

    model.train()
    start_time = time.time()
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if in_warmup:
            scheduler_warmup.step()  # warm-up schedule advances per batch
    if not in_warmup:
        scheduler.step()  # main schedule advances per epoch

    # Quick accuracy estimates on random subsets of both splits.
    model.eval()
    correct_train, total_train = _count_correct(
        model, _random_subset_loader(trainset, train_img_qty, train_img_qty//16), device)
    correct_val, total_val = _count_correct(
        model, _random_subset_loader(testset, val_img_qty, val_img_qty//4), device)

    # When the validation estimate looks like a new best, replace it with
    # the exact accuracy over the full validation loader.
    # NOTE(review): the original condition compared the *train* accuracy
    # with the best validation accuracy, which looks like a copy-paste slip.
    # It also never fired on the first epoch, because the maximum of an
    # empty column is NaN. Confirm the intended trigger.
    best_val_so_far = metrics_frame['accuracy_val'].max() if len(metrics_frame) else 0.0
    val_estimate = correct_val/total_val
    if val_estimate >= .6 and val_estimate >= best_val_so_far:
        correct_val, total_val = _count_correct(model, testloader, device)

    end_time = time.time()
    metrics = {'epoch': epoch+1,
               'time': end_time - start_time,
               'current_lr': optimizer.param_groups[0]['lr'],
               'loss': float(loss),  # loss of the last training batch
               'accuracy_train': correct_train/total_train,
               'accuracy_val': correct_val/total_val,
               }

    print("Epoch {}/{}, Time: {:.2f} sec, current_lr: {:.2e}, Loss: {:.3f}, Accuracy_train: {:.3f}, Accuracy_val: {:.3f}".
          format(metrics['epoch'], num_epoch, metrics['time'], metrics['current_lr'], metrics['loss'], metrics['accuracy_train'], metrics['accuracy_val']))

    # DataFrame.append was removed in pandas 2; build the row explicitly and
    # concatenate (skipping the concat while the table is still empty).
    epoch_row = pd.DataFrame([metrics])
    if metrics_frame.empty:
        metrics_frame = epoch_row
    else:
        metrics_frame = pd.concat([metrics_frame, epoch_row], ignore_index=True)
    metrics_frame.to_csv(metrics_frame_file,index=False)

    # Keep the checkpoint of the best validation accuracy seen so far.
    if save_best_model:
        if metrics['accuracy_val'] == metrics_frame['accuracy_val'].max():
            torch.save(model, save_model_dir + model_name + '.pt')

In [None]:
# Reload the checkpoint written by the training loop. That loop saves the
# whole module object with torch.save(model, ...), not just a state_dict.
# NOTE(review): newer PyTorch releases reportedly default torch.load to
# weights_only=True, which would reject a fully pickled module - confirm
# against the installed version.
model = torch.load(save_model_dir + model_name + '.pt')
# Switch to evaluation mode; the last expression also displays the model.
model.eval()

In [None]:
# Top-1 accuracy of the reloaded model over the full validation loader.
# The unused train counters and the no-op `outputs.to(device)` were dropped.
correct_val, total_val = 0, 0
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        predicted = model(images).argmax(dim=1)
        total_val += labels.size(0)
        correct_val += (predicted == labels).sum().item()

print(f'Accuracy final model on the validation dataset is: {(correct_val/total_val):.3f}')