# Pipeline ResNet-like
### В пайплайне используется контруктор ResNet подобных сетей
- ResNet сильно оверфитится на Imahenette датасете. 
- Accuracy на тренировочном и тестово

In [1]:
import torch
import torchvision
from torch import nn
import pandas as pd
import numpy as np
import time

In [2]:
# hyper params
batch_size = 64
num_epoch = 200

### Создаем DataLoader попутно предобрабатывая данные
- Загрузку датасета можно найти в VGG_like.ipynb
- Предварительный смотр данных можно найти в VGG_like.ipynb

In [3]:
from torchvision import transforms, datasets

train_transform = transforms.Compose([
        transforms.Resize((260,260)),
        transforms.RandomSizedCrop(224),
        transforms.RandomHorizontalFlip(.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
test_transform = transforms.Compose([
        transforms.Resize((260,260)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

trainset = datasets.ImageFolder(root='../imagenette/imagenette2-320/train/', transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)
testset = datasets.ImageFolder(root='../imagenette/imagenette2-320/val/', transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, #batch_size=batch_size,
                                         shuffle=False)

  "please use transforms.RandomResizedCrop instead.")


## Создаем конструктор ResNet-like сетей.

### Конструктор представляет собой класс, который ожидает на вход следующие параметры:

 - layers - список с количеством стандартных блоков по слоям
 - num_classes - количество классов
 - bottleneck - определяет использование стандартных блоков или 'bottleneck' блоков 

(!) Конструктор ожидает на вход изображение с разрешением 224х224х3

Примеры стандартных сетей:
ResNet-18: 
model = ResNet_like(layers=[2,2,2,2], num_classes=10, bottleneck=False)

ResNet-36: 
model = ResNet_like(layers=[3,4,6,3], num_classes=10, bottleneck=False)

ResNet-50:
model = ResNet_like(layers=[3,4,6,3], num_classes=10, bottleneck=True)

ResNet-101:
model = ResNet_like(layers=[3,4,23,3], num_classes=10, bottleneck=True)

ResNet-152:
model = ResNet_like(layers=[3,8,36,3], num_classes=10, bottleneck=True)

Выносим в функции сверточные слои для уменьшения количества букв в коде

In [4]:
def conv1x1(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0)

def conv3x3(in_channels, out_channels, stride=1,padding=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding)

Задаем базовые блоки через классы.
Класс NormalBlock собирает стандартный ResNet блок с skipconnection'ом
Класс BottleneckBlock собирает Bottleneck ResNet блок с skipconnection'ом

Каждый класс ожидает параметры:
 - num_layer - порядковый номер слоя, в котором будет использоваться данных блок. В стандартной ResNet архитектуре блоки используются со второго слоя.
 - downsample - определяет тип downsampling'а.
     - 0 - downsampling не используется
     - 1 - downsampling используется в блоке, где уменьшается разрешение и увеличивается кол-во каналов
     - -1 - downsampling используется в блоке, где разрешение не уменьшается, но увеличивается кол-во каналов (обычно последний слой)

In [5]:
class NormalBlock(nn.Module):
    def __init__(
        self,
        num_layer,
        downsample = 0,
        
    ):
        super(NormalBlock, self).__init__()
        self.use_downsample = downsample
        if num_layer == 2 and downsample == 1:
            self.in_channels = 16*(2**num_layer)
        elif num_layer > 2 and downsample != 0:
            self.in_channels = 16*(2**(num_layer-1))
        elif downsample == 0: 
            self.in_channels = 16*(2**num_layer)
            
        self.out_channels = 16*(2**num_layer)
        
        if downsample == 1:
            self.downsample = nn.Sequential(
                conv1x1(self.in_channels, self.out_channels, stride=2),
                nn.BatchNorm2d(self.out_channels))
            self.maxpool = nn.MaxPool2d(kernel_size=2,stride=2, padding=0)
        elif downsample == -1:
            self.downsample = conv1x1(self.in_channels, self.out_channels, stride=1)
        
        self.conv1 = conv3x3(self.in_channels, self.out_channels)
        self.in_channels = self.out_channels
        self.bn1 = nn.BatchNorm2d(self.out_channels)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(self.in_channels, self.out_channels)
        self.bn2 = nn.BatchNorm2d(self.out_channels)
        
    def forward(self, x):
        skip = x
#         print('Block input',x.shape)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
#         print('Block before skip',out.shape)

        if self.use_downsample != 0:
#             print('Before downsample',out.shape, skip.shape)
            if self.use_downsample == 1:
                out = self.maxpool(out)
            skip = self.downsample(x)
#             print('After downsample',out.shape, skip.shape)
        out += skip
        out = self.relu(out)

        return out

class BottleneckBlock(nn.Module):
    def __init__(
        self,
        num_layer,
        downsample = 0
        
    ):
        super(BottleneckBlock, self).__init__()
        
        self.use_downsample = downsample
        if num_layer == 2 and downsample == 1:
            self.in_channels = 16*(2**num_layer)
        elif num_layer > 2 and downsample != 0:
            self.in_channels = 16*(2**(num_layer-1))*4
        elif downsample == 0: 
            self.in_channels = 16*(2**num_layer)*4
            
        self.out_channels = 16*(2**num_layer)
        
        if downsample == 1:
            self.downsample = nn.Sequential(
                conv1x1(self.in_channels, self.out_channels, stride=2),
                nn.BatchNorm2d(self.out_channels))
            self.maxpool = nn.MaxPool2d(kernel_size=2,stride=2, padding=0)
        elif downsample == -1:
            self.downsample = nn.Sequential(
                conv1x1(self.in_channels, self.out_channels, stride=1))
   
        if downsample == 1:
            self.downsample = nn.Sequential(
                conv1x1(self.in_channels, self.out_channels*4, stride=2),
                nn.BatchNorm2d(self.out_channels*4))
            self.maxpool = nn.MaxPool2d(kernel_size=2,stride=2, padding=0)
        elif downsample == -1:
            self.downsample = conv1x1(self.in_channels, self.out_channels*4, stride=1)
        
        self.conv1 = conv1x1(self.in_channels, self.out_channels)
        self.in_channels = self.out_channels
        self.bn1 = nn.BatchNorm2d(self.out_channels)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(self.in_channels, self.out_channels)
        self.bn2 = nn.BatchNorm2d(self.out_channels)
        self.relu = nn.ReLU()
        self.out_channels = 16*(2**num_layer)*4
        self.conv3 = conv1x1(self.in_channels, self.out_channels)
        self.bn3 = nn.BatchNorm2d(self.out_channels)
        self.in_channels = self.out_channels
        
    def forward(self, x):
        skip = x
        
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)

        if self.use_downsample != 0:
#             print(out.shape, x.shape)
            if self.use_downsample == 1:
                out = self.maxpool(out)
            skip = self.downsample(x)
#             print(out.shape, x.shape)
        out += skip
        out = self.relu(out)
            
        return out

### Класс конструктор ResNet подобных архитектур

In [6]:
class ResNet_like(nn.Module):

    def __init__(self, 
                 layers, 
                 num_classes,
                 bottleneck,
                 
                 ):
        
        super(ResNet_like, self).__init__()
        self.first = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        
        self.body = nn.Sequential()
        if bottleneck == True:
            for num, layer in enumerate(layers):
                for block in range(layer):
                    if block == 0  and num < len(layers) - 1:
                        downsample = 1
                    elif block == 0 and num == len(layers) - 1:
                        downsample = -1
                    elif block != 0:  
                        downsample = 0
                    self.body.add_module(name='block_%d_%d'%(num+2,block+1), module=BottleneckBlock(num+2, downsample))
        elif bottleneck == False:
            for num, layer in enumerate(layers):
                for block in range(layer):
                    if block == 0  and num < len(layers) - 1:
                        downsample = 1
                    elif block == 0 and num == len(layers) - 1:
                        downsample = -1
                    elif block != 0:  
                        downsample = 0
                    self.body.add_module(name='block_%d_%d'%(num+2,block+1), module=NormalBlock(num+2, downsample))
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        if bottleneck == True:
            self.linear_input = 32*(2**(len(layers)))*4
        else:
            self.linear_input = 32*(2**(len(layers)))
        self.linear = nn.Linear(self.linear_input, num_classes)
        
    def forward(self, x):

        x = self.first(x)
        x = self.body(x)
#         print('Shape input avgpool:', x.shape)
        x = self.avgpool(x)
#         print('Shape input linear:', x.shape)
        x = x.view(x.size(0), -1)
#         print('Shape input linear:', x.shape)
        x = self.linear(x)
#         x = self.final(x)
        
        return x

Инициализируем модель с через конструктор

In [7]:
model = ResNet_like(layers=[2,2,2,2], num_classes=10, bottleneck=False)
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

Ячейча используется для запуска реализации ResNet в библиотеке PyTorch для сравнения с конструктором.

In [8]:
# from torchvision.models import resnet18
# model = resnet18(num_classes = 10)
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

Загружаем модель на видеокарту.

In [9]:
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

cuda:2


ResNet_like(
  (first): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (body): Sequential(
    (block_2_1): NormalBlock(
      (downsample): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(2, 2))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    

## Основной тренировочный цикл и подсчет метрик.
    Основная метрика accuracy (топ1). Очень не хотелось бы получать ошибку на топ5 accuracy при 10 классах.

In [None]:
for epoch in range(num_epoch):  # loop over the dataset multiple times
    
#     model.train()
    
    for i, data in enumerate(trainloader, 0):
        start_time = time.time()
        # get the inputs; data is a list of [inputs, labels]
#         inputs, labels = data
#         print(data[1])
        inputs, labels = data[0].to(device), data[1].to(device)
#         print(labels)

        # zero the parameter gradients
        optimizer.zero_grad()
       
        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
#         print('Loss: ', loss)
        loss.backward()
        optimizer.step()

    #Accuracy train and val
    correct_train, correct_val = 0, 0
    total_train, total_val = 0, 0
    with torch.no_grad():
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            outputs = outputs.to(device)
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            outputs = outputs.to(device)
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()
    end_time = time.time()
    print("Epoch {}/{}, Time: {:.2f} sec, Loss: {:.3f}, Accuracy_train: {:.3f}, Accuracy_val: {:.3f}".
          format(epoch+1, num_epoch, end_time - start_time, loss, correct_train/total_train, correct_val/total_val))
#     print('Epoch: ', epoch, ' loss:', loss[0], ' accuracy_val: ', accuracy(outputs, labels))

       
        # print statistics
#         running_loss += loss.item()
#         if i % 10 == 9:    # print every 2000 mini-batches
#             print('[%d, %5d] loss: %.3f' %
#                   (epoch + 1, i + 1, loss)) #running_loss / 2000
#             print('labels:',  labels)
#             print('outputs: ', outputs)
#             running_loss = 0.0

print('Finished Training')

Epoch 1/200, Time: 71.16 sec, Loss: 3.364, Accuracy_train: 0.419, Accuracy_val: 0.106
Epoch 2/200, Time: 71.20 sec, Loss: 2.097, Accuracy_train: 0.495, Accuracy_val: 0.120
Epoch 3/200, Time: 70.50 sec, Loss: 2.229, Accuracy_train: 0.542, Accuracy_val: 0.125
Epoch 4/200, Time: 71.69 sec, Loss: 2.203, Accuracy_train: 0.609, Accuracy_val: 0.132
Epoch 5/200, Time: 71.92 sec, Loss: 2.544, Accuracy_train: 0.601, Accuracy_val: 0.122
Epoch 6/200, Time: 71.01 sec, Loss: 3.513, Accuracy_train: 0.652, Accuracy_val: 0.115
Epoch 7/200, Time: 71.50 sec, Loss: 2.733, Accuracy_train: 0.621, Accuracy_val: 0.112
Epoch 8/200, Time: 71.59 sec, Loss: 2.607, Accuracy_train: 0.656, Accuracy_val: 0.191
Epoch 9/200, Time: 71.83 sec, Loss: 2.039, Accuracy_train: 0.676, Accuracy_val: 0.148
Epoch 10/200, Time: 71.23 sec, Loss: 2.353, Accuracy_train: 0.706, Accuracy_val: 0.188
Epoch 11/200, Time: 70.77 sec, Loss: 2.429, Accuracy_train: 0.724, Accuracy_val: 0.180
Epoch 12/200, Time: 71.26 sec, Loss: 2.248, Accuracy