## Pipeline. На 120 эпохах accuracy на тестовом датасете 90.5%.
### На 450 эпохах accuracy 92.4%
Обучение проходит на датасете Imagenette.
Информацию по загрузке датасета можно найти в VGG_like.ipynb

In [1]:
import time

import torch
import torchvision
from torch import nn
import pandas as pd
import numpy as np

from modules import datasets_loader, CNN_blocks, train_tools

In [2]:
# Hyper-parameters of the training run.
batch_size = 32
num_epoch = 450
learning_rate = 0.01
optimizer_type = 'SGD'

# Learning-rate schedule. With 'cos' the run is split into `cosine_cycles`
# equal cycles; otherwise it is split into `decay_steps` equal steps.
scheduler_type = 'cos'
cosine_cycles = 2
decay_steps = 5

# Checkpointing. The training loop reads this flag: when true the checkpoint
# is rewritten every time validation accuracy reaches a new maximum, when
# false the model is saved once after the last epoch.
save_best_model = True

# Output locations (trailing slash required: paths are built by concatenation).
save_model_dir = './models/'
metrics_dir = './metrics/'

### Создаем DataLoader, попутно предобрабатывая данные
- Загрузку датасета можно найти в [ResNet-constructor](ResNet_constructor.ipynb)
- Предварительный просмотр данных можно найти в [ResNet-constructor](ResNet_constructor.ipynb)

In [3]:
from torchvision import transforms, datasets

# Training pipeline: resize, random scale/aspect crop to 224x224, random
# horizontal flip, then tensor conversion and per-channel normalisation
# (the mean/std values are the ones commonly quoted for ImageNet).
train_transform = transforms.Compose([
        transforms.Resize((260,260)),
        # RandomSizedCrop is deprecated; RandomResizedCrop is its replacement.
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
# Evaluation pipeline: deterministic resize to the network input size, no augmentation.
test_transform = transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

torch.set_num_threads(8)
trainset = datasets.ImageFolder(root='../imagenette/imagenette2-320/train/', transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)
testset = datasets.ImageFolder(root='../imagenette/imagenette2-320/val/', transform=test_transform)
# NOTE: batch_size is deliberately not passed here, so the DataLoader default (1) applies.
testloader = torch.utils.data.DataLoader(testset,
                                         shuffle=False)

  "please use transforms.RandomResizedCrop instead.")


In [4]:
# Number of mini-batches the training loader yields per epoch.
len(trainloader)

297

## Создаем конструктор VGG-like сетей.

### В конструктор подается словарь с параметрами сети:

 - body_input - разрешение и кол-во каналов входящего изображения [высота, ширина, каналы].
 - conv_layers - параметры сверточных блоков тела сети. Каждый блок задается как [кол-во сверточных слоев, stride, padding].
 Stride и padding задаются для всех сверточных слоев в данном блоке.
 - linear_layers - параметры линейных слоев сети: [кол-во линейных слоев, кол-во нейронов в каждом скрытом слое]. В кол-во слоев включается выходной слой с числом нейронов, равным кол-ву классов.
 - class_qty - кол-во классов (задается списком из одного элемента, например [10]).
 - print_dim - печатает параметры тензора на выходе из соответствующего слоя.

MaxPooling слой всегда имеет параметры kernel_size=2, stride=2

In [5]:
# Architecture description consumed by the VGG_like constructor.
params = {
    # Input image size: [height, width, channels].
    'body_input': [224, 224, 3],
    # One [number of conv layers, stride, padding] triple per conv block.
    'conv_layers': [
        [1, 1, 1],
        [1, 1, 1],
        [2, 1, 1],
        [2, 1, 1],
        [2, 1, 1],
    ],
    # [number of linear layers (output layer included), neurons per hidden layer].
    'linear_layers': [3, 256],
    # Number of classes, wrapped in a one-element list.
    'class_qty': [10],
    # True to print how the tensor dimensions change through the conv body.
    'print_dim': True,
}

In [6]:
# Depth used in the model name: linear layers plus conv layers summed over all blocks.
params['linear_layers'][0] + np.array(params['conv_layers'])[:, 0].sum()

11

In [7]:
# The schedule suffix of the name is the length (in epochs) of one cosine
# cycle or of one decay step, depending on the chosen scheduler.
if scheduler_type == 'cos':
    n = cosine_cycles
else:
    n = decay_steps
model_name = f"VGG{np.array(params['conv_layers'])[:,0].sum() + params['linear_layers'][0]}_{optimizer_type}_lr{learning_rate}_b{batch_size}_{scheduler_type}_sc{(num_epoch)//n}"
print(f'model name is {model_name}')

model name is VGG11_SGD_lr0.01_b32_cos_sc225


In [8]:
class VGG_like(nn.Module):
    """VGG-style CNN assembled from a parameter dictionary.

    The network is a convolutional ``body`` followed by a fully connected
    ``head``. Every conv block is a run of 3x3 convolutions, each followed by
    ReLU, closed by a 2x2 / stride-2 max-pool. Block ``i`` outputs
    ``min(64 * 2**i, 512)`` channels.

    Args:
        params: dict with the keys

            * ``'body_input'`` - ``[height, width, channels]`` of the input.
            * ``'conv_layers'`` - one ``[n_convs, stride, padding]`` triple per
              block; stride and padding apply to every conv of that block.
            * ``'linear_layers'`` - ``[n_linear, hidden_units]``; ``n_linear``
              counts the final classification layer.
            * ``'class_qty'`` - one-element list with the number of classes.
            * ``'print_dim'`` - when true, print the tracked tensor size after
              every conv and max-pool layer while the model is built.
    """

    def __init__(self, params):
        # nn.Module has to be initialised before any sub-module is registered.
        super().__init__()

        self.body_input = params['body_input']
        self.conv_layers = params['conv_layers']
        self.linear_layers = params['linear_layers']
        self.class_qty = params['class_qty']
        self.print_dim = params['print_dim']

        self.body = nn.Sequential()
        self.channels_input = self.body_input[2]
        self.resolution = np.array([self.body_input[0], self.body_input[1]])
        for i, conv_layer in enumerate(self.conv_layers):
            self.resolution = self._add_conv_block(
                conv_layer, self.resolution, i, print_dim=self.print_dim)

        self.head = nn.Sequential()
        self._add_linear_head(self.linear_layers, self.resolution)

    def _add_conv_block(self, conv_layer, resolution, i, print_dim=False):
        """Append conv block ``i`` to ``self.body`` and return the resolution after it."""
        stride, padding = conv_layer[1], conv_layer[2]
        self.channels_out = min(64*(2**i), 512)
        for qty in range(conv_layer[0]):
            # Module names (including the space produced by %2d) are kept
            # unchanged so that existing checkpoints still match.
            self.body.add_module(name='Block%2d_Conv_%d'%(i,qty), module=nn.Conv2d(
                    self.channels_input,
                    self.channels_out,
                    kernel_size=3,
                    stride=stride,
                    padding=padding))
            self.body.add_module(name='Block%2d_Relu_%d'%(i,qty), module=nn.ReLU())
            self.channels_input = self.channels_out
            # Conv2d output size for kernel 3: floor((H + 2p - 3) / s) + 1.
            resolution = (resolution + 2*padding - 3) // stride + 1
            if print_dim:
                print('Tensor dim after conv layer is: ', [*resolution.tolist(), self.channels_input])
        self.body.add_module(name='Block%2d_MaxPool'%i, module=nn.MaxPool2d(kernel_size=2, stride=2))
        resolution = resolution // 2
        if print_dim:
            print('Tensor dim after maxpool layer is: ', [*resolution.tolist(), self.channels_input])
        return resolution

    def _add_linear_head(self, linear_layer, resolution):
        """Build ``self.head``: hidden Linear/Dropout/ReLU groups plus the output layer."""
        # Flattened feature count = H * W * channels leaving the last conv
        # block; plain ints keep numpy scalars out of nn.Linear.
        self.input = int(resolution[0]) * int(resolution[1]) * int(self.channels_input)
        for i in range(linear_layer[0]-1):
            self.head.add_module(name='Linear%2d'%i, module=nn.Linear(self.input, linear_layer[1]))
            self.head.add_module(name='Dropout%2d'%i, module=nn.Dropout(.5))
            self.head.add_module(name='Relu_%2d'%i, module=nn.ReLU())
            self.input = linear_layer[1]
        # self.input is the width of whatever precedes the output layer, so a
        # head made of the output layer alone is wired correctly as well.
        self.head.add_module(name='output', module=nn.Linear(self.input, self.class_qty[0]))

    def forward(self, input):
        """Run the conv body, flatten per sample, and return the head's output."""
        x = self.body(input)
        x = x.flatten(1)
        return self.head(x)

Инициализируем модель через конструктор

In [9]:
# Loss for multi-class classification.
criterion = nn.CrossEntropyLoss()

# Network built from the architecture dictionary.
model = VGG_like(params)

# Optimizer and LR scheduler come from the project helpers; the scheduler is
# given both the step length and the cycle length, in epochs.
optimizer = train_tools.get_optimizer(model, optimizer_type, learning_rate)
scheduler = train_tools.get_scheduler(
    optimizer,
    scheduler_type,
    step_len=num_epoch // decay_steps,
    cycle_len=num_epoch // cosine_cycles,
)

Tensor dim after conv layer is:  [224, 224, 64]
Tensor dim after maxpool layer is:  [112, 112, 64]
Tensor dim after conv layer is:  [112, 112, 128]
Tensor dim after maxpool layer is:  [56, 56, 128]
Tensor dim after conv layer is:  [56, 56, 256]
Tensor dim after conv layer is:  [56, 56, 256]
Tensor dim after maxpool layer is:  [28, 28, 256]
Tensor dim after conv layer is:  [28, 28, 512]
Tensor dim after conv layer is:  [28, 28, 512]
Tensor dim after maxpool layer is:  [14, 14, 512]
Tensor dim after conv layer is:  [14, 14, 512]
Tensor dim after conv layer is:  [14, 14, 512]
Tensor dim after maxpool layer is:  [7, 7, 512]


Загружаем модель на видеокарту.

In [10]:
# Preferred GPU index. If that GPU does not exist on this machine, fall back
# to the first GPU, and to the CPU when CUDA is unavailable, instead of
# failing when the model is moved to a non-existent device.
preferred_gpu = 5
if not torch.cuda.is_available():
    device = torch.device("cpu")
elif preferred_gpu < torch.cuda.device_count():
    device = torch.device(f"cuda:{preferred_gpu}")
else:
    device = torch.device("cuda:0")
print(device)
model.to(device)

cuda:5


VGG_like(
  (body): Sequential(
    (Block 0_Conv_0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 0_Relu_0): ReLU()
    (Block 0_MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Block 1_Conv_0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 1_Relu_0): ReLU()
    (Block 1_MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Block 2_Conv_0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 2_Relu_0): ReLU()
    (Block 2_Conv_1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 2_Relu_1): ReLU()
    (Block 2_MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Block 3_Conv_0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 3_Relu_0): ReLU()
    (Block 3_Conv_1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(

In [11]:
# Prepare the output locations for checkpoints and metrics.
# NOTE(review): create_dir is a project helper; presumably it creates the
# directory when it is missing — confirm in modules/datasets_loader.
datasets_loader.create_dir(save_model_dir)
datasets_loader.create_dir(metrics_dir)

NameError: name 'save_model_dir' is not defined

In [None]:
# CSV file that receives the per-epoch metrics of this run.
metrics_frame_file = metrics_dir + model_name + '.csv'
print(f'File with train metrics save to {metrics_frame_file}')

# Empty table with one column per logged metric; one row is added per epoch.
cols_name = [
    'epoch',
    'time',
    'current_lr',
    'loss',
    'accuracy_train',
    'accuracy_val',
]
metrics_frame = pd.DataFrame(columns=cols_name)

## Основной тренировочный цикл и подсчет метрик.
    Основная метрика accuracy (топ1). Очень не хотелось бы получать ошибку на топ5 accuracy при 10 классах.

In [None]:
# Main training loop: one optimisation pass over the training loader per
# epoch, followed by metric estimation, logging to CSV and checkpointing.
# Expects `save_best_model` (bool) to be defined with the run settings.
for epoch in range(num_epoch):
    # train cycle
    model.train()
    start_time = time.time()
    for data in trainloader:
        loss, outputs = train_tools.make_step(data, optimizer, model, criterion, device)
    scheduler.step()

    # metrics calc
    model.eval()
    # No gradients are needed for evaluation; this is harmless if the
    # train_tools helpers already disable them internally.
    with torch.no_grad():
        accuracy_train = train_tools.approx_accuracy(trainset, model, device, denominator=16)
        accuracy_val = train_tools.approx_accuracy(testset, model, device, denominator=4)

        # Only when the approximate score is promising (>= 0.88 and not below
        # the best logged so far) is it replaced by the accuracy computed over
        # the whole test loader.
        if accuracy_val >= .88 and accuracy_val >= metrics_frame['accuracy_val'].max():
            accuracy_val = train_tools.true_accuracy(testloader, model, device)

    end_time = time.time()
    metrics = {'epoch': epoch+1,
               'time': end_time - start_time,
               'current_lr': optimizer.param_groups[0]['lr'],
               # loss of the last mini-batch of the epoch
               'loss': float(loss),
               'accuracy_train': accuracy_train,
               'accuracy_val': accuracy_val,
               }

    print("Epoch {}/{}, Time: {:.2f} sec, current_lr: {:.2e}, Loss: {:.3f}, Accuracy_train: {:.3f}, Accuracy_val: {:.3f}".
          format(metrics['epoch'], num_epoch, metrics['time'], metrics['current_lr'], metrics['loss'], metrics['accuracy_train'], metrics['accuracy_val']))

    # DataFrame.append was removed in pandas 2.0; build a one-row frame and
    # concatenate instead. The first row replaces the empty frame outright so
    # that concat never sees an empty operand.
    row = pd.DataFrame([metrics], columns=metrics_frame.columns)
    if metrics_frame.empty:
        metrics_frame = row
    else:
        metrics_frame = pd.concat([metrics_frame, row], ignore_index=True)
    metrics_frame.to_csv(metrics_frame_file,index=False)

    if save_best_model:
        # The current epoch is the best one when its score equals the running maximum.
        if metrics['accuracy_val'] == metrics_frame['accuracy_val'].max():
            torch.save(model, save_model_dir + model_name + '.pt')
if not save_best_model:
    # Without best-model tracking, keep only the final state.
    torch.save(model, save_model_dir + model_name + '.pt')