## Pipeline. На 120 эпохах accuracy на тестовом датасете 90.5%.
### На 450 эпохах accuracy 92.4%
Обучение проходит на датасете Imagenette.
Информацию по загрузке датасета можно найти в VGG_like.ipynb

In [1]:
import torch
import torchvision
from torch import nn
import pandas as pd
import numpy as np
import time

In [2]:
# hyper params
batch_size = 32
num_epoch = 450
learning_rate = 0.01
sheduler_type = 'cosine' # 'cosine' or 'step'
sheduler_cycle = 3
optimizer_type = 'SGD' # 'SGD' or 'Adam'

### Создаем DataLoader попутно предобрабатывая данные
- Загрузку датасета можно найти в VGG_like.ipynb
- Предварительный смотр данных можно найти в VGG_like.ipynb

In [3]:
from torchvision import transforms, datasets

train_transform = transforms.Compose([
        transforms.Resize((260,260)),
        transforms.RandomSizedCrop(224),
#         transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
#         transforms.RandomResizedCrop(224, scale=(0.08, 1.0), ratio=(0.75, 1.3333333333333333)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
test_transform = transforms.Compose([
        transforms.Resize((224,224)),
#         transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

trainset = datasets.ImageFolder(root='../imagenette/imagenette2-320/train/', transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)
testset = datasets.ImageFolder(root='../imagenette/imagenette2-320/val/', transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, #batch_size=batch_size,
                                         shuffle=False)

  "please use transforms.RandomResizedCrop instead.")


In [4]:
len(trainloader)

297

## Создаем конструктор VGG-like сетей.

### В конструктор подается словарь с параметрами сети:

 - body_input - разрешение и кол-во каналов входящего изображения [высота, ширина, каналы].
 - conv_layers - параметры сверточных блоков тела сети. Каждый блок задается как [кол-во сверточных слоев, padding, stride]. 
 Padding и stride задаются для всех сверточных слоев в данном блоке.
 - linear_layers - параметры линейный слоев сети. [кол-во линейных слоев, кол-во нейронов в каждом слое]. в кол-во слоев включается слой на кол-во классов
 - class_qty - кол-во классов.
 - print_dim - печатает параметры тензора на выходе из соответствующего слоя.

MaxPooling слой всегда имеет параметры kernel_size=2, stride=2

In [5]:
params = {
    'body_input': [224, 224, 3], # list height, width, channel
    'conv_layers': [[1, 1, 1], [1,1,1], [2,1,1],[2,1,1], [2,1,1]], # list of lists. each layer should be list: qty conv layers, stride same or valid, padding
    'linear_layers': [3, 256], # qty linear layers, qty neurons
    'class_qty': [10],
    'print_dim': True # True if you want to show how to change the tensor dimention via convolutional layers
}

In [6]:
class VGG_like(nn.Module):

    def __init__(self, params):
        self.body_input = params['body_input']
        self.conv_layers = params['conv_layers']
        self.linear_layers = params['linear_layers']
        self.class_qty = params['class_qty']
        self.print_dim = params['print_dim']
        
        super().__init__()
        
        def conv_block(conv_layer, resolution=None, i=None, print_dim=False):
            for qty in range(conv_layer[0]):
                self.channels_out = min(64*(2**i), 512)
                self.body.add_module(name='Block%2d_Conv_%d'%(i,qty), module=nn.Conv2d(
                        self.channels_input, 
                        self.channels_out, 
                        kernel_size=3, 
                        stride=conv_layer[1], 
                        padding=conv_layer[2]))
                self.body.add_module(name='Block%2d_Relu_%d'%(i,qty), module=nn.ReLU())
                self.channels_input = self.channels_out
                resolution = (resolution - 2 + conv_layer[2]*2) // conv_layer[1] 
                if print_dim: print('Tensor dim after conv layer is: ', [*resolution, self.channels_input])
            self.body.add_module(name='Block%2d_MaxPool'%i, module=nn.MaxPool2d(kernel_size=2, stride=2))
            resolution = resolution // 2 
            if print_dim: print('Tensor dim after maxpool layer is: ', [*resolution, self.channels_input])
            return resolution
        
        def linear_block(linear_layer, resolution):
            self.input = resolution[0] * resolution[1] * min(64*(2**len(self.conv_layers)),512) # resolution[0] * resolution[1] *
            for i in range(self.linear_layers[0]-1):
                self.head.add_module(name='Linear%2d'%i, module=nn.Linear(self.input, self.linear_layers[1]))
                self.head.add_module(name='Dropout%2d'%i, module=nn.Dropout(.5))
                self.head.add_module(name='Relu_%2d'%i, module=nn.ReLU())
                self.input = linear_layer[1]
            self.head.add_module(name='output',module=nn.Linear(self.linear_layers[1], self.class_qty[0]))
        
        self.body = nn.Sequential()
        self.channels_input = self.body_input[2]
        self.resolution = np.array([self.body_input[0], self.body_input[1]])
        i=0
        for conv_layer in self.conv_layers:
            self.resolution = conv_block(conv_layer, self.resolution, i, print_dim=self.print_dim)
            i+=1

        self.head = nn.Sequential()
        linear_block(self.linear_layers, self.resolution)


    def forward(self, input):
        x = self.body(input)
#         x = nn.functional.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.size(0), -1)
#         print(x.shape)
        out = self.head(x)
#         print(out.shape)
        return out

Инициализируем модель с через конструктор

In [7]:
model = VGG_like(params)
criterion = nn.CrossEntropyLoss()
if optimizer_type == 'SGD':
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=0.0001, momentum=0.9)
elif optimizer_type == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.95, 0.99), eps=1e-06, weight_decay=0.0001, amsgrad=False)
print(optimizer)
    
if sheduler_type == 'step':
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=num_epoch//sheduler_cycle, gamma=0.1)
elif sheduler_type == 'cosine':
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, num_epoch//sheduler_cycle, eta_min=0)

Tensor dim after conv layer is:  [224, 224, 64]
Tensor dim after maxpool layer is:  [112, 112, 64]
Tensor dim after conv layer is:  [112, 112, 128]
Tensor dim after maxpool layer is:  [56, 56, 128]
Tensor dim after conv layer is:  [56, 56, 256]
Tensor dim after conv layer is:  [56, 56, 256]
Tensor dim after maxpool layer is:  [28, 28, 256]
Tensor dim after conv layer is:  [28, 28, 512]
Tensor dim after conv layer is:  [28, 28, 512]
Tensor dim after maxpool layer is:  [14, 14, 512]
Tensor dim after conv layer is:  [14, 14, 512]
Tensor dim after conv layer is:  [14, 14, 512]
Tensor dim after maxpool layer is:  [7, 7, 512]
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0.9
    nesterov: False
    weight_decay: 0.0001
)


Загружаем модель на видеокарту.

In [8]:
device = torch.device("cuda:5" if torch.cuda.is_available() else "cpu")

print(device)

model.to(device)

cuda:5


VGG_like(
  (body): Sequential(
    (Block 0_Conv_0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 0_Relu_0): ReLU()
    (Block 0_MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Block 1_Conv_0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 1_Relu_0): ReLU()
    (Block 1_MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Block 2_Conv_0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 2_Relu_0): ReLU()
    (Block 2_Conv_1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 2_Relu_1): ReLU()
    (Block 2_MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Block 3_Conv_0): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 3_Relu_0): ReLU()
    (Block 3_Conv_1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(

In [9]:
cols_name = ['epoch', 'time', 'current_lr', 'loss', 'accuracy_train', 'accuracy_val']
metrics_frame = pd.DataFrame(columns=cols_name)

## Основной тренировочный цикл и подсчет метрик.
    Основная метрика accuracy (топ1). Очень не хотелось бы получать ошибку на топ5 accuracy при 10 классах.

In [10]:
for epoch in range(num_epoch):  # loop over the dataset multiple times

    #     model.train()
    start_time = time.time()
    
    
    for i, data in enumerate(trainloader, 0):
        
        # get the inputs; data is a list of [inputs, labels]
#         inputs, labels = data
#         print(data[1])
        inputs, labels = data[0].to(device), data[1].to(device)
#         print(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
#         print('Loss: ', loss)
        loss.backward()
        optimizer.step()
    scheduler.step()
    
    #Accuracy train and val
    correct_train, correct_val = 0, 0
    total_train, total_val = 0, 0
    with torch.no_grad():
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            outputs = outputs.to(device)
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            outputs = outputs.to(device)
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()
    end_time = time.time()
    metrics = {'epoch': epoch+1,
               'time': end_time - start_time,
               'current_lr': scheduler.get_last_lr()[0],
               'loss': float(loss),
               'accuracy_train': correct_train/total_train,
               'accuracy_val': correct_val/total_val
              }
    print("Epoch {}/{}, Time: {:.2f} sec, current_lr: {:.2e}, Loss: {:.3f}, Accuracy_train: {:.3f}, Accuracy_val: {:.3f}".
          format(metrics['epoch'], num_epoch, metrics['time'], metrics['current_lr'], metrics['loss'], metrics['accuracy_train'], metrics['accuracy_val']))
    
    metrics_frame = metrics_frame.append(pd.DataFrame.from_dict(metrics,orient='index').T)
#     metrics_frame.to_csv('../metrics/VGG_constructor_metrics_1.csv',index=False)
    
#     print('Epoch: ', epoch, ' loss:', loss[0], ' accuracy_val: ', accuracy(outputs, labels))

       
        # print statistics
#         running_loss += loss.item()
#         if i % 10 == 9:    # print every 2000 mini-batches
#             print('[%d, %5d] loss: %.3f' %
#                   (epoch + 1, i + 1, loss)) #running_loss / 2000
#             print('labels:',  labels)
#             print('outputs: ', outputs)
#             running_loss = 0.0

print('Finished Training')

Epoch 1/6, Time: 110.15 sec, current_lr: 9.33e-03, Loss: 2.290, Accuracy_train: 0.105, Accuracy_val: 0.091
Epoch 2/6, Time: 109.77 sec, current_lr: 7.50e-03, Loss: 2.289, Accuracy_train: 0.105, Accuracy_val: 0.091
Epoch 3/6, Time: 110.66 sec, current_lr: 5.00e-03, Loss: 2.303, Accuracy_train: 0.107, Accuracy_val: 0.100
Epoch 4/6, Time: 110.23 sec, current_lr: 2.50e-03, Loss: 2.299, Accuracy_train: 0.133, Accuracy_val: 0.117
Epoch 5/6, Time: 110.57 sec, current_lr: 6.70e-04, Loss: 2.334, Accuracy_train: 0.162, Accuracy_val: 0.161
Epoch 6/6, Time: 111.46 sec, current_lr: 1.00e-02, Loss: 2.251, Accuracy_train: 0.184, Accuracy_val: 0.190
Finished Training
