# Тестовая задача
### Делаем pipeline для обучения модели VGG на датасете imagenette
### Основные блоки
    - Загрузка датасета
    - Предобработка данных и создание Dataloader'a
    - Создание модели VGG
    - Запуск тренировки с подсчетом метрик
    

### Загружаем датасет imagenette

In [None]:
conda install -c fastai -c pytorch fastai

In [None]:
# Print the built-in help text for `untar_data` before calling it.
# NOTE(review): `untar_data` is not defined in this cell; presumably it comes
# from the `from fastai.vision import *` star-import in a later cell, so that
# cell has to be executed first — confirm.
help(untar_data)

In [None]:
# Download and unpack the 320px Imagenette archive.
# `dest` points at '../imagenette/' because every later cell reads the data
# from '../imagenette/imagenette2-320/...'; the previous './imagenette/' target
# did not match those paths.
# NOTE(review): assumes `untar_data` unpacks into '<dest>/imagenette2-320' —
# confirm for the installed fastai version.
path = untar_data(URLs.IMAGENETTE_320, dest='../imagenette/')
path

## Pipeline. На 200 эпохах accuracy на тестовом датасете 70%.

In [1]:
import torch
import torchvision
from torch import nn
from fastai import *
from fastai.vision import *
import pandas as pd

In [2]:
# Hyperparameters shared by the cells below.
# `batch_size` is passed to the training DataLoader; `num_epoch` is the number
# of iterations of the outer training loop.
batch_size = 32
num_epoch = 200

### Посмотрим csv, приложенный к датасету.

In [None]:
# Read the CSV file shipped with the dataset and preview its first rows.
labels_csv = '../imagenette/imagenette2-320/noisy_imagenette.csv'
labels_df = pd.read_csv(labels_csv)
labels_df.head()

In [None]:
# For every column of the label table, print how many distinct values it holds.
for column_name in labels_df.columns:
    distinct_values = labels_df[column_name].unique()
    print(column_name, ' have ', len(distinct_values), 'labels')

### Создаем DataLoader, попутно предобрабатывая данные

In [3]:
from torchvision import transforms, datasets

# Per-channel mean/std used to normalize both the training and the validation
# images; defined once so the two pipelines cannot drift apart.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training pipeline: resize, then random crop + random horizontal flip as
# augmentation, then tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.Resize((260, 260)),
    # RandomResizedCrop replaces the deprecated RandomSizedCrop alias.
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Validation pipeline: deterministic resize only, no augmentation.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

# ImageFolder derives the class index of each image from its sub-directory.
trainset = datasets.ImageFolder(root='../imagenette/imagenette2-320/train/',
                                transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)

testset = datasets.ImageFolder(root='../imagenette/imagenette2-320/val/',
                               transform=test_transform)
# Use the same batch size as for training: without it the loader falls back to
# the default of one image per batch, which makes every evaluation pass slow.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False)

  "please use transforms.RandomResizedCrop instead.")


Давайте глянем на примеры картинок и проверим значения классов для них

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image

def imshow(img, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Display a normalized image tensor with matplotlib.

    Args:
        img: image tensor laid out as (channels, height, width); it is
            transposed to (height, width, channels) for plotting.
        mean: per-channel mean that was subtracted during normalization.
        std: per-channel standard deviation the image was divided by.

    The defaults match the `transforms.Normalize` statistics used by the data
    pipeline of this notebook. The previous `img / 2 + 0.5` only undoes a
    mean=0.5 / std=0.5 normalization and therefore distorted the colours.
    """
    # detach/cpu so tensors that live on the GPU or track gradients also work.
    npimg = img.detach().cpu().numpy()
    channel_mean = np.asarray(mean, dtype=npimg.dtype).reshape(-1, 1, 1)
    channel_std = np.asarray(std, dtype=npimg.dtype).reshape(-1, 1, 1)
    npimg = npimg * channel_std + channel_mean     # unnormalize
    # Keep values inside [0, 1], the range matplotlib expects for float images.
    npimg = np.clip(npimg, 0.0, 1.0)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Fetch one mini-batch from the training loader.
dataiter = iter(trainloader)
# Use the built-in next(): the iterator's `.next()` method is not available on
# current PyTorch DataLoader iterators.
images, labels = next(dataiter)

# Show the batch as a single image grid and print the matching class indices.
imshow(torchvision.utils.make_grid(images))
print(labels)


## Создаем модель VGG. Делаем VGG-16 и выкидываем слои согласно "некоторой стратегии".

In [None]:
class VGG(nn.Module):
    """Reduced VGG-16 style classifier with ten output classes.

    The body consists of five stages. Each stage is a run of 3x3 convolutions
    (stride 1, padding 1) with ReLU activations, closed by a 2x2 max pooling
    that halves the spatial size:

        layer1:   3 ->  64 ->  64 channels
        layer2:  64 -> 128 channels
        layer3: 128 -> 256 channels
        layer4: 256 -> 512 channels
        layer5: 512 -> 512 channels

    The head flattens the features, maps 7*7*512 values to 128 units
    (Dropout 0.5 + ReLU) and then to 10 raw scores. No softmax is applied.
    Because of the fixed 7*7*512 head input and the five halvings, the network
    fits inputs whose final feature map is 7x7, e.g. 3x224x224 images.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = self._stage((3, 64), (64, 64))
        self.layer2 = self._stage((64, 128))
        self.layer3 = self._stage((128, 256))
        self.layer4 = self._stage((256, 512))
        self.layer5 = self._stage((512, 512))
        self.dense_layer = nn.Sequential(
            nn.Linear(in_features=7 * 7 * 512, out_features=128),
            nn.Dropout(.5),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=10),
        )

    @staticmethod
    def _stage(*channel_pairs):
        """Build one stage: Conv3x3 + ReLU per (in, out) pair, then MaxPool 2x2."""
        modules = []
        for in_channels, out_channels in channel_pairs:
            modules.append(nn.Conv2d(in_channels, out_channels,
                                     kernel_size=3, stride=1, padding=1))
            modules.append(nn.ReLU())
        modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*modules)

    def forward(self, input):
        """Return the class scores for a batch of images."""
        features = input
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5):
            features = stage(features)
        # Flatten everything except the batch dimension for the linear head.
        flattened = features.view(features.size(0), -1)
        return self.dense_layer(flattened)

## Создаем конструктор VGG-like сетей.

### В конструктор подается словарь с параметрами сети:

body_input - разрешение и кол-во каналов входящего изображения [высота, ширина, каналы].

conv_layers - параметры сверточных блоков тела сети. Каждый блок задается как [кол-во сверточных слоев, stride, padding].

Padding и stride задаются для всех сверточных слоев в данном блоке.

linear_layers - параметры линейных слоев сети: [кол-во линейных слоев, кол-во нейронов в каждом слое]. В кол-во слоев включается выходной слой размером в кол-во классов.

class_qty - кол-во классов.

In [4]:
# Configuration consumed by the VGG_like constructor.
params = {
    # Input image geometry: [height, width, channels].
    'body_input': [224, 224, 3],
    # One entry per convolutional block:
    # [number of conv layers, stride, padding] (applied to every conv layer of the block).
    'conv_layers': [
        [2, 1, 1],
        [2, 1, 1],
        [3, 1, 1],
        [3, 1, 1],
        [3, 1, 1],
    ],
    # [total number of linear layers including the output layer, neurons per hidden layer].
    'linear_layers': [3, 128],
    # Number of classes, wrapped in a one-element list.
    'class_qty': [10],
}

In [5]:
class VGG_like(nn.Module):
    """Configurable VGG-style network assembled from a parameter dictionary.

    Expected keys of `params`:
        'body_input':    [height, width, channels] of the input images.
        'conv_layers':   one entry per block, [number of conv layers, stride,
                         padding]; stride and padding apply to every 3x3
                         convolution of that block. Each block ends with a 2x2
                         max pooling. Block `i` outputs min(64 * 2**i, 512)
                         channels.
        'linear_layers': [total number of linear layers including the output
                         layer, neurons per hidden layer]. Every hidden layer
                         is followed by Dropout(0.5) and ReLU.
        'class_qty':     one-element list with the number of classes.

    Attributes:
        body: nn.Sequential with all convolutional blocks.
        head: nn.Sequential with the linear classifier; returns raw scores.
        resolution: numpy array [height, width] of the feature map leaving
            the body, computed from the configuration.
    """

    def __init__(self, params):
        super().__init__()
        self.body_input = params['body_input']
        self.conv_layers = params['conv_layers']
        self.linear_layers = params['linear_layers']
        self.class_qty = params['class_qty']

        self.body = nn.Sequential()
        self.channels_input = self.body_input[2]
        self.channels_out = self.channels_input
        self.resolution = np.array([self.body_input[0], self.body_input[1]])
        for block_index, conv_layer in enumerate(self.conv_layers):
            self.resolution = self._add_conv_block(conv_layer, self.resolution, block_index)

        self.head = nn.Sequential()
        self._add_linear_block(self.linear_layers, self.resolution)

    def _add_conv_block(self, conv_layer, resolution, i):
        """Append block `i` to the body and return the resolution after it."""
        layer_qty, stride, padding = conv_layer
        self.channels_out = min(64 * (2 ** i), 512)
        for qty in range(layer_qty):
            # Module names keep the original '%2d' formatting (e.g.
            # 'Block 0_Conv_0') so existing state_dict keys stay valid.
            self.body.add_module('Block%2d_Conv_%d' % (i, qty), nn.Conv2d(
                self.channels_input,
                self.channels_out,
                kernel_size=3,
                stride=stride,
                padding=padding))
            self.body.add_module('Block%2d_Relu_%d' % (i, qty), nn.ReLU())
            self.channels_input = self.channels_out
            # Output size of a 3x3 convolution, using stride and padding in the
            # same roles as in the Conv2d call above.
            resolution = (resolution + 2 * padding - 3) // stride + 1
        self.body.add_module('Block%2d_MaxPool' % i, nn.MaxPool2d(kernel_size=2, stride=2))
        return resolution // 2

    def _add_linear_block(self, linear_layer, resolution):
        """Append the hidden linear layers and the output layer to the head."""
        # The flattened size uses the channel count the body really produces
        # (tracked in channels_input), not one derived from the block count.
        self.input = int(resolution[0]) * int(resolution[1]) * self.channels_input
        for i in range(linear_layer[0] - 1):
            self.head.add_module('Linear%2d' % i, nn.Linear(self.input, linear_layer[1]))
            self.head.add_module('Dropout%2d' % i, nn.Dropout(.5))
            self.head.add_module('Relu_%2d' % i, nn.ReLU())
            self.input = linear_layer[1]
        # self.input is still the flattened size when there is no hidden layer.
        self.head.add_module('output', nn.Linear(self.input, self.class_qty[0]))

    def forward(self, input):
        """Return the class scores for a batch of images."""
        x = self.body(input)
        # Flatten everything except the batch dimension for the linear head.
        x = x.view(x.size(0), -1)
        return self.head(x)

Инициализируем модель с помощью рукописного класса

In [None]:
# Instantiate the hand-written VGG network together with its training objects:
# cross-entropy loss and an Adam optimizer over all model parameters.
model = VGG()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

Инициализируем модель через конструктор

In [6]:
# Build the network from the `params` dictionary via the VGG_like constructor,
# then create the cross-entropy loss and an Adam optimizer over its parameters.
model = VGG_like(params)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0,
    amsgrad=False,
)

Resolution after conv layer is:  [224 224]
Resolution after conv layer is:  [224 224]
Resolution after maxpool layer is:  [112 112]
Resolution after conv layer is:  [112 112]
Resolution after conv layer is:  [112 112]
Resolution after maxpool layer is:  [56 56]
Resolution after conv layer is:  [56 56]
Resolution after conv layer is:  [56 56]
Resolution after conv layer is:  [56 56]
Resolution after maxpool layer is:  [28 28]
Resolution after conv layer is:  [28 28]
Resolution after conv layer is:  [28 28]
Resolution after conv layer is:  [28 28]
Resolution after maxpool layer is:  [14 14]
Resolution after conv layer is:  [14 14]
Resolution after conv layer is:  [14 14]
Resolution after conv layer is:  [14 14]
Resolution after maxpool layer is:  [7 7]


In [7]:
# Use the first CUDA device when CUDA is available, otherwise fall back to CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

print(device)

# Move the model to the selected device; as the last expression of the cell
# the returned module is also displayed.
model.to(device)

cuda:0


VGG_like(
  (body): Sequential(
    (Block 0_Conv_0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 0_Relu_0): ReLU()
    (Block 0_Conv_1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 0_Relu_1): ReLU()
    (Block 0_MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Block 1_Conv_0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 1_Relu_0): ReLU()
    (Block 1_Conv_1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 1_Relu_1): ReLU()
    (Block 1_MaxPool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (Block 2_Conv_0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 2_Relu_0): ReLU()
    (Block 2_Conv_1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (Block 2_Relu_1): ReLU()
    (Block 2_Conv_2): Conv2d(256, 256, kernel_size=(3, 3), strid

## Основной тренировочный цикл и подсчет метрик.
    Основная метрика accuracy (топ1). Очень не хотелось бы получать ошибку на топ5 accuracy при 10 классах.

In [None]:
def evaluate_accuracy(model, loader, device):
    """Return the top-1 accuracy of `model` over all batches of `loader`.

    Batches are moved to `device` before the forward pass and no gradients are
    recorded. Returns NaN when the loader yields no samples.
    """
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total if total else float('nan')


for epoch in range(num_epoch):  # loop over the dataset multiple times

    # Training mode: layers such as Dropout are active while fitting.
    model.train()
    running_loss, batch_count = 0.0, 0

    for inputs, labels in trainloader:
        # Use the same `device` for training and evaluation so the loop also
        # runs on machines without CUDA.
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batch_count += 1

    # Evaluation mode: switches Dropout off so the accuracies are not computed
    # on randomly masked activations.
    model.eval()
    accuracy_train = evaluate_accuracy(model, trainloader, device)
    accuracy_val = evaluate_accuracy(model, testloader, device)

    # Report the mean training loss of the epoch rather than the loss of the
    # last mini-batch only.
    epoch_loss = running_loss / max(batch_count, 1)
    print("Epoch {}/{}, Loss: {:.3f}, Accuracy_train: {:.3f}, Accuracy_val: {:.3f}".
          format(epoch+1, num_epoch, epoch_loss, accuracy_train, accuracy_val))

print('Finished Training')

Epoch 1/200, Loss: 2.319, Accuracy_train: 0.099, Accuracy_val: 0.098
