# Заголовок

In [1]:
import torch
import torchvision
from torch import nn
import pandas as pd
import numpy as np

In [2]:
# hyper params
batch_size = 32
num_epoch = 200

### Создаем DataLoader попутно предобрабатывая данные
- Предварительный смотр данных можно найти в VGG_like.ipynb

In [3]:
from torchvision import transforms, datasets

train_transform = transforms.Compose([
        transforms.Resize((260,260)),
        transforms.RandomSizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
test_transform = transforms.Compose([
        transforms.Resize((260,260)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

trainset = datasets.ImageFolder(root='../imagenette/imagenette2-320/train/', transform=train_transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True)
testset = datasets.ImageFolder(root='../imagenette/imagenette2-320/val/', transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, #batch_size=batch_size,
                                         shuffle=False)

  "please use transforms.RandomResizedCrop instead.")


## Создаем конструктор ResNet-like сетей.

### В конструктор подается словарь с параметрами сети:

 - body_input - 
 - conv_layers - 

 - class_qty - кол-во классов.
 - print_dim - печатает параметры тензора на выходе из соответствующего слоя.

Примеры:
ResNet-18: 
block_qty = [2,2,2,2]
weight_reduction = False

ResNet-18: 
block_qty = [3,4,6,3]
weight_reduction = False

ResNet-50:
block_qty = [3,4,6,3]
weight_reduction = True

ResNet-101:
block_qty = [3,4,23,3]
weight_reduction = True

ResNet-152:
block_qty = [3,8,36,3]
weight_reduction = True

In [4]:
params = {
    'net_input': [224, 224, 3], # list height, width, channel
    'first_layer': [7, 64, 2, 3], # params of first conv_layer. [kernel_size , channel_qty, stride, padding]
    'first_maxpool': [3 ,2, 1], # params of first maxpool_layer. [kernel_size , stride, padding]
    'blocks_qty': [2,2,2,2], # list of lists. each layer should be list: qty conv layers, stride same or valid, padding
    'weight_reduction': False, # using block with less weights qty
    'class_qty': [10],
    'print_dim': True # True if you want to show how to change the tensor dimention via convolutional layers
}

In [44]:
class ResNet_like(nn.Module):

    def __init__(self, params):
        self.net_input = params['net_input']
        self.first_layer = params['first_layer']
        self.first_maxpool = params['first_maxpool']
        self.blocks_qty = params['blocks_qty']
        self.class_qty = params['class_qty']
        self.weight_reduction = params['weight_reduction']
        self.print_dim = params['print_dim']
        
        super().__init__()
        
        def block_contruct(block, no_in_block, resolution=None):
            

            self.channel_out = 16*(2**block)
            print('Input channels [in, out]: ', self.channel_in, self.channel_out)
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_Conv_%d_1'%(block, no_in_block), 
                        module=nn.Conv2d(
                        self.channel_in,
                        self.channel_out,
                        kernel_size=3, stride=1, padding=(1,1))
                        )
            self.channel_in = self.channel_out
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_BN_%d_1'%(block, no_in_block),
                        module=nn.BatchNorm2d(self.channel_out)
                        )
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_Relu_%d_1'%(block, no_in_block),
                        module=nn.ReLU()
                        )
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_Conv_%d_2'%(block, no_in_block), 
                        module=nn.Conv2d(
                        self.channel_in,
                        self.channel_out,
                        kernel_size=3, stride=1, padding=1)
                        )
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_BN_%d_2'%(block, no_in_block),
                        module=nn.BatchNorm2d(self.channel_out)
                        )
            print('Out channels [in, out]: ', self.channel_in, self.channel_out)
      
        def reduction_block_construct(block, no_in_block, resolution=None):
                
            self.channel_out = 16*(2**block)
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_Conv_%d_1'%(block, no_in_block), 
                        module=nn.Conv2d(
                        self.channel_in,
                        self.channel_out,
                        kernel_size=3, stride=1, padding=1)
                        )
            self.channel_out = 16*(2**block)*4
            
            self.channel_in = self.channel_out
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_BN_%d_1'%(block, no_in_block),
                        module=nn.BatchNorm2d(self.channel_out)
                        )
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_Relu_%d_1'%(block, no_in_block),
                        module=nn.ReLU()
                        )

            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_Conv_%d_2'%(block, no_in_block), 
                        module=nn.Conv2d(
                        self.channel_in,
                        self.channel_out,
                        kernel_size=3, stride=1, padding=1)
                        )
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_BN_%d_2'%(block, no_in_block),
                        module=nn.BatchNorm2d(self.channel_out)
                        )
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_Relu_%d_2'%(block, no_in_block),
                        module=nn.ReLU()
                        )

            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_Conv_%d_3'%(block, no_in_block), 
                        module=nn.Conv2d(
                        self.channel_in,
                        self.channel_out,
                        kernel_size=3, stride=1, padding=1)
                        )
            self.module_dict['Block_%d_%d'%(block, no_in_block)].add_module(
                        name='Block_%d_BN_%d_2'%(block, no_in_block),
                        module=nn.BatchNorm2d(self.channel_out)
                        )

        
        # initial main net
        self.first = nn.Sequential()
        self.channel_in = self.net_input[2]
        self.channel_out = self.first_layer[1]
        self.resolution = np.array([self.net_input[0], self.net_input[1]])
        
        # first conv block
        self.first.add_module(name='Block_1_Conv_1_1', module=nn.Conv2d(
                        self.channel_in, 
                        self.channel_out, 
                        kernel_size=self.first_layer[0], 
                        stride=self.first_layer[2], 
                        padding=self.first_layer[3]))
        self.channel_in = self.channel_out
        self.resolution = (self.resolution - self.first_layer[0] + 1 + (self.first_layer[3] * 2)) // self.first_layer[2]
        self.first.add_module(name='Block_1_MaxPool_1', module=nn.MaxPool2d(
                        kernel_size=self.first_maxpool[0],
                        stride=self.first_maxpool[1],
                        padding=self.first_maxpool[2]))
        self.resolution = (self.resolution - self.first_maxpool[0] + 1 + (self.first_maxpool[2] * 2)) // self.first_maxpool[1]
        
        self.module_dict = {}
        
        block=2
        for block_number in self.blocks_qty:
            for no_in_block in range(block_number):
                if self.weight_reduction:
                    self.module_dict['Block_%d_%d'%(block, no_in_block+1)] = nn.Sequential()
                    reduction_block_construct(block, no_in_block+1) #params for correct layers naming
                else:
                    self.module_dict['Block_%d_%d'%(block, no_in_block+1)] = nn.Sequential()
                    block_contruct(block, no_in_block+1)  #params for correct layers naming
                # add maxpool after each block
                if no_in_block + 1 == block_number and block - 1 != len(self.blocks_qty):
                    self.resolution = self.resolution // 2
            block+=1
        
        self.out = nn.Sequential()
        if self.weight_reduction:
            self.linear_input = self.resolution[0] * self.resolution[1] * (32*(2**(len(self.blocks_qty)))*4)
        else:
            self.linear_input = self.resolution[0] * self.resolution[1] * (32*(2**(len(self.blocks_qty))))
        self.out.add_module(name='output',module=nn.Linear(self.linear_input, self.class_qty[0]))


        
    def forward(self, x):
        x = self.first(x)
        block = 2
        for block_number in self.blocks_qty:
            for no_in_block in range(block_number):
                print(x.shape)
                x_out = self.module_dict['Block_%d_%d' % (block, no_in_block + 1)](x)
                print(x.shape, x_out.shape)
                if x.shape == x_out.shape:
                    x = x_out + x
                else:
                    x = nn.functional.conv2d(x, torch.randn(x_out.shape[1], *x.shape[1:]), stride=1)
                    x = x_out + x
                x = nn.functional.relu(x)
                if no_in_block + 1 == block_number and block - 1 != len(self.blocks_qty):
                    x = nn.functional.max_pool2d(x, kernel_size=2, stride=2)
            block += 1
        # x = torch.mean(x.view(x.size(0), x.size(1), -1), dim=2)
        # x = nn.fuctional.adaptive_avg_pool2d(x, (1, 1)) # це вопрос

        x = x.view(x.size(0), -1)
        #         print(x.shape)
        out = self.out(x)
        #         print(out.shape)
        return out


In [89]:
def foo(i, j):
    d['Block_%d_%d'%(i,j)].add_module(name='Block_%d_Conv_%d_1'%(i, j), 
                                module=nn.Conv2d(
                                64,
                                64,
                                kernel_size=3, stride=1, padding=1)
                                )

d = {}
for i in range(5):
    for j in range(4):
        d['Block_%d_%d'%(i,j)] = nn.Sequential()
        foo(i,j)
print(d['Block_3_2'])

Sequential(
  (Block_3_Conv_2_1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)


Инициализируем модель с через конструктор

In [45]:
model = ResNet_like(params)
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

Input channels [in, out]:  64 64
Out channels [in, out]:  64 64
Input channels [in, out]:  64 64
Out channels [in, out]:  64 64
Input channels [in, out]:  64 128
Out channels [in, out]:  128 128
Input channels [in, out]:  128 128
Out channels [in, out]:  128 128
Input channels [in, out]:  128 256
Out channels [in, out]:  256 256
Input channels [in, out]:  256 256
Out channels [in, out]:  256 256
Input channels [in, out]:  256 512
Out channels [in, out]:  512 512
Input channels [in, out]:  512 512
Out channels [in, out]:  512 512


Загружаем модель на видеокарту.

In [46]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

print(device)

model.to(device)

cuda:1


ResNet_like(
  (first): Sequential(
    (Block_1_Conv_1_1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (Block_1_MaxPool_1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (out): Sequential(
    (output): Linear(in_features=25088, out_features=10, bias=True)
  )
)

## Основной тренировочный цикл и подсчет метрик.
    Основная метрика accuracy (топ1). Очень не хотелось бы получать ошибку на топ5 accuracy при 10 классах.

In [47]:
for epoch in range(num_epoch):  # loop over the dataset multiple times
    
#     model.train()
    
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
#         inputs, labels = data
#         print(data[1])
        inputs, labels = data[0].to(device), data[1].to(device)
#         print(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
#         print('Loss: ', loss)
        loss.backward()
        optimizer.step()

    #Accuracy train and val
    correct_train, correct_val = 0, 0
    total_train, total_val = 0, 0
    with torch.no_grad():
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            outputs = outputs.to(device)
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            outputs = outputs.to(device)
            _, predicted = torch.max(outputs.data, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    print("Epoch {}/{}, Loss: {:.3f}, Accuracy_train: {:.3f}, Accuracy_val: {:.3f}".
          format(epoch+1, num_epoch, loss, correct_train/total_train, correct_val/total_val))
#     print('Epoch: ', epoch, ' loss:', loss[0], ' accuracy_val: ', accuracy(outputs, labels))

       
        # print statistics
#         running_loss += loss.item()
#         if i % 10 == 9:    # print every 2000 mini-batches
#             print('[%d, %5d] loss: %.3f' %
#                   (epoch + 1, i + 1, loss)) #running_loss / 2000
#             print('labels:',  labels)
#             print('outputs: ', outputs)
#             running_loss = 0.0

print('Finished Training')

torch.Size([32, 64, 56, 56])


RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same