In [1]:
import time

import torch
import torch.optim as optim

import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import accuracy_score

import numpy as np
import PIL
import random
from IPython.core.display import Image, display

import matplotlib.pyplot as plt
import warnings
warnings.simplefilter("ignore")

In [2]:
# Environment report: PyTorch build and the CUDA devices visible to it.
print(torch.__version__)
print(torch.cuda.is_available())

# The device-specific queries below raise on a machine without a usable GPU,
# so they are only issued when CUDA is actually available.
if torch.cuda.is_available():
    print(torch.cuda.device_count(), torch.cuda.get_device_name(0))

    print(torch.cuda.current_device())
    print(torch.cuda.device(0))
print("CudaVersion : ",torch.version.cuda)

1.0.1.post2
True
4 GeForce GTX 1080 Ti
0
<torch.cuda.device object at 0x7f16b4ae6dd8>
CudaVersion :  9.0.176


In [3]:
# Index of the GPU to train on (used only when CUDA is available).
DEVICE_ID = 0

# Prefer the selected GPU, but fall back to the CPU automatically so that the
# notebook runs unmodified on machines without CUDA.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:%d' % DEVICE_ID)
    torch.cuda.set_device(DEVICE_ID)
else:
    DEVICE = torch.device('cpu')

In [4]:
# Seed every random number generator the notebook may draw from, not only
# NumPy's: PyTorch drives weight initialisation, dropout and DataLoader
# shuffling, and Python's `random` may be used by the image augmentations
# (depends on the installed torchvision version).
np.random.seed(100500)
random.seed(100500)
torch.manual_seed(100500)

def data2image(data):
    """Convert one flat image record into a PIL image.

    `data` is reshaped to (3, 32, 32), i.e. channel-first, then reordered to
    (32, 32, 3) and cast to uint8 before being handed to PIL.
    """
    channels_first = np.reshape(data, (3, 32, 32))
    channels_last = np.transpose(channels_first, (1, 2, 0))
    as_bytes = np.uint8(channels_last)
    return PIL.Image.fromarray(as_bytes)

def imshow(img):
    """Display a channel-first (C, H, W) image with matplotlib.

    Args:
        img: a ``torch.Tensor`` or a NumPy array laid out as (C, H, W).
            Tensors are cast to uint8 before plotting; arrays are plotted
            as they are.
    """
    if isinstance(img, torch.Tensor):
        # detach() + cpu() make this work for tensors that live on the GPU or
        # are part of an autograd graph; a bare .numpy() raises for both.
        img = img.detach().cpu().numpy().astype('uint8')
    # matplotlib expects channel-last (H, W, C) data.
    plt.imshow(np.transpose(img, (1, 2, 0)))
    
def prediction2classes(output_var):
    """Convert network outputs into a list of predicted class indices.

    Args:
        output_var: tensor of per-class scores; the arg-max is taken along
            dimension 1.

    Returns:
        list: predicted class indices. The result is always a list, including
        for a batch that holds a single sample.
    """
    _, predicted = torch.max(output_var.detach(), 1)
    predicted = predicted.squeeze()
    if predicted.dim() == 0:
        # A one-sample batch squeezes down to a 0-d tensor whose tolist() is
        # a bare int; callers extend lists with the result, so keep it 1-d.
        predicted = predicted.unsqueeze(0)
    return predicted.tolist()

def make_solution_pytorch(net, input_tensor, a_batch_size):
    """Predict classes for `input_tensor` by running `net` over mini-batches.

    Args:
        net: the model; it is switched to evaluation mode.
        input_tensor: samples stacked along dimension 0.
        a_batch_size: number of samples per forward pass; must be positive.

    Returns:
        list: predicted class index for every sample, in input order
        (empty when `input_tensor` is empty).

    Raises:
        ValueError: if `a_batch_size` is not positive (the loop could never
            advance).
    """
    if a_batch_size <= 0:
        raise ValueError("a_batch_size must be a positive integer")

    res = []
    net = net.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        # range() stops before len(input_tensor), so the network is never
        # called with an empty trailing slice.
        for cur_pos in range(0, len(input_tensor), a_batch_size):
            outputs = net(input_tensor[cur_pos:cur_pos + a_batch_size])
            res += prediction2classes(outputs)
    return res

In [5]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image

class CifarDataset(Dataset):
    """Dataset over a ``.npy`` file of flattened 3x32x32 images.

    For training files the first column of every row is the label and the
    remaining columns are the pixels; test files contain pixels only.
    Images are kept in memory as PIL images so that a torchvision-style
    `transform` can be applied when an item is requested.
    """

    def __init__(self, input_path, is_train=True, transform=None):
        raw = np.load(input_path)

        if is_train:
            # Column 0 carries the label, everything after it is the image.
            label_column, pixels = np.hsplit(raw, [1])
            labels = [row[0] for row in label_column]
        else:
            pixels, labels = raw, None

        # (N, 3*32*32) -> (N, C, H, W) -> (N, H, W, C), the layout PIL expects
        n_images = pixels.shape[0]
        pixels = pixels.reshape((n_images, 3, 32, 32)).transpose((0, 2, 3, 1))

        self.X = [Image.fromarray(picture) for picture in pixels]
        self.Y = labels
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        picture = self.X[idx]
        if self.transform:
            picture = self.transform(picture)

        # Unlabelled (test) data yields the bare sample.
        if self.Y is None:
            return picture
        return (picture, self.Y[idx])

In [6]:
# Directory names that contain spaces must NOT be wrapped in "" here.
# DATA_PATH is joined with the file names below by plain string concatenation,
# so it has to end with a path separator.
DATA_PATH  = '/home/vv.ivanov/neural-networks-2019/hw-4/data/'
train_path = 'homework_4.train.npy'
test_path  = 'homework_4_no_classes.test.npy'

In [7]:
# Per-channel mean and standard deviation of the training images, used for
# input normalisation. The training set is loaded and converted to tensors
# only once; both statistics are computed from the same stacked array.
_train_pixels = np.stack([
    item[0].numpy()
    for item in CifarDataset(
        DATA_PATH + train_path,
        transform=transforms.ToTensor()
    )
])

# Reduce over samples, height and width; only the channel axis is kept.
np_mean = _train_pixels.mean(axis=(0, 2, 3))
np_std = _train_pixels.std(axis=(0, 2, 3))

# Release the stacked copy of the training set.
del _train_pixels

In [8]:
# Evaluation pipeline: tensor conversion followed by per-channel
# standardisation with the training-set statistics.
cifar_test_transform_norm = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        mean=torch.FloatTensor(np_mean),
        std=torch.FloatTensor(np_std),
    ),
])

# Training pipeline: pad by 4 pixels, take a random 32x32 crop and flip
# horizontally at random, then the same conversion and standardisation.
cifar_transform_norm = transforms.Compose([
    transforms.Pad(padding=4),
    transforms.RandomCrop(size=32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=torch.FloatTensor(np_mean),
        std=torch.FloatTensor(np_std),
    ),
])

In [9]:
# Training data: labelled images with augmentation, reshuffled on every pass.
dataset_train_norm = CifarDataset(
    DATA_PATH + train_path,
    transform=cifar_transform_norm,
)
dataloader_train_norm = DataLoader(
    dataset_train_norm,
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

# Test data: unlabelled images, kept in file order so that the position of a
# prediction matches the position of its image.
dataset_test_norm = CifarDataset(
    DATA_PATH + test_path,
    is_train=False,
    transform=cifar_test_transform_norm,
)
dataloader_test_norm = DataLoader(
    dataset_test_norm,
    batch_size=128,
    shuffle=False,
    num_workers=1,
)


def train_network(a_net, 
                  a_device,
                  dataloader_train_norm=dataloader_train_norm,
                  a_epochs=164,
                  a_batch_size=128,
                  a_lr=0.1):
    """Train `a_net` with SGD and write test predictions after every epoch.

    The learning rate starts at `a_lr` and is lowered to `a_lr/4`,
    `a_lr/10`, `a_lr/100` and `a_lr/200` from epochs 41, 82, 123 and 150
    respectively. After each epoch the mean per-batch training accuracy and
    the epoch duration are printed, and predictions for the test set are
    written to ``DATA_PATH + 'my_solution.csv'``. When training finishes the
    accuracy curve is plotted.

    Args:
        a_net: the model to train; it is moved to `a_device`.
        a_device: device used for training and for the per-epoch predictions.
        dataloader_train_norm: iterable of ``(inputs, labels)`` batches.
        a_epochs: number of passes over the training data.
        a_batch_size: not used by this function (the batch size is set by
            the data loader); kept so that existing calls keep working.
        a_lr: base learning rate.
    """
    # First epoch of each stage -> divisor applied to the base learning rate.
    lr_divisors = {41: 4, 82: 10, 123: 100, 150: 200}

    start_time = time.time()
    train_acc = []
    net = a_net.to(a_device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=a_lr, weight_decay=0.0001, momentum=0.9)

    prev_epoch_time = start_time
    for epoch in range(a_epochs):  # loop over the dataset multiple times
        if epoch in lr_divisors:
            # Change the rate in place. Building a fresh optimizer here would
            # also throw away the momentum accumulated so far.
            for param_group in optimizer.param_groups:
                param_group['lr'] = a_lr / lr_divisors[epoch]

        # make_solution() below leaves the model in eval mode, so training
        # mode has to be restored at the start of every epoch.
        net = net.train()
        epoch_accuracy = 0.0
        epoch_iters = 0
        for item in dataloader_train_norm:

            epoch_iters += 1

            inputs = item[0].to(a_device)
            labels = item[1].long().to(a_device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_accuracy += accuracy_score(labels.cpu(), prediction2classes(outputs))

        # An empty loader yields no batches; avoid dividing by zero.
        if epoch_iters:
            epoch_accuracy /= epoch_iters
        train_acc.append(epoch_accuracy)

        print("Epoch ", epoch, round(train_acc[-1], 4))
        cur_epoch_time = time.time()
        print('Epoch time : ', cur_epoch_time - prev_epoch_time )
        prev_epoch_time = cur_epoch_time

        # Refresh the submission file after every epoch so that an
        # interrupted run still leaves usable predictions behind.
        # Predictions run on the device the model was trained on.
        my_solution = make_solution(net, a_device)
        file_name = DATA_PATH + 'my_solution.csv'

        with open(file_name, 'w') as fout:
            print('Id', 'Prediction', sep=',', file=fout)
            for i, prediction in enumerate(my_solution):
                print(i, prediction, sep=',', file=fout)

    print('Finished Training')
    print("Total time : ", (time.time()-start_time))

    plt.plot(train_acc, label='Train')
    plt.legend()
    plt.grid(c='grey')

In [10]:
def conv3x3(a_in_planes, a_out_planes, a_stride=1):
    """
    Main convolutional building block of the ResNet: a bias-free 3x3
    convolution.

    Uses padding=1, so with the default stride the spatial size of the input
    is preserved.

    The layer is created on the default device; move the enclosing model with
    ``.to(device)`` (a hard-coded ``.cuda()`` here would break CPU-only runs).
    """
    return nn.Conv2d(a_in_planes, 
                     a_out_planes,  
                     stride=a_stride,
                     kernel_size=3, 
                     padding=1, 
                     bias=False)

def conv1x1(a_in_planes, a_out_planes):
    """
    Second basic convolutional building block of the ResNet: a bias-free
    1x1 convolution mapping `a_in_planes` channels to `a_out_planes`.
    """
    pointwise = nn.Conv2d(
        in_channels=a_in_planes,
        out_channels=a_out_planes,
        kernel_size=1,
        bias=False,
    )
    return pointwise

def x_downsample(a_in_planes, a_out_planes, stride):
    """
    Shortcut projection: a bias-free 1x1 convolution that maps the block
    input to `a_out_planes` channels, optionally striding.

    The stride is a parameter because the spatial size does not always have
    to be reduced here (unlike in the plain ResNet).

    The layer is created on the default device; move the enclosing model with
    ``.to(device)`` (a hard-coded ``.cuda()`` here would break CPU-only runs).
    """
    return nn.Conv2d(a_in_planes, 
                     a_out_planes,
                     kernel_size=1,
                     stride=stride,
                     bias=False)

In [23]:
class CifarResidualBlock(nn.Module):
    """Residual block: BN-ReLU-conv3x3, dropout, BN-ReLU-conv3x3 plus a
    1x1-convolution shortcut.

    The first convolution changes the channel count, the second one applies
    `stride`. The shortcut is always a 1x1 convolution with the same stride,
    so its output can be added to the main branch.
    """

    def __init__(self, a_in_planes, a_out_planes, stride=1):
        super().__init__()
        self.a_in_planes = a_in_planes

        # Sub-modules are created in this fixed order; it determines the
        # order of the parameters and of the random initialisation.
        self.bn1 = nn.BatchNorm2d(num_features=a_in_planes)
        self.bn2 = nn.BatchNorm2d(num_features=a_out_planes)

        self.conv1 = conv3x3(a_in_planes, a_out_planes)
        self.conv2 = conv3x3(a_out_planes, a_out_planes, stride)

        self.downsample = x_downsample(a_in_planes, a_out_planes, stride)
        self.dropout = nn.Dropout(p=0.3)  # regularisation against overfitting

        self.relu = nn.ReLU(inplace=True)
        self.stride = stride

    def forward(self, x):
        # Normalisation and activation come before each convolution
        # (the ordering was changed to follow the paper).
        hidden = self.conv1(self.relu(self.bn1(x)))
        hidden = self.dropout(hidden)

        residual = self.conv2(self.relu(self.bn2(hidden)))

        # Project the input so that it matches the main branch, then add.
        residual += self.downsample(x)
        return residual

In [24]:
from collections import OrderedDict

class CifarWideResNet(nn.Module):
    """Wide residual network producing 100 class scores per image.

    Layout: a 3x3 stem convolution, four groups of `blocks_per_layer`
    residual blocks with 16/32/64/128 * `expansion_factor` channels, then
    BN-ReLU, global average pooling and a linear classifier.
    """

    def __init__(self):
        super(CifarWideResNet, self).__init__()
        # architecture from the WideResNet paper
        self.expansion_factor = 4
        self.in_planes = 16
        # chosen a little larger than the configuration that, according to
        # the paper, is enough to beat the baseline
        self.plane_lst = np.hstack(([16], np.array([16, 32, 64, 128]) * self.expansion_factor))
        # every group except the first one halves the spatial size
        # (the last entry of this array is never read)
        self.stride_lst = np.hstack(([1], np.full(self.plane_lst.size - 1, 2)))
        # number of residual blocks in every group
        self.blocks_per_layer = 3

        # the stem convolution followed by plane_lst.size - 1 groups of
        # blocks_per_layer residual blocks each
        self.features = self._assemble_resnet_layers()

        # Plain Python int: layer constructors should not receive NumPy scalars.
        out_planes = int(self.plane_lst[-1])
        self.bn = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        # True global average pooling. With 32x32 inputs the final feature
        # map is 4x4, which is smaller than a fixed 8x8 pooling window.
        self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(out_planes, 100)

    def _assemble_resnet_layers(self):
        """
        Build the feature extractor: the stem convolution followed by the
        groups of residual blocks, as one ``nn.Sequential``.
        """
        planes = [int(p) for p in self.plane_lst]
        strides = [int(s) for s in self.stride_lst]

        blocks = OrderedDict([("conv_0", conv3x3(3, planes[0]))])
        for i in range(1, len(planes)):
            # The first block of a group changes the channel count and
            # applies the group's stride.
            blocks[f"res_{i}_0"] = CifarResidualBlock(planes[i - 1],
                                                     planes[i],
                                                     stride=strides[i - 1])
            for j in range(1, self.blocks_per_layer):
                blocks[f"res_{i}_{j}"] = CifarResidualBlock(planes[i],
                                                            planes[i])
        return nn.Sequential(blocks)

    def forward(self, x):
        out = self.features(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.global_avg_pooling(out)
        # (N, C, 1, 1) -> (N, C)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [25]:
def make_solution(a_net, a_device):
    """Predict a class for every batch yielded by `dataloader_test_norm`.

    Args:
        a_net: the model; it is switched to evaluation mode.
        a_device: device the input batches are moved to before the forward
            pass (the model is expected to be on it already).

    Returns:
        list: predicted class indices in loader order.
    """
    res = []
    net = a_net.eval()
    # Inference only: without no_grad() every forward pass would record an
    # autograd graph that is never used.
    with torch.no_grad():
        for item in dataloader_test_norm:
            inputs = item.to(a_device)
            outputs = net(inputs)

            res += prediction2classes(outputs)
    return res

In [26]:
# Build the network and train it with the default schedule on DEVICE.
wresnet_4 = CifarWideResNet()
train_network(a_net=wresnet_4, a_device=torch.device(DEVICE))

Epoch  0 0.0651
Epoch time :  50.29225397109985
Epoch  1 0.1408
Epoch time :  53.80798840522766
Epoch  2 0.1981
Epoch time :  54.217467069625854
Epoch  3 0.2483
Epoch time :  54.2347891330719
Epoch  4 0.2913
Epoch time :  54.33428072929382
Epoch  5 0.3277
Epoch time :  54.23528170585632
Epoch  6 0.3568
Epoch time :  54.2319540977478
Epoch  7 0.3803
Epoch time :  54.25842833518982
Epoch  8 0.4025
Epoch time :  54.56266474723816
Epoch  9 0.4246
Epoch time :  54.37941336631775
Epoch  10 0.4412
Epoch time :  54.218852519989014
Epoch  11 0.4569
Epoch time :  54.3460168838501
Epoch  12 0.4727
Epoch time :  54.34378981590271
Epoch  13 0.4853
Epoch time :  54.32023811340332
Epoch  14 0.5003
Epoch time :  54.16166853904724
Epoch  15 0.5125
Epoch time :  54.28823399543762
Epoch  16 0.52
Epoch time :  54.453396797180176
Epoch  17 0.5297
Epoch time :  54.49353361129761
Epoch  18 0.536
Epoch time :  54.521279096603394
Epoch  19 0.5456
Epoch time :  54.427082777023315
Epoch  20 0.5556
Epoch time :  

KeyboardInterrupt: 

In [28]:
# Predict classes for the test set with the network in its current state.
my_solution = make_solution(wresnet_4, DEVICE)

In [30]:
file_name = DATA_PATH + 'newest_submission.csv'

# Submission file: a header row, then one "<row index>,<predicted class>"
# line per entry of my_solution.
with open(file_name, 'w') as fout:
    fout.write('Id,Prediction\n')
    fout.writelines(
        f'{row_id},{label}\n' for row_id, label in enumerate(my_solution)
    )
        
# from google.colab import files
# files.download(file_name)