In [1]:
%matplotlib inline

In [2]:
import torch
import torch.optim as optim

import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from torch.utils.data.sampler import SubsetRandomSampler

import numpy as np
import PIL
import random
from IPython.core.display import Image, display

import matplotlib.pyplot as plt

In [3]:
# Index of the CUDA device used for training.
DEVICE_ID = 0

# Use the requested GPU when CUDA is present; otherwise fall back to the CPU so
# the notebook still runs top-to-bottom on a machine without a GPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda:%d' % DEVICE_ID)
    torch.cuda.set_device(DEVICE_ID)
else:
    DEVICE = torch.device('cpu')

In [4]:
### Для запуска без GPU раскомментировать строку ниже и закомментировать код выше
# DEVICE = torch.device('cpu')

In [5]:
# Seed every random number generator the notebook uses (NumPy, Python's
# `random`, PyTorch), not only NumPy, so weight initialisation and shuffling
# start from a fixed state as well.
SEED = 100500
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

def data2image(data):
    """Convert one flat CIFAR record into a PIL image.

    `data` is reshaped to (3, 32, 32), the channel axis is moved to the end,
    and the values are cast to uint8 before being handed to PIL.
    """
    channels_first = np.reshape(data, (3, 32, 32))
    channels_last = np.transpose(channels_first, (1, 2, 0))
    pixels = np.uint8(channels_last)
    return PIL.Image.fromarray(pixels)

def imshow(img):
    """Show a channels-first image with matplotlib.

    `img` may be a NumPy array or a torch tensor. Tensors are detached and
    moved to the CPU first, so tensors on a GPU or tensors that require grad
    can be displayed too, and are then cast to uint8. The first axis is moved
    to the end before plotting.
    """
    if isinstance(img, torch.Tensor):
        img = img.detach().cpu().numpy().astype('uint8')
    plt.imshow(np.transpose(img, (1, 2, 0)))
    
def prediction2classes(output_var):
    """Return the arg-max class index for every row of `output_var` as a list.

    `output_var` is indexed as (batch, classes): the maximum is taken along
    dimension 1. The result is always a Python list with one entry per row,
    including for a batch of one sample. The earlier in-place squeeze
    collapsed that case to a 0-dim tensor, which made `tolist()` return a
    bare int.
    """
    _, predicted = torch.max(output_var.detach(), dim=1)
    return predicted.tolist()

def make_solution_pytorch(net, input_tensor, a_batch_size):
    """Predict a class for every sample of `input_tensor`, batch by batch.

    The network is switched to eval mode and run on consecutive slices of
    `a_batch_size` samples. Returns the list of predicted class indices
    produced by `prediction2classes`.
    """
    res = []
    net = net.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        # range() stops before len(input_tensor), so the network is never fed
        # an empty trailing slice (the previous `<=` condition did that when
        # the length was a multiple of the batch size).
        for cur_pos in range(0, len(input_tensor), a_batch_size):
            outputs = net(input_tensor[cur_pos:cur_pos + a_batch_size])
            res += prediction2classes(outputs)
    return res

In [6]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image

class CifarDataset(Dataset):
    """Dataset wrapper that turns flat CIFAR rows into PIL images.

    X is reshaped to (N, 3, 32, 32) and transposed to (N, H, W, C) before each
    image is converted with `Image.fromarray`. When `is_train` is true, the
    first element of every entry of Y is kept as the label; otherwise labels
    are dropped and items are returned without a label.
    """

    def __init__(self, X, Y=None, is_train=True, transform=None):
        # Labels: keep the first element of each row, or nothing at all.
        self.Y = [row[0] for row in Y] if is_train else None

        # (N, 3072) -> (N, 3, 32, 32) -> (N, H, W, C), then one PIL image per sample.
        channels_last = X.reshape((X.shape[0], 3, 32, 32)).transpose((0, 2, 3, 1))
        self.X = [Image.fromarray(pixels) for pixels in channels_last]

        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        image = self.X[idx]
        if self.transform:
            image = self.transform(image)

        # Unlabelled datasets yield the bare sample.
        if self.Y is None:
            return image
        return (image, self.Y[idx])

In [7]:
# Change these paths to point at your own copies of the Kaggle files.
DATA_PATH  = '/home/superstraz/Track_NN/CNN_Architectures/'
train_path = 'train'
test_path  = 'homework_4_no_classes.test.npy'

import pickle

# NOTE(review): the train file is opened relative to the working directory,
# while the test file is resolved against DATA_PATH - confirm this is intended.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(train_path, 'rb') as fo:
    data = pickle.load(fo, encoding='bytes')

X_full, Y_full = data[b'data'], data[b'fine_labels']
# One label per row; -1 lets NumPy infer the sample count instead of
# hard-coding 50000.
Y_full = np.array(Y_full).reshape(-1, 1)
X_test = np.load(DATA_PATH + test_path)

In [8]:
# Per-channel mean and standard deviation of the training images after
# ToTensor(). The dataset is converted once and both statistics are taken
# from the same stacked array, instead of building and converting it twice.
train_pixels = np.stack([item[0].numpy() for item in CifarDataset(X_full, Y_full, transform=transforms.ToTensor())])
np_mean = train_pixels.mean(axis=(0, 2, 3))
np_std = train_pixels.std(axis=(0, 2, 3))
del train_pixels  # large temporary, no longer needed

In [9]:
# Sanity check: show the shape of the loaded test array.
print(X_test.shape)

(10000, 3072)


In [10]:
# Evaluation pipeline: tensor conversion and per-channel normalisation only.
cifar_test_transform_norm = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(torch.FloatTensor(np_mean), torch.FloatTensor(np_std)),
    ]
)

# Training pipeline: pad by 4 pixels, take a random 32x32 crop, flip
# horizontally at random, then apply the same conversion and normalisation
# as the evaluation pipeline.
cifar_transform_norm = transforms.Compose(
    [
        transforms.Pad(4),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(torch.FloatTensor(np_mean), torch.FloatTensor(np_std)),
    ]
)

In [11]:
# Labelled training data with augmentation, reshuffled every epoch.
dataset_train_norm = CifarDataset(
    X=X_full,
    Y=Y_full,
    is_train=True,
    transform=cifar_transform_norm,
)
dataloader_train_norm = DataLoader(
    dataset=dataset_train_norm,
    batch_size=128,
    shuffle=True,
    num_workers=4,
)

# Unlabelled test data, kept in file order so predictions line up with row ids.
dataset_test_norm = CifarDataset(
    X=X_test,
    is_train=False,
    transform=cifar_test_transform_norm,
)
dataloader_test_norm = DataLoader(
    dataset=dataset_test_norm,
    batch_size=128,
    shuffle=False,
    num_workers=1,
)

def train_network(a_net, 
                  a_device,
                  dataloader_train_norm=dataloader_train_norm,
                  a_epochs=164,
                  a_batch_size=128,
                  a_lr=0.1):
    """Train `a_net` with Nesterov SGD and plot the per-epoch training accuracy.

    Parameters
    ----------
    a_net : nn.Module
        Network to train; it is moved to `a_device`.
    a_device : torch.device
        Device on which inputs, labels and the network are placed.
    dataloader_train_norm : iterable
        Yields (inputs, labels) batches. Defaults to the module-level loader.
    a_epochs : int
        Number of passes over the loader.
    a_batch_size : int
        Not used by the function body; kept so existing calls keep working.
    a_lr : float
        Learning rate passed to SGD.

    Prints the epoch index and the training accuracy after every epoch and
    draws the accuracy curve at the end. Returns None.
    """
    train_acc = []
    net = a_net.to(a_device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=a_lr, weight_decay=5e-4, momentum=0.9, nesterov=True)

    for epoch in range(a_epochs):  # loop over the dataset multiple times

        print("Epoch ", epoch)

        net = net.train()
        correct = 0
        seen = 0
        for item in dataloader_train_norm:

            inputs = item[0].to(a_device)
            labels = item[1].long().to(a_device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Count hits on-device: labels may live on the GPU, where they
            # cannot be converted to NumPy for sklearn without a copy to CPU.
            predicted = outputs.detach().argmax(dim=1)
            correct += (predicted == labels).sum().item()
            seen += labels.size(0)

        # Sample-weighted accuracy: a short final batch no longer counts as
        # much as a full one, and an empty loader does not divide by zero.
        epoch_accuracy = correct / seen if seen else 0.0
        train_acc.append(epoch_accuracy)

        print(round(train_acc[-1], 4))

    print('Finished Training')

    plt.plot(train_acc, label='Train')
    plt.legend()
    plt.grid()

In [12]:
# Channel multiplier used by x_downsample and by the residual blocks when they
# down-sample; the residual blocks also pass it as the convolution stride.
DOWNSAMPLE_COEF = 2

def conv3x3(a_in_planes, a_out_planes, a_stride=1):
    """
    Basic convolution building block for ResNet.
    Uses padding=1 so that a 3x3 kernel keeps the spatial size at stride 1.
    No bias term is created.
    """
    conv = nn.Conv2d(
        in_channels=a_in_planes,
        out_channels=a_out_planes,
        kernel_size=3,
        stride=a_stride,
        padding=1,
        bias=False,
    )
    return conv

def x_downsample(a_in_channels):
    """Return a bias-free 1x1, stride-2 convolution that multiplies the
    channel count by DOWNSAMPLE_COEF."""
    widened_channels = a_in_channels * DOWNSAMPLE_COEF
    return nn.Conv2d(
        in_channels=a_in_channels,
        out_channels=widened_channels,
        kernel_size=1,
        stride=2,
        bias=False,
    )

In [13]:
# NOTE(review): this constant and x_downsample repeat the definitions from the
# previous cell; the later definition is the one in effect.
DOWNSAMPLE_COEF = 2

def x_downsample(a_in_channels):
    """Return a bias-free 1x1, stride-2 convolution that multiplies the
    channel count by DOWNSAMPLE_COEF."""
    widened_channels = a_in_channels * DOWNSAMPLE_COEF
    return nn.Conv2d(
        in_channels=a_in_channels,
        out_channels=widened_channels,
        kernel_size=1,
        stride=2,
        bias=False,
    )

class CifarResidualBlock(nn.Module):
    """Two 3x3 conv + batch-norm stages with an optional skip connection.

    With `make_downsample` the first convolution uses DOWNSAMPLE_COEF as its
    stride and the channel count is multiplied by DOWNSAMPLE_COEF; the skip
    path then goes through `x_downsample` so the shapes match.
    """

    def __init__(self, a_in_channels, make_downsample=False, use_skip_connection=True):
        super().__init__()
        self.use_skip_connection = use_skip_connection
        self.make_downsample = make_downsample

        # The same factor scales the channels and serves as conv1's stride.
        scale = DOWNSAMPLE_COEF if make_downsample else 1

        self.in_c = a_in_channels
        self.out_c = a_in_channels * scale

        self.conv1 = conv3x3(self.in_c, self.out_c, scale)
        self.bn1 = nn.BatchNorm2d(self.out_c)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()

        self.conv2 = conv3x3(self.out_c, self.out_c)
        self.bn2 = nn.BatchNorm2d(self.out_c)

        # Created unconditionally; only used in forward() when down-sampling.
        self.down_sample = x_downsample(a_in_channels)

    def forward(self, x):
        out = self.relu1(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        if self.use_skip_connection:
            shortcut = self.down_sample(x) if self.make_downsample else x
            out += shortcut

        return self.relu2(out)
    
    
class CifarResidualBottleneckBlock(nn.Module):
    """Bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand, each followed
    by batch norm, with an optional skip connection.

    With `make_downsample` the 3x3 convolution uses DOWNSAMPLE_COEF as its
    stride, the output channel count is multiplied by DOWNSAMPLE_COEF, and the
    skip path goes through `x_downsample`.
    """

    # Ratio between the block's output channels and its inner (bottleneck) channels.
    BOTTLENECK_COEF = 4
    
    def __init__(self, a_in_channels, make_downsample=False, use_skip_connection=True):
        super(CifarResidualBottleneckBlock, self).__init__()
        self.use_skip_connection = use_skip_connection
        self.make_downsample = make_downsample
        
        if make_downsample: 
            coef = DOWNSAMPLE_COEF
        else: 
            coef = 1  
                        
        mid_channels = a_in_channels * coef // self.BOTTLENECK_COEF
        
        self.conv1 = nn.Conv2d(a_in_channels, mid_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels,  mid_channels,  kernel_size=3, stride=coef, padding = 1,  bias=False)
        
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.conv3 = nn.Conv2d(mid_channels, a_in_channels * coef, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(a_in_channels * coef)
        # A single ReLU module is reused after every stage.
        self.relu = nn.ReLU()
        
        # Created unconditionally; only used in forward() when down-sampling.
        self.down_sample = x_downsample(a_in_channels)
            
    def forward(self, x):   
        result = self.conv1(x)
        result = self.bn1(result)
        result = self.relu(result)

        result = self.conv2(result)
        result = self.bn2(result)
        result = self.relu(result)

        result = self.conv3(result)
        result = self.bn3(result)
        
        # Build the shortcut only when it is actually added, mirroring
        # CifarResidualBlock; previously the down-sampling convolution ran
        # even with the skip connection disabled.
        if self.use_skip_connection:
            if self.make_downsample:
                addition = self.down_sample(x)
            else:
                addition = x
            result += addition

        result = self.relu(result)
        return result

In [14]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    """Pre-activation residual unit: (BN -> ReLU -> 3x3 conv) twice.

    When the input and output channel counts differ, a 1x1 convolution with
    the block's stride projects the (already activated) input for the
    shortcut; otherwise the raw input is added back unchanged. Dropout is
    applied between the two convolutions when `dropRate` is positive.
    """

    def __init__(self, in_planes, out_planes, stride, dropRate=0.0):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(out_planes)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_planes, out_planes,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.droprate = dropRate
        self.equalInOut = (in_planes == out_planes)
        # A projection shortcut is only needed when the channel count changes.
        if self.equalInOut:
            self.convShortcut = None
        else:
            self.convShortcut = nn.Conv2d(
                in_planes, out_planes,
                kernel_size=1, stride=stride, padding=0, bias=False,
            )

    def forward(self, x):
        activated = self.relu1(self.bn1(x))
        if not self.equalInOut:
            # The projection shortcut operates on the activated input.
            x = activated

        out = self.conv1(activated)
        out = self.relu2(self.bn2(out))
        if self.droprate > 0:
            out = F.dropout(out, p=self.droprate, training=self.training)
        out = self.conv2(out)

        shortcut = x if self.equalInOut else self.convShortcut(x)
        return torch.add(shortcut, out)

class NetworkBlock(nn.Module):
    """A sequential stack of `nb_layers` blocks built by the `block` factory.

    `block` is called as block(in_channels, out_planes, stride, dropRate).
    Only the first layer receives `in_planes` and `stride`; every following
    layer maps `out_planes` to `out_planes` with stride 1.
    """

    def __init__(self, nb_layers, in_planes, out_planes, block, stride, dropRate=0.0):
        super(NetworkBlock, self).__init__()
        self.layer = self._make_layer(block, in_planes, out_planes, nb_layers, stride, dropRate)

    def _make_layer(self, block, in_planes, out_planes, nb_layers, stride, dropRate):
        layers = []
        for i in range(nb_layers):
            # Real conditional expressions instead of `cond and a or b`, which
            # silently falls through to `b` whenever `a` is falsy.
            layer_in = in_planes if i == 0 else out_planes
            layer_stride = stride if i == 0 else 1
            layers.append(block(layer_in, out_planes, layer_stride, dropRate))
        return nn.Sequential(*layers)

    def forward(self, x):
        return self.layer(x)

class WideResNet(nn.Module):
    """Wide residual network: a 3x3 stem convolution, three groups of
    BasicBlocks (strides 1, 2, 2), then BN -> ReLU -> global average pooling
    -> linear classifier.

    Parameters
    ----------
    depth : int
        Must satisfy (depth - 4) % 6 == 0; each group holds (depth - 4) // 6 blocks.
    num_classes : int
        Size of the classifier output.
    widen_factor : int
        Multiplier applied to the base widths 16 / 32 / 64 of the three groups.
    dropRate : float
        Dropout probability forwarded to every BasicBlock.
    """

    def __init__(self, depth, num_classes, widen_factor=5, dropRate=0.0):
        super(WideResNet, self).__init__()
        nChannels = [16, 16*widen_factor, 32*widen_factor, 64*widen_factor]
        assert (depth - 4) % 6 == 0, "depth must be of the form 6*n + 4"
        n = (depth - 4) // 6
        block = BasicBlock
        # 1st conv before any network block
        self.conv1 = nn.Conv2d(3, nChannels[0], kernel_size=3, stride=1,
                               padding=1, bias=False)
        # 1st block
        self.block1 = NetworkBlock(n, nChannels[0], nChannels[1], block, 1, dropRate)
        # 2nd block
        self.block2 = NetworkBlock(n, nChannels[1], nChannels[2], block, 2, dropRate)
        # 3rd block
        self.block3 = NetworkBlock(n, nChannels[2], nChannels[3], block, 2, dropRate)
        # global average pooling and classifier
        self.bn1 = nn.BatchNorm2d(nChannels[3])
        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nChannels[3], num_classes)
        self.nChannels = nChannels[3]

        # Weight initialisation: zero-mean normal with std sqrt(2 / fan_out)
        # for convolutions, unit scale / zero shift for batch norm, zero bias
        # for the classifier.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Named separately so it does not shadow the blocks-per-group `n`.
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()

    def forward(self, x):
        out = self.conv1(x)
        out = self.block1(out)
        out = self.block2(out)
        out = self.block3(out)
        out = self.relu(self.bn1(out))

        # Pool over the whole feature map. A fixed 8x8 window is only a global
        # pool for 32x32 inputs; for larger inputs it left several spatial
        # positions, which the view() below folded into the batch dimension.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(-1, self.nChannels)
        out = self.fc(out)
        return out

In [15]:
# Build a depth-16 WideResNet with 100 output classes and dropout 0.3
# (widen_factor keeps its default), then train it for 200 epochs at lr 0.1
# while timing the whole call.
resnet = WideResNet(16, 100, dropRate=0.3)
%time train_network(resnet, torch.device(DEVICE), a_lr=0.1, a_epochs=200)

Epoch  0
0.0964
Epoch  1
0.2232
Epoch  2
0.3252
Epoch  3
0.3958
Epoch  4
0.4473
Epoch  5
0.4855
Epoch  6
0.5119
Epoch  7
0.5347
Epoch  8
0.5505
Epoch  9
0.5651
Epoch  10
0.5805
Epoch  11
0.5908
Epoch  12
0.5977
Epoch  13
0.6062
Epoch  14
0.6126
Epoch  15
0.6173
Epoch  16
0.6222
Epoch  17
0.6263
Epoch  18
0.6333
Epoch  19
0.6343
Epoch  20
0.638
Epoch  21
0.6405
Epoch  22


Process Process-91:
Process Process-92:
Process Process-90:
Process Process-89:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/superstraz/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/superstraz/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/superstraz/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/superstraz/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/superstraz/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/superstraz/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/superstraz/anacond

KeyboardInterrupt: 

# Важно переключить сеть в режим eval - иначе dropout будет работать некорректно

In [16]:
def make_solution(a_net, a_device, a_dataloader=None):
    """Predict a class index for every sample produced by a data loader.

    Parameters
    ----------
    a_net : nn.Module
        Network used for prediction; it is switched to eval mode.
    a_device : torch.device
        Device each batch is moved to before the forward pass.
    a_dataloader : iterable, optional
        Loader yielding unlabelled input batches. Defaults to the
        module-level `dataloader_test_norm`.

    Returns the predictions as a flat list, in loader order.
    """
    if a_dataloader is None:
        a_dataloader = dataloader_test_norm

    res = []
    net = a_net.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        for item in a_dataloader:
            inputs = item.to(a_device)
            outputs = net(inputs)

            res += prediction2classes(outputs)
    return res

In [17]:
# Predict classes for the test loader with the trained WideResNet.
# my_solution = make_solution(dense_net, DEVICE)
my_solution = make_solution(resnet, DEVICE)

In [18]:
# Write the submission file: a header line, then one "index,prediction" row
# per entry of my_solution.
with open('my_solution.csv', 'w') as fout:
    fout.write('Id' + ',' + 'Prediction' + '\n')
    row_id = 0
    for prediction in my_solution:
        fout.write('{!s},{!s}\n'.format(row_id, prediction))
        row_id += 1

In [None]:
# Accuracy of the trained network in eval mode. This iterates the *training*
# loader (the test set has no labels here), so the printed label says so.
correct = 0
seen = 0
resnet = resnet.eval()
# Evaluation only: no autograd graph is needed.
with torch.no_grad():
    for item in dataloader_train_norm:
        # Use the notebook-wide DEVICE rather than a hard-coded .cuda().
        inputs = item[0].to(DEVICE)
        labels = item[1].long().to(DEVICE)

        outputs = resnet(inputs)

        # Count hits on-device; weighting by sample keeps a short final batch
        # from counting as much as a full one.
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        seen += labels.size(0)

epoch_accuracy = correct / seen if seen else 0.0

print("Train (eval mode) ", epoch_accuracy)