In [1]:
%matplotlib inline

In [2]:
import torch
import torch.optim as optim

import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import accuracy_score

import numpy as np
import PIL
import random
from IPython.core.display import Image, display

import matplotlib.pyplot as plt

from torch.utils.data import Dataset, DataLoader
from PIL import Image

from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T

import numpy as np

In [3]:
DEVICE_ID = 0
DEVICE = torch.device('cuda:%d' % DEVICE_ID)
torch.cuda.set_device(DEVICE_ID)

In [4]:
DTYPE = torch.float32
print_every = 500

In [40]:
np.random.seed(100500)

def data2image(data):
    res = np.transpose(np.reshape(data ,(3, 32,32)), (1,2,0))
    return PIL.Image.fromarray(np.uint8(res))

def imshow(img):
    if isinstance(img, torch.Tensor): img = img.numpy().astype('uint8')
    plt.imshow(np.transpose(img, (1, 2, 0)))
    
def prediction2classes(output_var):
    _, predicted = torch.max(output_var.data, 1)
    predicted.squeeze_()
    classes = predicted.tolist()
    return classes

def make_solution_pytorch(net, input_tensor, a_batch_size):
    res = []
    net = net.eval()
    cur_pos = 0
    while cur_pos <= len(input_tensor):
        outputs = net(input_tensor[cur_pos:cur_pos+a_batch_size])
        res += prediction2classes(outputs)
        cur_pos += a_batch_size
    return res

In [41]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image

class CifarDataset(Dataset):
    def __init__(self, input_path, is_train=True, transform=None):
                        
        data = np.load(input_path)
        if is_train: 
            self.Y, self.X = np.hsplit(data, [1]) 
            self.Y = [item[0] for item in self.Y]
        else: 
            self.X = data
            self.Y = None
            
        self.X = self.X.reshape((self.X.shape[0], 3, 32, 32))
        self.X = self.X.transpose((0, 2, 3, 1)) #приводим к виду (N, H, W, C)
        self.X = [Image.fromarray(img) for img in self.X]
                
        self.transform = transform

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        
        sample = self.X[idx]

        if self.transform: sample = self.transform(sample)

        if self.Y is None: return sample
        else: return (sample, self.Y[idx])

In [42]:
firstData = dset.CIFAR100('cifar-100-python/', train=True, download=True, transform = transforms.ToTensor())
np_mean = np.mean([item[0].numpy() for item in firstData], axis=(0,2,3))
np_std = np.std([item[0].numpy() for item in firstData], axis=(0,2,3))

Files already downloaded and verified


In [54]:
NUM_TRAIN = 49000
BATCH_SIZE = 128

transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomCrop(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(torch.FloatTensor(np_mean), torch.FloatTensor(np_std))
]
)

cifar_test_transform_norm = transforms.Compose([    
    transforms.ToTensor(),
    transforms.Normalize(torch.FloatTensor(np_mean), torch.FloatTensor(np_std))
]
)

data_train = dset.CIFAR100('cifar-100-python/', train=True, download=True, transform = transform)
                 
loader_train = DataLoader(data_train, batch_size=BATCH_SIZE, 
                          sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

                                 
data_val = dset.CIFAR100('cifar-100-python/', train=True, download=True, transform=transform)                
loader_val = DataLoader(data_val, batch_size=BATCH_SIZE, 
                        sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))


Files already downloaded and verified
Files already downloaded and verified


In [44]:
DATA_PATH  = '/home/neo/04/'
train_path = 'homework_4.train.npy'
test_path  = 'homework_4_no_classes.test.npy'
dataset_test_norm = CifarDataset(DATA_PATH + test_path, is_train=False, transform=cifar_test_transform_norm)
dataloader_test_norm = DataLoader(dataset_test_norm, batch_size=128,
                        shuffle=False, num_workers=1)

In [68]:
def check_accuracy(loader, model):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval()  # set model to evaluation mode
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=DEVICE, dtype=DTYPE)  # move to device, e.g. GPU
            y = y.to(device=DEVICE, dtype=torch.long)
            scores = model(x)
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [70]:
def train(model, optimizer, epochs=1):
    criterion = nn.CrossEntropyLoss()
    model = model.to(device=DEVICE) 
    for e in range(epochs):
        model.train()
        for t, (x, y) in enumerate(loader_train):
            x = x.to(device=DEVICE, dtype=DTYPE)
            y = y.to(device=DEVICE, dtype=torch.long)
            scores = model(x)
            loss = criterion(scores, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        
        print('epoch %d, loss = %.4f' % (e, loss.item()))
        check_accuracy(loader_val, model)
        print()

In [47]:
#Ma sweet machinery

#Tricky, Max pool and Avg pool
def speciallyResizedX(x):
    maxPool = nn.MaxPool2d(3, 2, 1)
    avgPool = nn.AvgPool2d(3, 2, 1)
    N, C, H, W = x.size()
    newX = torch.zeros((N, C * 2, int(H / 2), int(W / 2)))
    newX[:, 0 : C, :, :] = maxPool(x)
    newX[:, C : 2 * C, :, :] = avgPool(x)
    return newX


# Conv + BatchNorm + Relu at the end
class ComplicatedConvolutionalLayer(nn.Module):
    def __init__(self, channels, noRelu = False):
        super(ComplicatedConvolutionalLayer, self).__init__()
        self.noRelu = noRelu
        
        self.conv = nn.Conv2d(channels, channels,  stride=1,
                     kernel_size=3, padding=1, bias=False)
        self.batchNorm = nn.BatchNorm2d(channels)
        if not noRelu:
            self.relu = nn.LeakyReLU()
            
    
    def forward(self, x):
        x = self.conv(x)
        x = self.batchNorm(x)
        if not self.noRelu:
            x = self.relu(x)
        return x
                 
        
#double amount of chanels while downsampling twice
#Actually, only another kernel size = 3, just curiosity
class DownsamplingConvolutionalLayer(nn.Module):
    def __init__(self, channels):
        super(DownsamplingConvolutionalLayer, self).__init__()
        
        self.conv = nn.Conv2d(channels, 2 * channels,  stride=2,
                     kernel_size=3, padding=1, bias=False)
        self.batchNorm = nn.BatchNorm2d(2 * channels)
        self.relu = nn.LeakyReLU()
    
    def forward(self, x):
        x = self.conv(x)
        x = self.batchNorm(x)
        x = self.relu(x)
        return x


In [48]:
class CifarResidualBlock(nn.Module):
    def __init__(self, a_in_channels, make_downsample=False, use_skip_connection=True):
        super(CifarResidualBlock, self).__init__()
        self.use_skip_connection = use_skip_connection
        self.downsample = make_downsample
        if not make_downsample:
            self.conv1 = ComplicatedConvolutionalLayer(a_in_channels)
            self.conv2 = ComplicatedConvolutionalLayer(a_in_channels, noRelu = True)
            self.lastRelu = nn.LeakyReLU()
        else:
            self.conv1 = DownsamplingConvolutionalLayer(a_in_channels)
            self.conv2 = ComplicatedConvolutionalLayer(2 * a_in_channels, noRelu = True)
            self.lastRelu = nn.LeakyReLU()
            
            
    def forward(self, x):
        if self.use_skip_connection:
            if self.downsample:
                variableToSaveX = speciallyResizedX(x)
            else:
                variableToSaveX = x.clone()
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.lastRelu(x + variableToSaveX)
        else:
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.lastRelu(x)        
        return x

In [49]:
# Conv + BatchNorm + Relu at the end + an ability to vary parameters!

class MoreComplicatedConvolutionalLayer(nn.Module):
    def __init__(self, inChannels, outChannels, kernelSize = 3, noRelu = False):
        super(MoreComplicatedConvolutionalLayer, self).__init__()
        self.noRelu = noRelu
        
        if (kernelSize == 3):
            computedPadding = 1
        else:
            computedPadding = 0
        
        self.conv = nn.Conv2d(inChannels, outChannels, stride=1, kernel_size=kernelSize, padding=computedPadding, bias=False)
        self.drop = nn.Dropout2d(0.01)
        self.batchNorm = nn.BatchNorm2d(outChannels)
        if not noRelu:
            self.relu = nn.LeakyReLU()
            
    
    def forward(self, x):
        x = self.conv(x)  
        x = self.drop(x)
        x = self.batchNorm(x)
        if not self.noRelu:
            x = self.relu(x)
        return x
    


In [50]:
BOTTLENECK_COEF = 4
BOTTLENECK_KERNEL = 1

class CifarResidualBottleneckBlock(nn.Module):
    def __init__(self, inChannels, make_downsample=False, use_skip_connection=True):
        super(CifarResidualBottleneckBlock, self).__init__()
        self.use_skip_connection = use_skip_connection
        self.downsample = make_downsample
        bottleneckedChannels = int (inChannels / BOTTLENECK_COEF)
        
        if not make_downsample:
            self.conv1 = MoreComplicatedConvolutionalLayer(inChannels, bottleneckedChannels, BOTTLENECK_KERNEL)
            self.conv2 = MoreComplicatedConvolutionalLayer(bottleneckedChannels, bottleneckedChannels)
            self.conv3 = MoreComplicatedConvolutionalLayer(bottleneckedChannels, inChannels, BOTTLENECK_KERNEL, noRelu = True)
            self.lastRelu = nn.LeakyReLU()
        else:
            self.conv1 = MoreComplicatedConvolutionalLayer(inChannels, bottleneckedChannels, BOTTLENECK_KERNEL)
            self.conv2 = DownsamplingConvolutionalLayer(bottleneckedChannels)
            self.conv3 = MoreComplicatedConvolutionalLayer(2 * bottleneckedChannels, 2 * inChannels, BOTTLENECK_KERNEL, noRelu = True)
            self.lastRelu = nn.LeakyReLU()
    
            
    def forward(self, x):
        if self.use_skip_connection:
            if self.downsample:
                variableToSaveX = speciallyResizedX(x).to(torch.device(DEVICE))
            else:
                variableToSaveX = x.to(torch.device(DEVICE))
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.conv3(x)
            x = self.lastRelu(x + variableToSaveX)
        else:
            x = self.conv1(x)
            x = self.conv2(x)
            x = self.conv3(x)
            x = self.lastRelu(x)        
        return x

In [52]:
class MaResNet(nn.Module):
    
    def __init__(self):
        super(MaResNet, self).__init__()
        
        #TODO нужно добавить блоков resnet и других слоев при необходимости
        
        self.features = nn.Sequential()
    
        
        self.features.add_module('first', MoreComplicatedConvolutionalLayer(3, 32))
        self.features.add_module('res1', CifarResidualBottleneckBlock(32))
        self.features.add_module('res2', CifarResidualBottleneckBlock(32))
        self.features.add_module('res3', CifarResidualBottleneckBlock(32, make_downsample = True))
        self.features.add_module('res4', CifarResidualBottleneckBlock(64))
        self.features.add_module('res5', CifarResidualBottleneckBlock(64))
        self.features.add_module('res6', CifarResidualBottleneckBlock(64))
        self.features.add_module('res7', CifarResidualBottleneckBlock(64, make_downsample = True))
        self.features.add_module('res8', CifarResidualBottleneckBlock(128))
        self.features.add_module('res9', CifarResidualBottleneckBlock(128))
        self.features.add_module('res10', CifarResidualBottleneckBlock(128))
        self.features.add_module('res11', CifarResidualBottleneckBlock(128))
        self.global_avg_pooling = nn.AvgPool2d(3, 2, 1)
        self.fc_classifier = nn.Linear(128 * 4 * 4, 100)
        
    def forward(self, x):
        x = self.features(x)
        x = self.global_avg_pooling(x)        
        x = x.view((x.size()[0], -1))        
        x = self.fc_classifier(x)        
        return x

In [76]:
#resnet = MaResNet()
#resnet.load_state_dict(torch.load("ResNetDump_Dubovik4"))
optimizer = torch.optim.SGD(resnet.parameters(), lr=0.0001, momentum=0.90)
train(resnet, optimizer, epochs=50)

epoch 0, loss = 1.3703
Checking accuracy on validation set
Got 592 / 1000 correct (59.20)

epoch 1, loss = 0.9877
Checking accuracy on validation set
Got 582 / 1000 correct (58.20)

epoch 2, loss = 1.3437
Checking accuracy on validation set
Got 598 / 1000 correct (59.80)

epoch 3, loss = 1.0807
Checking accuracy on validation set
Got 597 / 1000 correct (59.70)

epoch 4, loss = 1.4374
Checking accuracy on validation set
Got 564 / 1000 correct (56.40)

epoch 5, loss = 1.0432
Checking accuracy on validation set
Got 576 / 1000 correct (57.60)

epoch 6, loss = 1.0890
Checking accuracy on validation set
Got 582 / 1000 correct (58.20)

epoch 7, loss = 1.1952
Checking accuracy on validation set
Got 591 / 1000 correct (59.10)

epoch 8, loss = 1.2924
Checking accuracy on validation set
Got 573 / 1000 correct (57.30)

epoch 9, loss = 1.3742
Checking accuracy on validation set
Got 581 / 1000 correct (58.10)

epoch 10, loss = 1.1857
Checking accuracy on validation set
Got 577 / 1000 correct (57.70)

KeyboardInterrupt: 

на валидации такое серьезное отставание от итогового результата из-за значительного оверфита, полагаю, точность на трейне тут под 80-90, итог, достигнутый данной сеткой - 73.5 на kaggle

In [72]:
torch.save(resnet.state_dict(), "ResNetDump_Dubovik4")

# Важно переключить сеть в режим eval - иначе dropout будет работать некорректно

In [77]:
def make_solution(a_net, a_device):
    res = []
    net = a_net.eval()
    for item in dataloader_test_norm:
        inputs = item.to(a_device)
        outputs = net(inputs) 

        res += prediction2classes(outputs)
    return res

In [78]:
# my_solution = make_solution(dense_net, DEVICE)
my_solution = make_solution(resnet.to(DEVICE), DEVICE)

In [79]:
with open('sunlution3.csv', 'w') as fout:
    print('Id', 'Prediction', sep=',', file=fout)
    for i, prediction in enumerate(my_solution):
        print(i, prediction, sep=',', file=fout)