## Lab 3: final challenges

__Вам предлагается решить задачу классификации сигналов (вы уже встречались с ней во второй лабораторной работе) или задачу классификации изображений. Или обе ;)__

__Выполнение этих заданий не является обязательным, но позитивно повлияет на вашу итоговую оценку. Успехов!__


### Part 5. Dogs classification (2+ points)
__Disclaimer__: Это опциональная часть задания. Здесь придется экспериментировать, подбирать оптимальную структуру сети для решения задачи и активно искать подскзаки в сети.

Предлагаем вам решить задачу классификации пород собак. Вы можете обучить сеть с нуля или же воспользоваться методом fine-tuning'а. Полезная ссылка на [предобученные модели](https://pytorch.org/docs/stable/torchvision/models.html).

Данные можно скачать [отсюда](https://www.dropbox.com/s/vgqpz2f1lolxmlv/data.zip?dl=0). Датасет представлен 50 классами пород собак, которые можно найти в папке train в соответствующих директориях. При сдаче данной части задания вместе с ноутбуком необходимо отправить .csv-файл с предсказаниями классов тестовой выборки в формате: <имя изображения>,<метка класса> по одному объекту на строку. Ниже приведите код ваших экспериментов и короткий вывод по их результатам.

Будут оцениваться качество классификации (accuracy) на тестовой выборке (2 балла) и проведенные эксперименты (1 балл).
Разбалловка следующая:
* $>=$93% - 2 points
* $>=$84% - 1.5 points
* $>=$70% - 0.75 points

In [1]:
from torchvision import transforms
import torchvision.datasets as datasets
from torchsummary import summary
import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import time
from tqdm import tqdm_notebook as tqdm

import os
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
NUM_WORKERS = 4

EPOCH_NUM = 10
BATCH_SIZE = 64

LEARNING_RATE = 3e-4

In [3]:
image_mean = [0.485, 0.456, 0.406]
image_std  = [0.229, 0.224, 0.225]

data_transforms = transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.8,1.)),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, 
                               saturation=0.5, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(image_mean, image_std)
    ])


dog_dataset = datasets.ImageFolder(root=r"./data/train", transform=data_transforms)

VALIDATION_SPLIT = .2
n_train, n_val = int((1-VALIDATION_SPLIT) * len(dog_dataset)), int(VALIDATION_SPLIT * len(dog_dataset))

In [4]:
dataset_size = len(dog_dataset)
indices = list(range(dataset_size))
split = int(np.floor(VALIDATION_SPLIT * dataset_size))
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]


train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_batch_gen = torch.utils.data.DataLoader(dog_dataset, 
                                              batch_size=BATCH_SIZE,
                                              num_workers=NUM_WORKERS,
                                              sampler=train_sampler)

val_batch_gen = torch.utils.data.DataLoader(dog_dataset,
                                            batch_size=BATCH_SIZE,
                                            num_workers=NUM_WORKERS,
                                            sampler=valid_sampler)

In [5]:
def compute_loss(model, X_batch, y_batch):
    logits = model(X_batch)
    crossentropy = nn.CrossEntropyLoss(reduction='none')
    loss = torch.mean(crossentropy(logits, y_batch), 0, keepdim=True)
    return loss

In [6]:
def train_model(model, train_batch_generator, val_batch_generator, opt, 
                model_name=None, n_epochs=EPOCH_NUM):
    train_loss = []
    val_accuracy = []
    best_model = None
    top_val_accuracy = 0
    
    for epoch in range(n_epochs):
        start_time = time.time()

        model.train(True)
        for (X_batch, y_batch) in tqdm(train_batch_generator, desc='Training'):
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            
            loss = compute_loss(model, X_batch, y_batch)
            
            loss.backward()
            
            opt.step()
            
            opt.zero_grad()
            
            train_loss.append(loss.cpu().data.numpy())

        model.train(False)
        for X_batch, y_batch in val_batch_generator:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            logits = model(X_batch)
            
            y_pred = logits.max(1)[1].data
            val_accuracy.append(np.mean( (y_batch.cpu() == y_pred.cpu()).numpy() ))

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, n_epochs, time.time() - start_time))
        train_loss_value = np.mean(train_loss)
        val_accuracy_value = np.mean(val_accuracy)
        
        if val_accuracy_value > top_val_accuracy and model_name is not None:
            top_val_accuracy = val_accuracy_value
            with open(model_name, 'wb') as f: torch.save(model, f)

        print("  training loss (in-iteration): \t{:.6f}".format(train_loss_value))
        print("  validation accuracy: \t\t\t{:.2f} %".format(val_accuracy_value  * 100))
        custom_objects = (train_loss, val_accuracy)
        
    return best_model, opt, custom_objects

def test_model(model, test_batch_generator, subset='test'):
    model.train(False)
    test_batch_acc = []
    for X_batch, y_batch in test_batch_generator:
        logits = model(X_batch.to(device))
        y_pred = logits.max(1)[1].data
        test_batch_acc.append(np.mean( (y_batch.cpu() == y_pred.cpu()).numpy() ))

    test_accuracy = np.mean(test_batch_acc)
    
    print("Results:")
    print("  {} accuracy:\t\t{:.2f} %".format(subset, test_accuracy * 100))
    return test_accuracy

In [7]:
google_net_model = torchvision.models.googlenet(pretrained=True)

In [8]:
opt = torch.optim.Adam(google_net_model.parameters(), lr=LEARNING_RATE)
model_name='model_googlenet.ckpt'

google_net_model, opt_vgg16, custom_objects = train_model(google_net_model,
                                                         train_batch_gen, 
                                                         val_batch_gen,
                                                         opt,
                                                         ckpt_name=model_name,
                                                         n_epochs=10)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  # This is added back by InteractiveShellApp.init_path()


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 1 of 10 took 1623.860s
  training loss (in-iteration): 	2.598456
  validation accuracy: 			73.94 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 2 of 10 took 2184.758s
  training loss (in-iteration): 	1.607225
  validation accuracy: 			75.87 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 3 of 10 took 1862.784s
  training loss (in-iteration): 	1.164220
  validation accuracy: 			76.60 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 4 of 10 took 1522.579s
  training loss (in-iteration): 	0.914602
  validation accuracy: 			77.10 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 5 of 10 took 3530.097s
  training loss (in-iteration): 	0.750595
  validation accuracy: 			77.78 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 6 of 10 took 4772.310s
  training loss (in-iteration): 	0.636363
  validation accuracy: 			78.20 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 7 of 10 took 2951.862s
  training loss (in-iteration): 	0.554640
  validation accuracy: 			78.25 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 8 of 10 took 1511.449s
  training loss (in-iteration): 	0.494165
  validation accuracy: 			78.27 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 9 of 10 took 3786.318s
  training loss (in-iteration): 	0.445490
  validation accuracy: 			78.41 %


HBox(children=(FloatProgress(value=0.0, description='Training', max=90.0, style=ProgressStyle(description_widt…


Epoch 10 of 10 took 2053.431s
  training loss (in-iteration): 	0.405534
  validation accuracy: 			78.40 %


**Вывод:** Поработал с разными моделями из `torchvision.models`: VGG, alexnet, googlenet. Выбрал последнюю, потому что относительно остальных она довольно быстро обучается, попробовал в нормализацию картинок `data_transforms` (нагуглил).

In [12]:
test_data_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(image_mean, image_std)
    ])


test_dataset = datasets.ImageFolder("data", test_data_transforms)
n_test = len(test_dataset)


In [16]:
best_model = None
with open(model_name, 'rb') as f:
    best_model = torch.load(f)
    
best_model.train(False)

In [19]:
class IterDataSet(torch.utils.data.Dataset):

    def __init__(self, root_dir='data/test', transform = test_data_transforms):
        self.root_dir = root_dir
        self.transform = test_data_transforms

    def __getitem__(self, idx):
        s = str(idx) +'.jpeg'
        img_name = os.path.join(self.root_dir, s)
        image = io.imread(img_name)
        
        sample = {'image': self.transform(Image.fromarray(image)), 'name': s}
        return sample

In [21]:
tdd = IterDataSet()
cl_to_idx = train_batch_gen.dataset.class_to_idx
idx_to_cl = {cl_to_idx[k]:k for k in cl_to_idx.keys()}

pred_label = []
names = []

for i in tqdm(range(1502), desc = 'test'):
    sample = tdd[i]
    
    test_img = sample['image'].unsqueeze(0).to(device)
    logits = best_model(test_img)
    y_pred = logits.max(1)[1].data.cpu().numpy()[0]
    y_pred = idx_to_cl[y_pred]
    
    pred_label.append(y_pred)
    names.append(sample['name'])

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  


HBox(children=(FloatProgress(value=0.0, description='test', max=1502.0, style=ProgressStyle(description_width=…




In [22]:
sub = pd.DataFrame(names, columns=['name'])

sub['predictions'] = preds

sub.to_csv('data/final_ans.csv')