## Домашнее задание №7

##### Автор: [Радослав Нейчев](https://www.linkedin.com/in/radoslav-neychev/), @neychev

In [89]:
import numpy as np

import torch
from torch import nn
from torch.nn import functional as F

import torchvision
from torchvision.datasets import MNIST

from matplotlib import pyplot as plt
from IPython.display import clear_output

### Задача №1: 
Обратимся к классической задаче распознавания рукописных цифр. Мы будем работать с набором данных [MNIST](http://yann.lecun.com/exdb/mnist/). В данном задании воспользуемся всем датасетом целиком.

__Ваша основная задача: реализовать весь пайплайн обучения модели и добиться качества $\geq 92\%$ на тестовой выборке.__

Код для обучения модели в данном задании отсутствует. Присутствует лишь несколько тестов, которые помогут вам отладить свое решение. За примером можно обратиться к ноутбуку первого занятия.

Настоятельно рекомендуем написать код "с нуля", лишь поглядывая на готовые примеры, а не просто "скопировать-вставить". Это поможет вам в дальнейшем.

In [90]:
# do not change the code in the block below
# __________start of block__________

train_mnist_data = MNIST('.', train=True, transform=torchvision.transforms.ToTensor(), download=True)
test_mnist_data = MNIST('.', train=False, transform=torchvision.transforms.ToTensor(), download=True)


train_data_loader = torch.utils.data.DataLoader(
    train_mnist_data,
    batch_size=32,
    shuffle=True,
    num_workers=2
)

test_data_loader = torch.utils.data.DataLoader(
    test_mnist_data,
    batch_size=32,
    shuffle=False,
    num_workers=2
)

# `random_batch` is read by the local sanity-check cell further down, so it has
# to be defined here for the notebook to survive Restart Kernel -> Run All.
random_batch = next(iter(train_data_loader))
_image, _label = random_batch[0][0], random_batch[1][0]
plt.figure()
plt.imshow(_image.reshape(28, 28))
plt.title(f'Image label: {_label}')
# __________end of block__________

Постройте модель ниже. Пожалуйста, не стройте переусложненную сеть, не стоит делать ее глубже четырех слоев (можно и меньше). Ваша основная задача – обучить модель и получить качество на отложенной (тестовой) выборке не менее 92% accuracy.

*Комментарий: для этого достаточно линейных слоев и функций активации.*

__Внимание, ваша модель должна быть представлена именно переменной `model`.__

## Dataset analysis

In [91]:
# Number of samples in the train and test splits.
len(train_mnist_data), len(test_mnist_data)

(60000, 10000)

In [92]:
# Per-label sample counts for both splits (class-balance check).
np.bincount(train_mnist_data.targets), np.bincount(test_mnist_data.targets)

(array([5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949],
       dtype=int64),
 array([ 980, 1135, 1032, 1010,  982,  892,  958, 1028,  974, 1009],
       dtype=int64))

In [93]:
# Shape of the raw image tensor and of one image after flattening.
train_mnist_data.data.shape, train_mnist_data.data[0].flatten().shape

(torch.Size([60000, 28, 28]), torch.Size([784]))

In [94]:
# Distinct raw pixel values that occur in the first training image.
np.unique(train_mnist_data.data[0])

array([  0,   1,   2,   3,   9,  11,  14,  16,  18,  23,  24,  25,  26,
        27,  30,  35,  36,  39,  43,  45,  46,  49,  55,  56,  64,  66,
        70,  78,  80,  81,  82,  90,  93,  94, 107, 108, 114, 119, 126,
       127, 130, 132, 133, 135, 136, 139, 148, 150, 154, 156, 160, 166,
       170, 171, 172, 175, 182, 183, 186, 187, 190, 195, 198, 201, 205,
       207, 212, 213, 219, 221, 225, 226, 229, 238, 240, 241, 242, 244,
       247, 249, 250, 251, 252, 253, 255], dtype=uint8)

In [95]:
# Distinct label values present in the training split.
np.unique(train_mnist_data.targets)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

## Get device for training

In [96]:
# Pick the compute backend: CUDA first, then the MPS backend, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


## Define the model class

In [117]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier with layer widths 784 -> 392 -> 196 -> 98 -> 10.

    Every hidden stage is ``Linear`` (without bias) -> ``BatchNorm1d`` -> ``ReLU``;
    the output stage is a single ``Linear`` with bias that yields 10 raw logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        widths = (784, 392, 196, 98)
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            # No bias on the hidden Linear layers: each one is followed directly
            # by a BatchNorm1d layer.
            stages += [
                nn.Linear(fan_in, fan_out, bias=False),
                nn.BatchNorm1d(fan_out),
                nn.ReLU(),
            ]
        stages.append(nn.Linear(widths[-1], 10, bias=True))

        # Modules are registered in creation order, so they keep indices 0..9.
        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        """Flatten ``x`` from dimension 1 onwards and return the logits."""
        return self.layers(self.flatten(x))

In [118]:
# Instantiate the network on the selected device; the assignment requires the
# trained network to be available under the name `model`.
# NOTE(review): the protected check/submission cells below pass CPU tensors to
# `model`, so if `device` is not "cpu" the model presumably has to be moved
# back to the CPU before those cells run - confirm.
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layers): Sequential(
    (0): Linear(in_features=784, out_features=392, bias=False)
    (1): BatchNorm1d(392, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=392, out_features=196, bias=False)
    (4): BatchNorm1d(196, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Linear(in_features=196, out_features=98, bias=False)
    (7): BatchNorm1d(98, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Linear(in_features=98, out_features=10, bias=True)
  )
)


## Train, test loops

In [122]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training pass of ``model`` over ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` batches that also exposes ``.dataset``
            (its length is only used for the progress printout).
        model: ``nn.Module`` to train; switched to training mode.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer that holds the parameters of ``model``.

    Returns:
        None. The loss of every 100th batch is printed.
    """
    size = len(dataloader.dataset)

    # Batches are moved to wherever the model's weights live, so the loop also
    # works when the model was placed on an accelerator with `.to(device)`.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    # Set the model to training mode - important for batch normalization and dropout layers
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)

        # Clear gradients *before* the backward pass so that anything left over
        # from an earlier (e.g. interrupted) run cannot leak into this step.
        optimizer.zero_grad()

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    model.eval()
    test_loss, correct = 0, 0
    
    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [123]:
# Training hyper-parameters.
learning_rate = 1e-3
# The data loaders are created in the protected data-loading cell, so the batch
# size is read from there instead of being restated (and drifting out of sync).
batch_size = train_data_loader.batch_size
epochs = 5

## CrossEntropyLoss, SGD

In [124]:
# Cross-entropy objective optimised with plain SGD.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Each epoch: one pass over the training loader, then an evaluation on the test loader.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_data_loader, model, loss_fn, optimizer)
    test_loop(test_data_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.257586  [    0/60000]
loss: 0.290263  [ 3200/60000]
loss: 0.208776  [ 6400/60000]
loss: 0.260407  [ 9600/60000]
loss: 0.192656  [12800/60000]
loss: 0.295854  [16000/60000]
loss: 0.296098  [19200/60000]
loss: 0.290704  [22400/60000]
loss: 0.359307  [25600/60000]
loss: 0.223409  [28800/60000]
loss: 0.220825  [32000/60000]
loss: 0.117165  [35200/60000]
loss: 0.255295  [38400/60000]
loss: 0.142936  [41600/60000]
loss: 0.205026  [44800/60000]
loss: 0.279508  [48000/60000]
loss: 0.100770  [51200/60000]
loss: 0.214212  [54400/60000]
loss: 0.371842  [57600/60000]
Test Error: 
 Accuracy: 95.4%, Avg loss: 0.187137 

Epoch 2
-------------------------------
loss: 0.290807  [    0/60000]
loss: 0.152908  [ 3200/60000]
loss: 0.154312  [ 6400/60000]
loss: 0.116419  [ 9600/60000]
loss: 0.462713  [12800/60000]
loss: 0.230847  [16000/60000]
loss: 0.219189  [19200/60000]
loss: 0.280349  [22400/60000]
loss: 0.094755  [25600/60000]
loss: 0.303280  [28800/60000

## CrossEntropyLoss, Adam

In [125]:
# Cross-entropy objective optimised with Adam.
# NOTE(review): `model` is not re-created in this cell, so Adam appears to
# continue from whatever weights `model` already holds rather than from a fresh
# initialisation - confirm this is intended before comparing the optimizers.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Each epoch: one pass over the training loader, then an evaluation on the test loader.
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_data_loader, model, loss_fn, optimizer)
    test_loop(test_data_loader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 0.041683  [    0/60000]
loss: 0.445788  [ 3200/60000]
loss: 0.082648  [ 6400/60000]
loss: 0.248875  [ 9600/60000]
loss: 0.047300  [12800/60000]
loss: 0.577359  [16000/60000]
loss: 0.120267  [19200/60000]
loss: 0.161840  [22400/60000]
loss: 0.070026  [25600/60000]
loss: 0.248676  [28800/60000]
loss: 0.078387  [32000/60000]
loss: 0.269884  [35200/60000]
loss: 0.167777  [38400/60000]
loss: 0.155067  [41600/60000]
loss: 0.006160  [44800/60000]
loss: 0.396610  [48000/60000]
loss: 0.028212  [51200/60000]
loss: 0.059461  [54400/60000]
loss: 0.149545  [57600/60000]
Test Error: 
 Accuracy: 96.7%, Avg loss: 0.100201 

Epoch 2
-------------------------------
loss: 0.076076  [    0/60000]
loss: 0.086816  [ 3200/60000]
loss: 0.076779  [ 6400/60000]
loss: 0.054861  [ 9600/60000]
loss: 0.173854  [12800/60000]
loss: 0.283029  [16000/60000]
loss: 0.056418  [19200/60000]
loss: 0.006085  [22400/60000]
loss: 0.069718  [25600/60000]
loss: 0.145222  [28800/60000

Локальные тесты для проверки вашей модели доступны ниже:

In [126]:
# do not change the code in the block below
# __________start of block__________
# Sanity check: `model` must accept a batch reshaped to (-1, 784) and produce
# 10 values per sample along the last dimension.
assert model is not None, 'Please, use `model` variable to store your model'

try:
    # `random_batch` is not defined in this cell; it must already exist in the
    # kernel (presumably from the data-loading cell - TODO confirm).
    x = random_batch[0].reshape(-1, 784)
    y = random_batch[1]

    # compute outputs given inputs, both are variables
    y_predicted = model(x)    
except Exception as e:
    # Print a hint, then re-raise so the original traceback is still shown.
    print('Something is wrong with the model')
    raise e
    
    
assert y_predicted.shape[-1] == 10, 'Model should predict 10 logits/probas'

print('Everything seems fine!')
# __________end of block__________

Everything seems fine!


Настройте параметры модели на обучающей выборке. Рекомендуем поработать с различными оптимизаторами.

In [None]:
# your code here

Также, напоминаем, что в любой момент можно обратиться к замечательной [документации](https://pytorch.org/docs/stable/index.html) и [обучающим примерам](https://pytorch.org/tutorials/).  

Оценим качество классификации:

In [127]:
# Predict a label for every training sample and compare with the ground truth.
predicted_labels = []
real_labels = []
model.eval()
# Inputs have to sit on the same device as the weights; predictions are brought
# back to the CPU so they can be compared with the labels from the loader.
model_device = next(model.parameters()).device
with torch.no_grad():
    for batch in train_data_loader:
        y_predicted = model(batch[0].reshape(-1, 784).to(model_device))
        predicted_labels.append(y_predicted.argmax(dim=1).cpu())
        real_labels.append(batch[1])

predicted_labels = torch.cat(predicted_labels)
real_labels = torch.cat(real_labels)
# Accuracy = mean of the element-wise "prediction equals label" indicator.
train_acc = (predicted_labels == real_labels).type(torch.FloatTensor).mean()

In [128]:
# Report the share of correctly classified training samples.
print(f'Neural network accuracy on train set: {train_acc:3.5}')

Neural network accuracy on train set: 0.99268


In [129]:
# Predict a label for every test sample and compare with the ground truth.
predicted_labels = []
real_labels = []
model.eval()
# Inputs have to sit on the same device as the weights; predictions are brought
# back to the CPU so they can be compared with the labels from the loader.
model_device = next(model.parameters()).device
with torch.no_grad():
    for batch in test_data_loader:
        y_predicted = model(batch[0].reshape(-1, 784).to(model_device))
        predicted_labels.append(y_predicted.argmax(dim=1).cpu())
        real_labels.append(batch[1])

predicted_labels = torch.cat(predicted_labels)
real_labels = torch.cat(real_labels)
# Accuracy = mean of the element-wise "prediction equals label" indicator.
test_acc = (predicted_labels == real_labels).type(torch.FloatTensor).mean()

In [130]:
# Report the share of correctly classified test samples.
print(f'Neural network accuracy on test set: {test_acc:3.5}')

Neural network accuracy on test set: 0.9794


Проверка, что необходимые пороги пройдены:

In [131]:
# Acceptance thresholds: at least 0.92 on the test set and 0.91 on the train set.
assert test_acc >= 0.92, 'Test accuracy is below 0.92 threshold'
assert train_acc >= 0.91, 'Train accuracy is below 0.91 while test accuracy is fine. We recommend to check your model and data flow'

### Сдача задания
Загрузите файл `hw07_data_dict.npy` (ссылка есть на странице с заданием) и запустите код ниже для генерации посылки.

In [132]:
# do not change the code in the block below
# __________start of block__________
import os
import json
# Fail early with a readable message if the evaluation data has not been downloaded.
assert os.path.exists('hw07_data_dict.npy'), 'Please, download `hw07_data_dict.npy` and place it in the working directory'

def get_predictions(model, eval_data, step=10):
    """Return the model's predicted labels for ``eval_data`` as one comma-separated string.

    Args:
        model: module called on slices of ``eval_data`` reshaped to (-1, 784);
            it is switched to evaluation mode first.
        eval_data: sliceable tensor of samples; consumed in slices of ``step`` items.
        step: number of samples passed to the model per call.

    Returns:
        str: arg-max (dim=1) predictions for all samples, joined with ','.
    """
    
    predicted_labels = []
    model.eval()
    with torch.no_grad():
        for idx in range(0, len(eval_data), step):
            y_predicted = model(eval_data[idx:idx+step].reshape(-1, 784))
            predicted_labels.append(y_predicted.argmax(dim=1))
    
    # Concatenate the per-slice predictions and serialise them as text.
    predicted_labels = torch.cat(predicted_labels).numpy()
    predicted_labels = ','.join([str(x) for x in list(predicted_labels)])
    return predicted_labels

# NOTE: allow_pickle=True makes np.load unpickle arbitrary Python objects;
# only load a file obtained from a trusted source.
loaded_data_dict = np.load('hw07_data_dict.npy', allow_pickle=True)

# `.item()` unwraps the stored object, which is then indexed by 'train' / 'test'.
submission_dict = {
    'train': get_predictions(model, torch.FloatTensor(loaded_data_dict.item()['train'])),
    'test': get_predictions(model, torch.FloatTensor(loaded_data_dict.item()['test']))
}

# Persist the predictions as JSON for submission.
with open('submission_dict_hw07.json', 'w') as iofile:
    json.dump(submission_dict, iofile)
print('File saved to `submission_dict_hw07.json`')
# __________end of block__________

File saved to `submission_dict_hw07.json`


На этом задание завершено. Поздравляем!