In [1]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torchvision as tv

## Данные

In [2]:
BATCH_SIZE=256

In [3]:
train_dataset = tv.datasets.FashionMNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.FashionMNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

## Функция для обучения

In [4]:
def train_model(num_epochs):
    for ep in range(num_epochs):
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        start = time.time()
        
        model.train()
        for X, y in train:
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)
        
        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        for X, y in test:
            y_pred = model(X)
            l = loss(y_pred, y)
            test_loss += l.item()
            test_acc += (y_pred.argmax(dim=1) == y).sum().item()
            test_iters += 1
            test_passed += len(X)
            
        print("ep: {}, taked: {:.3f}, train_loss: {:.3f}, train_acc: {:.3f}, test_loss: {:.3f}, test_acc: {:.3f}".format(
            ep, time.time() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed)
        )

## Модель 1
 - 1 скрытый слой на 256 нейронов
 - Оптимизатор Adam
 - Обучение 20 эпох

In [5]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 10)
)

loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.Adam(model.parameters())
train_model(num_epochs=20)

ep: 0, taked: 7.417, train_loss: 0.620, train_acc: 0.790, test_loss: 0.494, test_acc: 0.822
ep: 1, taked: 8.011, train_loss: 0.428, train_acc: 0.850, test_loss: 0.441, test_acc: 0.844
ep: 2, taked: 7.867, train_loss: 0.384, train_acc: 0.864, test_loss: 0.409, test_acc: 0.855
ep: 3, taked: 7.913, train_loss: 0.356, train_acc: 0.873, test_loss: 0.388, test_acc: 0.862
ep: 4, taked: 9.105, train_loss: 0.334, train_acc: 0.880, test_loss: 0.372, test_acc: 0.868
ep: 5, taked: 11.164, train_loss: 0.317, train_acc: 0.886, test_loss: 0.362, test_acc: 0.869
ep: 6, taked: 8.568, train_loss: 0.303, train_acc: 0.891, test_loss: 0.355, test_acc: 0.873
ep: 7, taked: 7.954, train_loss: 0.291, train_acc: 0.895, test_loss: 0.351, test_acc: 0.873
ep: 8, taked: 7.945, train_loss: 0.280, train_acc: 0.899, test_loss: 0.349, test_acc: 0.874
ep: 9, taked: 7.940, train_loss: 0.269, train_acc: 0.903, test_loss: 0.344, test_acc: 0.877
ep: 10, taked: 8.042, train_loss: 0.260, train_acc: 0.906, test_loss: 0.342, te

 - Модель 1 достигла точности на тестовой выборке 88% после прохождения 15 эпох
 - Лучшая эпоха по тестовой выборке: `ep: 15, test_loss: 0.339, test_acc: 0.880`
 - Лучшая модель: Модель 1 `ep: 15, test_loss: 0.339, test_acc: 0.880`

## Модель 2
 - 3 скрытых слоя на 512, 256 и 128 нейронов
 - Оптимизатор Adam
 - Обучение 20 эпох

In [6]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 10)
)

loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.Adam(model.parameters())
train_model(num_epochs=20)

ep: 0, taked: 10.045, train_loss: 0.631, train_acc: 0.778, test_loss: 0.465, test_acc: 0.831
ep: 1, taked: 12.781, train_loss: 0.402, train_acc: 0.856, test_loss: 0.401, test_acc: 0.857
ep: 2, taked: 12.082, train_loss: 0.351, train_acc: 0.872, test_loss: 0.384, test_acc: 0.861
ep: 3, taked: 12.079, train_loss: 0.320, train_acc: 0.882, test_loss: 0.377, test_acc: 0.864
ep: 4, taked: 10.752, train_loss: 0.298, train_acc: 0.890, test_loss: 0.377, test_acc: 0.867
ep: 5, taked: 11.868, train_loss: 0.279, train_acc: 0.897, test_loss: 0.370, test_acc: 0.871
ep: 6, taked: 13.147, train_loss: 0.264, train_acc: 0.902, test_loss: 0.370, test_acc: 0.871
ep: 7, taked: 10.108, train_loss: 0.254, train_acc: 0.905, test_loss: 0.346, test_acc: 0.876
ep: 8, taked: 12.062, train_loss: 0.242, train_acc: 0.910, test_loss: 0.342, test_acc: 0.879
ep: 9, taked: 10.238, train_loss: 0.230, train_acc: 0.914, test_loss: 0.355, test_acc: 0.878
ep: 10, taked: 11.476, train_loss: 0.221, train_acc: 0.918, test_loss:

 - Модель 2 достигла точности на тестовой выборке 88% после прохождения 12 эпох
 - Лучшая эпоха по тестовой выборке: `ep: 14, test_loss: 0.338, test_acc: 0.887`
 - Лучшая модель: Модель 2 `ep: 14, test_loss: 0.338, test_acc: 0.887`

## Модель 3
 - 3 скрытых слоя на 512, 256 и 128 нейронов
 - В каждом скрытом слое нормализация по батчу перед активацией
 - Оптимизатор Adam
 - Обучение 20 эпох

In [7]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 512),
    torch.nn.BatchNorm1d(512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 256),
    torch.nn.BatchNorm1d(256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 128),
    torch.nn.BatchNorm1d(128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 10)
)

loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.Adam(model.parameters())
train_model(num_epochs=20)

ep: 0, taked: 11.443, train_loss: 0.464, train_acc: 0.842, test_loss: 0.394, test_acc: 0.853
ep: 1, taked: 11.468, train_loss: 0.318, train_acc: 0.883, test_loss: 0.385, test_acc: 0.858
ep: 2, taked: 11.417, train_loss: 0.276, train_acc: 0.898, test_loss: 0.383, test_acc: 0.862
ep: 3, taked: 10.254, train_loss: 0.247, train_acc: 0.910, test_loss: 0.373, test_acc: 0.863
ep: 4, taked: 11.406, train_loss: 0.221, train_acc: 0.919, test_loss: 0.359, test_acc: 0.871
ep: 5, taked: 11.220, train_loss: 0.202, train_acc: 0.925, test_loss: 0.371, test_acc: 0.867
ep: 6, taked: 11.775, train_loss: 0.185, train_acc: 0.932, test_loss: 0.361, test_acc: 0.875
ep: 7, taked: 14.427, train_loss: 0.169, train_acc: 0.937, test_loss: 0.392, test_acc: 0.872
ep: 8, taked: 10.445, train_loss: 0.154, train_acc: 0.943, test_loss: 0.367, test_acc: 0.877
ep: 9, taked: 13.074, train_loss: 0.141, train_acc: 0.948, test_loss: 0.383, test_acc: 0.878
ep: 10, taked: 10.414, train_loss: 0.128, train_acc: 0.953, test_loss:

 - Модель 3 достигла точности на тестовой выборке 88% после прохождения 12 эпох
 - Лучшая эпоха по тестовой выборке: `ep: 4, test_loss: 0.359, test_acc: 0.871`
 - Лучшая модель: Модель 2 `ep: 14, test_loss: 0.338, test_acc: 0.887`

## Модель 4
 - 3 скрытых слоя на 512, 256 и 128 нейронов
 - В каждом скрытом слое нормализация по батчу перед активацией и дропаут после активации
 - Оптимизатор Adam
 - Обучение 30 эпох

In [9]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 512),
    torch.nn.BatchNorm1d(512),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(512, 256),
    torch.nn.BatchNorm1d(256),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(256, 128),
    torch.nn.BatchNorm1d(128),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.5),
    torch.nn.Linear(128, 10)
)

loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.Adam(model.parameters())
train_model(num_epochs=30)

ep: 0, taked: 10.584, train_loss: 0.704, train_acc: 0.769, test_loss: 0.441, test_acc: 0.840
ep: 1, taked: 11.793, train_loss: 0.470, train_acc: 0.839, test_loss: 0.400, test_acc: 0.848
ep: 2, taked: 11.975, train_loss: 0.425, train_acc: 0.852, test_loss: 0.373, test_acc: 0.862
ep: 3, taked: 11.533, train_loss: 0.399, train_acc: 0.860, test_loss: 0.365, test_acc: 0.866
ep: 4, taked: 13.451, train_loss: 0.380, train_acc: 0.866, test_loss: 0.355, test_acc: 0.868
ep: 5, taked: 11.542, train_loss: 0.365, train_acc: 0.872, test_loss: 0.356, test_acc: 0.866
ep: 6, taked: 11.589, train_loss: 0.353, train_acc: 0.876, test_loss: 0.341, test_acc: 0.874
ep: 7, taked: 11.745, train_loss: 0.341, train_acc: 0.878, test_loss: 0.342, test_acc: 0.871
ep: 8, taked: 19.658, train_loss: 0.332, train_acc: 0.880, test_loss: 0.326, test_acc: 0.881
ep: 9, taked: 10.899, train_loss: 0.327, train_acc: 0.884, test_loss: 0.326, test_acc: 0.878
ep: 10, taked: 19.338, train_loss: 0.315, train_acc: 0.888, test_loss:

 - Модель 4 достигла точности на тестовой выборке 88% после прохождения 9 эпох
 - Лучшая эпоха по тестовой выборке: `ep: 28, test_loss: 0.296, test_acc: 0.897`
 - Лучшая модель: Модель 4 `ep: 28, test_loss: 0.296, test_acc: 0.897`

Модель 4 уже уверенно перешагнула порог точности 88%, приблизившись к 90%. Судя по логу процесса оптимизации Модель 4 могла бы продолжить обучение дольше 30 эпох, сходясь к еще более качественному решению.