
Постройте модель на основе полносвязных слоёв для классификации Fashion MNIST из библиотеки torchvision (datasets).
Получите качество на тестовой выборке не ниже 88%

Инструкция по выполнению задания

Скачайте тренировочную и тестовою часть датасета
Постройте модель, выбрав стартовую архитектуру
Обучите модель и сверьте качество на тестовой части с заданным порогом
Изменяйте архитектуру модели пока качество на тестовой части не будет выше порога. Вариации архитектуры можно реализовать через изменение количества слоёв, количества нейронов в слоях и использование регуляризации. Можно использовать различные оптимизаторы.

In [1]:
import torch

In [2]:
import torchvision as tv

In [3]:
import pandas as pd
import numpy as np

In [4]:
import time

In [5]:
BATCH_SIZE=256

In [6]:
train_dataset = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 152309277.77it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 37713478.77it/s]


Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 46377094.05it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 15024076.32it/s]


Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



In [7]:
train_dataset[0][0].shape

torch.Size([1, 28, 28])

In [8]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 10)
)

In [9]:
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [10]:
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.RMSprop(model.parameters(), lr=.01)
num_epochs = 10

In [11]:
import time

In [12]:
def train_model():
    for ep in range(num_epochs):
        train_iters, train_passed  = 0, 0
        train_loss, train_acc = 0., 0.
        start=time.time()

        model.train()
        for X, y in train:
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed  = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        for X, y in test:
            y_pred = model(X)
            l = loss(y_pred, y)
            test_loss += l.item()
            test_acc += (y_pred.argmax(dim=1) == y).sum().item()
            test_iters += 1
            test_passed += len(X)

        print("ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            ep, time.time() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed)
        )

In [13]:

train_model()

ep: 0, taked: 9.639, train_loss: 0.776890156838171, train_acc: 0.9005, test_loss: 0.28165677823126317, test_acc: 0.9157
ep: 1, taked: 9.077, train_loss: 0.1487927075990654, train_acc: 0.9556666666666667, test_loss: 0.23638678416609765, test_acc: 0.9302
ep: 2, taked: 9.199, train_loss: 0.10852411432350252, train_acc: 0.96715, test_loss: 0.14051855301950128, test_acc: 0.9589
ep: 3, taked: 9.560, train_loss: 0.0911896237310894, train_acc: 0.9728833333333333, test_loss: 0.1512125694192946, test_acc: 0.9571
ep: 4, taked: 9.499, train_loss: 0.07243171855757766, train_acc: 0.9785666666666667, test_loss: 0.1355069418437779, test_acc: 0.9624
ep: 5, taked: 9.053, train_loss: 0.05804299227100738, train_acc: 0.98175, test_loss: 0.13429821088793686, test_acc: 0.9646
ep: 6, taked: 9.207, train_loss: 0.05111487050580376, train_acc: 0.9835666666666667, test_loss: 0.12414167001552415, test_acc: 0.9676
ep: 7, taked: 9.502, train_loss: 0.046535821549812374, train_acc: 0.9855833333333334, test_loss: 0.139

In [16]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 512),
    torch.nn.ReLU(),
    torch.nn.Linear(512, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 10)
)

In [17]:

trainer = torch.optim.RMSprop(model.parameters(), lr=.01)
train_model()

ep: 0, taked: 11.940, train_loss: 4.023634274280135, train_acc: 0.8275, test_loss: 0.5719333052635193, test_acc: 0.8674
ep: 1, taked: 11.801, train_loss: 0.2012548716619928, train_acc: 0.9402, test_loss: 0.5223146644420922, test_acc: 0.8766
ep: 2, taked: 11.835, train_loss: 0.18514325356388345, train_acc: 0.94695, test_loss: 0.19969452960649506, test_acc: 0.9466
ep: 3, taked: 11.780, train_loss: 0.1511544990491994, train_acc: 0.9559666666666666, test_loss: 0.1590937664732337, test_acc: 0.9562
ep: 4, taked: 11.779, train_loss: 0.16036528693710236, train_acc: 0.9573333333333334, test_loss: 0.16379800703725778, test_acc: 0.9591
ep: 5, taked: 11.608, train_loss: 0.10786810573586758, train_acc: 0.9689, test_loss: 0.16687535899400247, test_acc: 0.9592
ep: 6, taked: 11.383, train_loss: 0.13432616291646945, train_acc: 0.9671166666666666, test_loss: 0.15091781228547915, test_acc: 0.9637
ep: 7, taked: 11.373, train_loss: 0.09289909549493422, train_acc: 0.9736333333333334, test_loss: 0.1815018050

In [18]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(784, 512),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(512),
    torch.nn.Linear(512, 256),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(256),
    torch.nn.Linear(256, 128),
    torch.nn.ReLU(),
    torch.nn.BatchNorm1d(128),
    torch.nn.Linear(128, 10)
)

In [19]:
trainer = torch.optim.RMSprop(model.parameters(), lr=.01)
train_model()

ep: 0, taked: 12.075, train_loss: 0.23267957122719035, train_acc: 0.9294, test_loss: 0.1616538783768192, test_acc: 0.9462
ep: 1, taked: 11.994, train_loss: 0.10194306785359662, train_acc: 0.9689166666666666, test_loss: 0.12779887910583057, test_acc: 0.9603
ep: 2, taked: 11.962, train_loss: 0.07231868238564819, train_acc: 0.9781833333333333, test_loss: 0.29014088594703935, test_acc: 0.9244
ep: 3, taked: 11.887, train_loss: 0.05737338457454709, train_acc: 0.9819833333333333, test_loss: 0.16540324268862605, test_acc: 0.957
ep: 4, taked: 12.838, train_loss: 0.043728433736411734, train_acc: 0.98615, test_loss: 0.11038600715983193, test_acc: 0.9683
ep: 5, taked: 11.877, train_loss: 0.03438234965643905, train_acc: 0.9891, test_loss: 0.1308876895043795, test_acc: 0.9674
ep: 6, taked: 11.937, train_loss: 0.031237549286216814, train_acc: 0.9899, test_loss: 0.1114725422594347, test_acc: 0.973
ep: 7, taked: 11.904, train_loss: 0.024778889793100112, train_acc: 0.9919333333333333, test_loss: 0.09110