
Постройте модель на основе полносвязных слоёв для классификации Fashion MNIST из библиотеки torchvision (datasets).
Получите точность (accuracy) на тестовой выборке не ниже 88%.

Инструкция по выполнению задания

Скачайте тренировочную и тестовую части датасета
Постройте модель, выбрав стартовую архитектуру
Обучите модель и сверьте качество на тестовой части с заданным порогом
Изменяйте архитектуру модели, пока качество на тестовой части не достигнет порога. Вариации архитектуры можно реализовать через изменение количества слоёв, количества нейронов в слоях и использование регуляризации. Можно использовать различные оптимизаторы.

In [1]:
import torch

In [2]:
import torchvision as tv

In [3]:
import pandas as pd
import numpy as np

In [4]:
import time

In [5]:
BATCH_SIZE=256

In [6]:
# Download (if not already present) the Fashion-MNIST train and test splits
# into the current directory; ToTensor() converts each image to a tensor.
train_dataset = tv.datasets.FashionMNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.FashionMNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)
# Reshuffle the training samples every epoch so the mini-batches differ between
# epochs; the test loader keeps a fixed order because it is only used for
# evaluation.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 16257481.25it/s]


Extracting ./FashionMNIST/raw/train-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 307256.52it/s]


Extracting ./FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 5451659.12it/s]


Extracting ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 4474156.03it/s]


Extracting ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw



In [7]:
# Shape of the image tensor of the first training sample
# (the recorded output is [1, 28, 28]).
train_dataset[0][0].shape

torch.Size([1, 28, 28])

In [8]:
# Baseline classifier: flatten each image to a vector, apply one hidden
# fully connected layer with ReLU, then project to 10 outputs.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=10),
)

In [9]:
# Display the layer structure of the model.
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [10]:
# Training configuration: number of passes over the training loader,
# the classification loss, and the optimizer bound to the model parameters.
num_epochs = 10
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.RMSprop(params=model.parameters(), lr=0.01)

In [11]:
import time

In [12]:
def train_model():
    """Train the global ``model`` and print metrics after every epoch.

    Relies on the notebook-level globals ``model``, ``loss``, ``trainer``,
    ``train``, ``test`` and ``num_epochs``. Each epoch runs one optimisation
    pass over ``train`` followed by one evaluation pass over ``test``, then
    prints the elapsed time, the mean per-batch loss and the accuracy
    (correct predictions / samples seen) for both passes.

    Returns:
        None. Results are reported via ``print`` only.
    """
    for ep in range(num_epochs):
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        # perf_counter() is monotonic, so the elapsed time cannot be skewed
        # by system clock adjustments.
        start = time.perf_counter()

        model.train()
        for X, y in train:
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        # Evaluation never calls backward(), so disable autograd to avoid
        # building computation graphs that are thrown away immediately.
        with torch.no_grad():
            for X, y in test:
                y_pred = model(X)
                l = loss(y_pred, y)
                test_loss += l.item()
                test_acc += (y_pred.argmax(dim=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        print("ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            ep, time.perf_counter() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed)
        )

In [13]:

# Run training; per-epoch metrics are printed by train_model().
train_model()

ep: 0, taked: 14.270, train_loss: 2.0407474016889613, train_acc: 0.7641166666666667, test_loss: 0.6217028684914112, test_acc: 0.7773
ep: 1, taked: 10.554, train_loss: 0.46909929805613576, train_acc: 0.8311833333333334, test_loss: 0.4290535062551498, test_acc: 0.8456
ep: 2, taked: 10.180, train_loss: 0.42541170767013065, train_acc: 0.84515, test_loss: 0.4249147891998291, test_acc: 0.8457
ep: 3, taked: 10.308, train_loss: 0.3938518520999462, train_acc: 0.8545666666666667, test_loss: 0.49214446246623994, test_acc: 0.8383
ep: 4, taked: 10.316, train_loss: 0.38081164918047317, train_acc: 0.8610666666666666, test_loss: 0.4424743395298719, test_acc: 0.8463
ep: 5, taked: 9.177, train_loss: 0.3668817623498592, train_acc: 0.8653666666666666, test_loss: 0.40336456038057805, test_acc: 0.8591
ep: 6, taked: 11.007, train_loss: 0.34481846531654925, train_acc: 0.8715, test_loss: 0.5209329333156347, test_acc: 0.8136
ep: 7, taked: 11.691, train_loss: 0.36938562786325496, train_acc: 0.8717333333333334, t

In [14]:
# Deeper classifier: three hidden fully connected layers of decreasing width
# (256 -> 128 -> 64), each followed by ReLU, then a 10-output layer.
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=784, out_features=256),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=256, out_features=128),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=128, out_features=64),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=64, out_features=10),
)

In [16]:

# Create a new optimizer over the parameters of the current model,
# then run training.
trainer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
train_model()

ep: 0, taked: 11.107, train_loss: 0.22612467313066442, train_acc: 0.9159666666666667, test_loss: 0.328816170245409, test_acc: 0.886
ep: 1, taked: 11.252, train_loss: 0.21350621955191834, train_acc: 0.9211333333333334, test_loss: 0.3325360544025898, test_acc: 0.8869
ep: 2, taked: 10.918, train_loss: 0.2030383051075834, train_acc: 0.9245666666666666, test_loss: 0.336909762211144, test_acc: 0.8872
ep: 3, taked: 11.104, train_loss: 0.19459581650952076, train_acc: 0.92765, test_loss: 0.33982949107885363, test_acc: 0.8863
ep: 4, taked: 11.126, train_loss: 0.18747292582659011, train_acc: 0.9305166666666667, test_loss: 0.35041961167007685, test_acc: 0.8856
ep: 5, taked: 11.036, train_loss: 0.1810510037110207, train_acc: 0.9326166666666666, test_loss: 0.3535566784441471, test_acc: 0.8866
ep: 6, taked: 11.083, train_loss: 0.1739396665324556, train_acc: 0.93485, test_loss: 0.35328023731708524, test_acc: 0.8878
ep: 7, taked: 10.953, train_loss: 0.1658152570432805, train_acc: 0.9384166666666667, te