In [2]:
import time
import torch
from torchvision import datasets, transforms
from torch import nn
%matplotlib inline

In [3]:
# Number of samples per mini-batch, shared by the training and test loaders.
BATCH_SIZE = 256

# Use the GPU when PyTorch can see one; otherwise run on the CPU.
# (Assign device = 'cpu' here instead to force CPU execution.)
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fashion-MNIST train/test splits, downloaded into ./DSDZ3 when not already present.
train_dataset = datasets.FashionMNIST(
    './DSDZ3',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.FashionMNIST(
    './DSDZ3',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Only the training loader is shuffled; the test loader keeps dataset order.
train = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Cell output: shape of the first training image and the selected device.
(train_dataset[0][0].shape, device)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./DSDZ3\FashionMNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./DSDZ3\FashionMNIST\raw\train-images-idx3-ubyte.gz to ./DSDZ3\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./DSDZ3\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./DSDZ3\FashionMNIST\raw\train-labels-idx1-ubyte.gz to ./DSDZ3\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./DSDZ3\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./DSDZ3\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to ./DSDZ3\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz


100.0%

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./DSDZ3\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz
Extracting ./DSDZ3\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to ./DSDZ3\FashionMNIST\raw






(torch.Size([1, 28, 28]), 'cuda')

In [4]:
def _run_epoch(model_, loader_, loss_, device_, trainer_=None):
    """Make one pass over ``loader_`` and return ``(mean_batch_loss, accuracy)``.

    When ``trainer_`` (an optimizer) is given, the model is put in train mode
    and its parameters are updated after every batch. When it is ``None`` the
    model is put in eval mode and gradient tracking is switched off, so no
    autograd graph is built for the forward passes.

    ``mean_batch_loss`` is the sum of per-batch losses divided by the number of
    batches; ``accuracy`` is the number of correct argmax predictions divided
    by the number of samples seen. Both are ``nan`` if the loader is empty.
    """
    is_training = trainer_ is not None
    if is_training:
        model_.train()
    else:
        model_.eval()

    batches, samples = 0, 0
    loss_sum, correct = 0., 0.

    # Gradients are only needed when an optimizer step follows the forward pass.
    with torch.set_grad_enabled(is_training):
        for X, y in loader_:
            X, y = X.to(device_), y.to(device_)
            if is_training:
                trainer_.zero_grad()
            y_pred = model_(X)
            batch_loss = loss_(y_pred, y)
            if is_training:
                batch_loss.backward()
                trainer_.step()
            loss_sum += batch_loss.item()
            correct += (y_pred.argmax(dim=1) == y).sum().item()
            batches += 1
            samples += len(X)

    # Guard against empty loaders instead of raising ZeroDivisionError.
    mean_loss = loss_sum / batches if batches else float('nan')
    accuracy = correct / samples if samples else float('nan')
    return mean_loss, accuracy


def train_model(**kwargs) -> None:
    """Train a classifier and print per-epoch train/test metrics.

    Required keyword arguments (a missing one raises ``KeyError``):
        loss: callable ``loss(y_pred, y)`` returning a scalar tensor.
        trainer: optimizer exposing ``zero_grad()`` and ``step()``.
        num_epochs: number of passes over ``train``.
        model: the module to optimise; called as ``model(X)``.
        train: iterable of ``(X, y)`` batches used for optimisation.
        test: iterable of ``(X, y)`` batches used for evaluation only.
        device: target passed to ``Tensor.to`` for every batch.

    After each epoch one line is printed with the epoch index, the elapsed
    wall-clock time (training plus evaluation), the mean per-batch loss and
    the accuracy on both loaders. Nothing is returned.
    """
    loss_ = kwargs['loss']
    trainer_ = kwargs['trainer']
    num_epochs_ = kwargs['num_epochs']
    model_ = kwargs['model']
    train_ = kwargs['train']
    test_ = kwargs['test']
    device_ = kwargs['device']

    for ep in range(num_epochs_):
        start = time.time()

        # Optimisation pass: train mode, gradients on, optimizer stepped per batch.
        train_loss, train_acc = _run_epoch(model_, train_, loss_, device_, trainer_)

        # Evaluation pass: eval mode, gradients off, no parameter updates.
        test_loss, test_acc = _run_epoch(model_, test_, loss_, device_)

        print("ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            ep, time.time() - start, train_loss, train_acc,
            test_loss, test_acc)
        )

In [5]:
# Small CNN for 1x28x28 inputs; the shape after each spatial layer is noted
# so the size of the first fully connected layer (12 * 11 * 11 = 1452) is traceable.
model = nn.Sequential(
    # Feature extractor
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, padding=1),   # -> 6 x 28 x 28
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=1),                                # -> 6 x 26 x 26
    nn.Conv2d(in_channels=6, out_channels=12, kernel_size=4),             # -> 12 x 23 x 23
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),                                # -> 12 x 11 x 11
    # Classifier head
    nn.Flatten(),                                                         # -> 1452
    nn.Linear(in_features=1452, out_features=502),
    nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=502, out_features=51),
    nn.ReLU(),
    nn.Dropout(p=0.3),
    nn.Linear(in_features=51, out_features=10),                           # one logit per class
).to(device)

# The optimizer is created after the model has been moved to the target device.
train_model(
    model=model,
    train=train,
    test=test,
    device=device,
    num_epochs=20,
    loss=nn.CrossEntropyLoss(),
    trainer=torch.optim.Adam(model.parameters(), lr=1e-2),
)

ep: 0, taked: 27.745, train_loss: 0.655225603884839, train_acc: 0.7569, test_loss: 0.4317518025636673, test_acc: 0.8379
ep: 1, taked: 23.119, train_loss: 0.43542046141117174, train_acc: 0.84435, test_loss: 0.3432627562433481, test_acc: 0.8765
ep: 2, taked: 23.187, train_loss: 0.38407165129133997, train_acc: 0.86255, test_loss: 0.36668400689959524, test_acc: 0.8639
ep: 3, taked: 24.391, train_loss: 0.3572110560346157, train_acc: 0.8717666666666667, test_loss: 0.3281657982617617, test_acc: 0.8837
ep: 4, taked: 22.451, train_loss: 0.3423380477631346, train_acc: 0.8775166666666666, test_loss: 0.31921753771603106, test_acc: 0.8849
ep: 5, taked: 24.963, train_loss: 0.32783743111377067, train_acc: 0.8828666666666667, test_loss: 0.3057827863842249, test_acc: 0.8884
ep: 6, taked: 23.066, train_loss: 0.3250446049456901, train_acc: 0.8838333333333334, test_loss: 0.3027260836213827, test_acc: 0.8913
ep: 7, taked: 25.229, train_loss: 0.31814493968131696, train_acc: 0.8850833333333333, test_loss: 0.