In [1]:
import time
import torch
from torchvision import datasets, transforms
from torch import nn
%matplotlib inline

In [40]:
# Mini-batch size shared by the train and test loaders.
BATCH_SIZE = 256
# Seed for torch's global RNG. Without it the shuffled batch order (and any
# later random weight initialisation / dropout) differs on every run.
SEED = 0

torch.manual_seed(SEED)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
# Assign 'cpu' here instead to force CPU execution.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# FashionMNIST is downloaded into ./DSDZ2 if it is not already there;
# ToTensor() makes every sample's image a tensor.
train_dataset = datasets.FashionMNIST('./DSDZ2', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.FashionMNIST('./DSDZ2', train=False, transform=transforms.ToTensor(), download=True)

# Only the training loader shuffles; the test loader keeps dataset order.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Sanity check: shape of one image tensor and the selected device.
train_dataset[0][0].shape, device

(torch.Size([1, 28, 28]), 'cuda')

In [34]:
def train_model(**kwargs) -> None:
    """Train a classifier and print train/test metrics after every epoch.

    All arguments are keyword-only and required:

    loss       -- callable ``loss(y_pred, y)`` returning a scalar tensor.
    trainer    -- optimizer exposing ``zero_grad()`` and ``step()``.
    num_epochs -- number of passes over ``train``.
    model      -- module with ``train()`` / ``eval()``; its output is
                  arg-maxed over dim 1 to get the predicted class index.
    train      -- iterable of ``(X, y)`` training batches.
    test       -- iterable of ``(X, y)`` evaluation batches.
    device     -- device every batch is moved to before the forward pass.

    Raises:
        KeyError: if one of the keywords above is missing.

    For each epoch one line is printed with the elapsed wall-clock time
    (training plus evaluation), the loss averaged over batches and the
    accuracy averaged over samples, for both loaders. A loader that yields
    no batches is reported as ``nan`` instead of raising ZeroDivisionError.
    """
    loss_ = kwargs['loss']
    trainer_ = kwargs['trainer']
    num_epochs_ = kwargs['num_epochs']
    model_ = kwargs['model']
    train_ = kwargs['train']
    test_ = kwargs['test']
    device_ = kwargs['device']

    for ep in range(num_epochs_):
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        start = time.time()

        # Training pass: layers such as dropout / batch-norm run in train mode.
        model_.train()
        for X, y in train_:
            X, y = X.to(device_), y.to(device_)
            trainer_.zero_grad()
            y_pred = model_(X)
            l = loss_(y_pred, y)
            l.backward()
            trainer_.step()
            train_loss += l.item()
            train_acc += (y_pred.argmax(dim=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        model_.eval()
        # Evaluation never calls backward(), so disable autograd: otherwise a
        # graph is recorded for every test batch, costing memory and time.
        with torch.no_grad():
            for X, y in test_:
                X, y = X.to(device_), y.to(device_)
                y_pred = model_(X)
                l = loss_(y_pred, y)
                test_loss += l.item()
                test_acc += (y_pred.argmax(dim=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        print("ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}".format(
            ep, time.time() - start,
            _ratio_or_nan(train_loss, train_iters), _ratio_or_nan(train_acc, train_passed),
            _ratio_or_nan(test_loss, test_iters), _ratio_or_nan(test_acc, test_passed))
        )


def _ratio_or_nan(total, count):
    """Return ``total / count``, or ``nan`` when ``count`` is zero."""
    return total / count if count else float('nan')

In [38]:
# Fully connected classifier for flattened 28x28 images (784 inputs):
# two hidden stages of Linear -> ReLU -> BatchNorm1d, then a 10-way output.
model = nn.Sequential(
    nn.Flatten(),
    # hidden stage 1: 784 -> 392
    nn.Linear(784, 392),
    nn.ReLU(),
    nn.BatchNorm1d(392),
    # hidden stage 2: 392 -> 196
    nn.Linear(392, 196),
    nn.ReLU(),
    nn.BatchNorm1d(196),
    # output layer: one logit per class
    nn.Linear(196, 10),
).to(device)

# The optimizer is built after the model has been moved to the target device.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adagrad(model.parameters(), lr=.01)

train_model(
    model=model,
    loss=loss_fn,
    trainer=optimizer,
    num_epochs=20,
    train=train,
    test=test,
    device=device,
)

ep: 0, taked: 18.066, train_loss: 0.44568875165695837, train_acc: 0.8390333333333333, test_loss: 0.4661523785442114, test_acc: 0.8344
ep: 1, taked: 17.778, train_loss: 0.3269379487063022, train_acc: 0.8801, test_loss: 0.418378297239542, test_acc: 0.8507
ep: 2, taked: 17.921, train_loss: 0.29261100412683283, train_acc: 0.8923166666666666, test_loss: 0.3570056758821011, test_acc: 0.8702
ep: 3, taked: 17.934, train_loss: 0.2679370835106424, train_acc: 0.9009333333333334, test_loss: 0.35521307922899725, test_acc: 0.8719
ep: 4, taked: 17.935, train_loss: 0.24940649607080095, train_acc: 0.90785, test_loss: 0.38932172060012815, test_acc: 0.8525
ep: 5, taked: 17.813, train_loss: 0.2326535573665132, train_acc: 0.9135833333333333, test_loss: 0.333658261410892, test_acc: 0.8785
ep: 6, taked: 17.980, train_loss: 0.22187178274418445, train_acc: 0.9183666666666667, test_loss: 0.3391524847596884, test_acc: 0.8808
ep: 7, taked: 17.971, train_loss: 0.20836360727218872, train_acc: 0.9225666666666666, te

In [41]:
# Wider classifier: two dropout-regularised hidden stages, one batch-normed
# hidden stage, then a 10-way output layer.
model = nn.Sequential(
    nn.Flatten(),
    # stage 1: 784 -> 1568, dropout p=0.5
    nn.Linear(784, 1568),
    nn.ReLU(),
    nn.Dropout(0.5),
    # stage 2: 1568 -> 784, dropout p=0.5
    nn.Linear(1568, 784),
    nn.ReLU(),
    nn.Dropout(0.5),
    # stage 3: 784 -> 392, batch norm
    nn.Linear(784, 392),
    nn.ReLU(),
    nn.BatchNorm1d(392),
    # output layer: one logit per class
    nn.Linear(392, 10),
).to(device)

# The optimizer is built after the model has been moved to the target device.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=.01)

train_model(
    model=model,
    loss=loss_fn,
    trainer=optimizer,
    num_epochs=20,
    train=train,
    test=test,
    device=device,
)

ep: 0, taked: 21.369, train_loss: 0.6852187145263591, train_acc: 0.7434333333333333, test_loss: 0.47769091203808783, test_acc: 0.8201
ep: 1, taked: 21.019, train_loss: 0.478857628969436, train_acc: 0.8227166666666667, test_loss: 0.4649372730404139, test_acc: 0.8219
ep: 2, taked: 21.637, train_loss: 0.43876933024284687, train_acc: 0.8407, test_loss: 0.435917592048645, test_acc: 0.8438
ep: 3, taked: 21.463, train_loss: 0.4190906655281148, train_acc: 0.8467166666666667, test_loss: 0.41792860329151155, test_acc: 0.837
ep: 4, taked: 22.340, train_loss: 0.3988557479483016, train_acc: 0.8548333333333333, test_loss: 0.37064841128885745, test_acc: 0.8627
ep: 5, taked: 25.334, train_loss: 0.37980081997019177, train_acc: 0.8626, test_loss: 0.3662508435547352, test_acc: 0.8642
ep: 6, taked: 22.209, train_loss: 0.3720552348710121, train_acc: 0.8626, test_loss: 0.3686878360807896, test_acc: 0.8627
ep: 7, taked: 22.046, train_loss: 0.3646602376344356, train_acc: 0.8671333333333333, test_loss: 0.36341