In [1]:
from collections import OrderedDict
from dataclasses import dataclass
import os.path as path

import torch as t
import torch.nn.functional as F
import torchvision as tv

import numpy as np

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = t.device("cuda" if t.cuda.is_available() else "cpu")
# Root directory for downloaded datasets, under the user's home directory.
DATAROOT = path.expanduser("~/mldata/pytorch")
# Seed PyTorch's global RNG up front so that Restart & Run All does not give a
# different random train/validation split and different initial weights on
# every kernel restart.
SEED = 0
t.manual_seed(SEED)
DEVICE

device(type='cpu')

In [3]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with mean 0.5 and standard deviation 0.5.
xform = tv.transforms.Compose(
    [
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [4]:
# Download (if needed) the FashionMNIST training split and carve it into an
# 80% training subset and a 20% validation subset, chosen at random.
datapath = path.join(DATAROOT, "fashion-mnist")
train_val_set = tv.datasets.FashionMNIST(
    root=datapath,
    train=True,
    transform=xform,
    download=True,
)
train_size = int(0.8 * len(train_val_set))
val_size = len(train_val_set) - train_size
trainset, valset = t.utils.data.random_split(
    train_val_set,
    lengths=[train_size, val_size],
)
print(f"Training set size: {train_size}, Validation set size: {val_size}")

Training set size: 48000, Validation set size: 12000


In [5]:
# Held-out FashionMNIST test split, preprocessed with the same transform as
# the training data.
testset = tv.datasets.FashionMNIST(
    root=datapath,
    train=False,
    transform=xform,
    download=True,
)
print(f"Test set size: {len(testset)}")

Test set size: 10000


In [6]:
def create_model():
    """Build a fresh, untrained fully connected classifier.

    The network flattens its input, passes it through three hidden linear
    layers (784 -> 128 -> 64 -> 32), each followed by a ReLU, and ends in a
    linear layer producing 10 raw logits (no softmax is applied).

    Returns:
        A ``torch.nn.Sequential`` whose children are named ``flatten``,
        ``fc1``/``relu1``, ``fc2``/``relu2``, ``fc3``/``relu3`` and ``logits``.
    """
    layers = OrderedDict()
    layers["flatten"] = t.nn.Flatten()
    layers["fc1"] = t.nn.Linear(in_features=784, out_features=128)
    layers["relu1"] = t.nn.ReLU()
    layers["fc2"] = t.nn.Linear(in_features=128, out_features=64)
    layers["relu2"] = t.nn.ReLU()
    layers["fc3"] = t.nn.Linear(in_features=64, out_features=32)
    layers["relu3"] = t.nn.ReLU()
    layers["logits"] = t.nn.Linear(in_features=32, out_features=10)
    return t.nn.Sequential(layers)

In [7]:
def accuracy(outputs, targets):
    """Return the fraction of rows whose highest-scoring class equals the target.

    Args:
        outputs: Tensor of per-class scores; the predicted class of each row
            is the argmax along dimension 1.
        targets: Tensor of true class indices, one per row of ``outputs``.

    Returns:
        A Python float: number of correct predictions divided by the number
        of targets.

    Raises:
        AssertionError: If ``outputs`` and ``targets`` differ in their first
            dimension.
    """
    assert outputs.size(0) == targets.size(0)
    hits = (outputs.argmax(dim=1) == targets).sum().item()
    return hits / targets.size(0)

In [8]:
@dataclass
class Hyperparams:
    """Bundle of tunable training settings."""

    # Number of samples per mini-batch.
    batch_size: int = 10
    # Number of passes over the training set.
    epochs: int = 10
    # Step size handed to the optimizer.
    learning_rate: float = 0.0001

    def to_dict(self):
        """Return the three settings as a plain ``dict`` keyed by field name."""
        keys = ("batch_size", "epochs", "learning_rate")
        return {key: getattr(self, key) for key in keys}

In [9]:
# Take the first training sample and add a leading batch dimension so it can
# be fed to the model as a batch of one.
image, _ = trainset[0]
batch_of_one = image.unsqueeze(0)
print(image.shape, batch_of_one.shape)

torch.Size([1, 28, 28]) torch.Size([1, 1, 28, 28])


In [10]:
# Smoke test: an untrained model should map a single image to 10 logits.
# Call the module itself rather than .forward() so registered hooks still run.
create_model()(batch_of_one)

tensor([[ 0.0990, -0.0407,  0.0240,  0.0867,  0.0274, -0.0667, -0.1145,  0.1009,
          0.0656,  0.1739]], grad_fn=<AddmmBackward>)

In [11]:
# Sanity check: an untrained model on a 10-class problem should score close
# to chance (about 0.1) on a large validation batch.
dl = t.utils.data.DataLoader(valset, batch_size=5000)
images, targets = next(iter(dl))
# No gradients are needed for a metric, so skip building the autograd graph.
# The original detached into a misspelled name (`ouputs`), which left the
# value passed to accuracy() still attached to the graph.
with t.no_grad():
    outputs = create_model()(images)
print(images.shape, targets.shape, outputs.shape)
accuracy(outputs, targets)

torch.Size([5000, 1, 28, 28]) torch.Size([5000]) torch.Size([5000, 10])


0.101

In [12]:
def _run_epoch(model, loss_fn, loader, device, optim=None):
    """Run one pass over ``loader`` and return ``(mean batch loss, accuracy)``.

    When ``optim`` is given, every batch also performs a backward pass and an
    optimizer step; otherwise the pass only evaluates the model.
    """
    batch_losses = []
    correct = 0
    seen = 0
    for images, targets in loader:
        images = images.to(device)
        targets = targets.to(device)

        if optim is not None:
            optim.zero_grad()
        # Call the module (not .forward) so registered hooks are honoured.
        outputs = model(images)
        loss = loss_fn(outputs, targets)
        if optim is not None:
            loss.backward()
            optim.step()

        # Keep plain Python numbers instead of concatenating tensors: this
        # works whatever device the batch lives on and avoids re-copying an
        # ever-growing tensor on every iteration.
        batch_losses.append(loss.item())
        correct += (outputs.argmax(dim=1) == targets).sum().item()
        seen += targets.shape[0]

    if seen == 0:
        # Empty loader: there is nothing to average.
        return float("nan"), float("nan")
    return sum(batch_losses) / len(batch_losses), correct / seen


def train(model, optim, loss_fn, epochs, trainloader, valloader, device=None):
    """Train ``model`` and print loss/accuracy on both loaders after each epoch.

    Args:
        model: Module to train; it is moved to the target device.
        optim: Optimizer that updates the model's parameters.
        loss_fn: Callable ``loss_fn(outputs, targets)`` returning a scalar
            loss tensor.
        epochs: Number of passes over ``trainloader``.
        trainloader: Iterable of ``(images, targets)`` batches used for the
            optimisation steps.
        valloader: Iterable of ``(images, targets)`` batches that are only
            evaluated (no gradients, model in eval mode).
        device: Device to run on. Defaults to the notebook-level ``DEVICE``.

    Returns:
        None. Per-epoch metrics are printed. The reported loss is the
        unweighted mean of the per-batch losses and the accuracy is computed
        over all samples seen in the epoch; training metrics are gathered
        while the weights are still being updated.
    """
    if device is None:
        device = DEVICE
    model = model.to(device)
    for epoch in range(epochs):
        # Process the training set
        model.train()
        with t.enable_grad():
            train_loss, train_acc = _run_epoch(model, loss_fn, trainloader, device, optim)

        # Calculate the validation metrics
        model.eval()
        with t.no_grad():
            val_loss, val_acc = _run_epoch(model, loss_fn, valloader, device)

        print(f"\nEpoch {epoch}:")
        print(f"Loss: train={train_loss:.3f}, validation={val_loss:.3f}")
        print(f"Accuracy: train={train_acc:.2f}, validation={val_acc:.2f}")

In [13]:
# Settings for this run. An earlier configuration used batch_size=32,
# epochs=10, learning_rate=0.05.
hparams = Hyperparams(
    batch_size=32,
    epochs=13,
    learning_rate=0.25,
)
model = create_model()
optim = t.optim.SGD(params=model.parameters(), lr=hparams.learning_rate)
loss_fn = t.nn.CrossEntropyLoss()
# Only the training loader is shuffled; validation is read in large fixed
# batches of 5000.
trainloader = t.utils.data.DataLoader(
    dataset=trainset,
    batch_size=hparams.batch_size,
    shuffle=True,
)
valloader = t.utils.data.DataLoader(dataset=valset, batch_size=5000)

In [14]:
# Run the full training schedule; metrics are printed after every epoch.
train(
    model=model,
    optim=optim,
    loss_fn=loss_fn,
    epochs=hparams.epochs,
    trainloader=trainloader,
    valloader=valloader,
)


Epoch 0:
Loss: train=0.655, validation=0.492
Accuracy: train=0.76, validation=0.83

Epoch 1:
Loss: train=0.448, validation=0.399
Accuracy: train=0.84, validation=0.86

Epoch 2:
Loss: train=0.409, validation=0.373
Accuracy: train=0.85, validation=0.87

Epoch 3:
Loss: train=0.373, validation=0.408
Accuracy: train=0.86, validation=0.85

Epoch 4:
Loss: train=0.352, validation=0.353
Accuracy: train=0.87, validation=0.87

Epoch 5:
Loss: train=0.337, validation=0.360
Accuracy: train=0.88, validation=0.87

Epoch 6:
Loss: train=0.319, validation=0.347
Accuracy: train=0.88, validation=0.88

Epoch 7:
Loss: train=0.313, validation=0.386
Accuracy: train=0.89, validation=0.87

Epoch 8:
Loss: train=0.300, validation=0.335
Accuracy: train=0.89, validation=0.89

Epoch 9:
Loss: train=0.297, validation=0.347
Accuracy: train=0.89, validation=0.88

Epoch 10:
Loss: train=0.284, validation=0.364
Accuracy: train=0.90, validation=0.88

Epoch 11:
Loss: train=0.280, validation=0.344
Accuracy: train=0.90, valida

In [15]:
# Final evaluation on the held-out test set, loaded as one single batch.
testloader = t.utils.data.DataLoader(testset, batch_size=len(testset))
images, targets = next(iter(testloader))
# train() moved the model to DEVICE, so the batch has to live there as well;
# without this the forward pass fails when DEVICE is a GPU.
images = images.to(DEVICE)
targets = targets.to(DEVICE)
model.eval()
with t.no_grad():
    outputs = model(images)
    test_acc = accuracy(outputs, targets)
print(f"Test accuracy: {test_acc:.2f}")

Test accuracy: 0.87
