In [1]:
import torch
from torch import nn
from torch.nn import functional as F

In [2]:
import torchvision
from torchvision import transforms
from torch.utils.data import TensorDataset, DataLoader

## **Dataset**

In [3]:
# Preprocessing applied to every image: resize to 32x32 (upscale), then
# convert it to a tensor.
trans = transforms.Compose([
    transforms.Resize((32, 32)),  # upscale
    transforms.ToTensor(),
])

# download=True fetches the files into ./data only when they are missing, so
# the notebook also runs on a fresh checkout; an existing copy is reused as-is.
data_train = torchvision.datasets.FashionMNIST(
    root='./data', train=True, transform=trans, download=True
)
data_val = torchvision.datasets.FashionMNIST(
    root='./data', train=False, transform=trans, download=True
)

In [4]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 256

# Training data is reshuffled; validation data keeps a fixed order.
train_loader = DataLoader(dataset=data_train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=data_val, batch_size=batch_size, shuffle=False)

In [5]:
# Pull a single mini-batch from the training loader so it can be inspected.
X, y = next(iter(train_loader))

In [6]:
# Show the shapes of the sampled inputs and labels, one per line.
print(X.shape, y.shape, sep='\n')

torch.Size([256, 1, 32, 32])
torch.Size([256])


## **Model**

In [7]:
class SoftmaxRegression(nn.Module):
    """Linear classifier: flattens each input and applies one affine layer.

    The module returns raw logits; no softmax is applied inside it.

    Args:
        num_inputs: number of features per sample after flattening.
        num_outputs: number of logits produced per sample.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        flatten = nn.Flatten()
        linear = nn.Linear(in_features=num_inputs, out_features=num_outputs)
        # Kept as a Sequential named `net` so parameter names stay `net.1.*`.
        self.net = nn.Sequential(flatten, linear)

    def forward(self, X):
        """Return the logits computed for the batch ``X``."""
        logits = self.net(X)
        return logits

## **Loss and metrics**

In [8]:
def cross_entropy(y_hat, y):
    """Mean cross-entropy loss of the logits against the targets.

    Args:
        y_hat: logits, shape (B, q).
        y: targets, shape (B).

    Returns:
        Scalar tensor holding the loss averaged over the batch.
    """
    batch_loss = F.cross_entropy(input=y_hat, target=y, reduction='mean')
    return batch_loss

In [9]:
def accuracy(y_hat, y):
    """Fraction of samples whose highest-scoring class equals the label.

    Args:
        y_hat: per-class scores, shape (B, q).
        y: labels, shape (B).

    Returns:
        Scalar float32 tensor with the mean number of matches.
    """
    # Cast the predicted indices to the label dtype before comparing.
    predicted = y_hat.argmax(dim=1).to(y.dtype)  # (B)
    hits = predicted.eq(y).float()  # (B)
    return hits.mean()

## **Training**

In [10]:
# Seed the global RNG so that weight initialisation, and the later shuffling
# of training batches (which draws from the same global RNG), are repeatable
# when the notebook is re-run from this cell onwards.
torch.manual_seed(0)

# Each 1x32x32 image is flattened into 1024 input features; 10 output logits.
model = SoftmaxRegression(num_inputs=1*32*32, num_outputs=10)
model

SoftmaxRegression(
  (net): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=1024, out_features=10, bias=True)
  )
)

In [11]:
# Plain SGD over all model parameters with a learning rate of 0.1.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

In [12]:
%%time
max_epochs = 10

for i in range(max_epochs):
    train_loss = 0
    num_train_batches = 0
    
    for X, y in train_loader:
        optimizer.zero_grad()
        y_hat = model(X)
        loss = cross_entropy(y_hat, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        num_train_batches += 1

    val_loss = 0
    val_acc = 0
    num_val_batches = 0
    with torch.no_grad():
        for X, y in val_loader:
            y_hat = model(X)
            loss = cross_entropy(y_hat, y)
            val_loss += loss.item()
            num_val_batches += 1
            val_acc += accuracy(y_hat, y)

    print(f'epoch={i:02d} | train_loss={train_loss/num_train_batches:.4f} | val_loss={val_loss/num_val_batches:.4f} | val_acc={val_acc/num_val_batches:.4f}')

epoch=00 | train_loss=0.7803 | val_loss=0.6188 | val_acc=0.7910
epoch=01 | train_loss=0.5742 | val_loss=0.5590 | val_acc=0.8101
epoch=02 | train_loss=0.5286 | val_loss=0.5361 | val_acc=0.8173
epoch=03 | train_loss=0.5051 | val_loss=0.5334 | val_acc=0.8118
epoch=04 | train_loss=0.4902 | val_loss=0.5260 | val_acc=0.8228
epoch=05 | train_loss=0.4790 | val_loss=0.6973 | val_acc=0.7680
epoch=06 | train_loss=0.4706 | val_loss=0.4898 | val_acc=0.8293
epoch=07 | train_loss=0.4634 | val_loss=0.4820 | val_acc=0.8341
epoch=08 | train_loss=0.4579 | val_loss=0.4789 | val_acc=0.8341
epoch=09 | train_loss=0.4524 | val_loss=0.4901 | val_acc=0.8313
CPU times: total: 8min 11s
Wall time: 1min 22s
