In [61]:
## Importing packages

import torch
from torch import nn

import torchvision
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
from tqdm.auto import tqdm

In [62]:
## Setup mps for training on apple Silicon

# Device selection is controlled by one explicit switch instead of detecting
# MPS and then silently overwriting the result.
# Set FORCE_CPU to False to train on Apple's MPS backend when it is available.
# NOTE(review): CPU stays the default because that is what this notebook
# actually trained on; the reason for avoiding MPS is not documented - confirm.
FORCE_CPU = True

use_mps = (not FORCE_CPU) and torch.backends.mps.is_available()
device = "mps" if use_mps else "cpu"

In [63]:
## Setup training data

# Training split: images are converted with ToTensor(), labels get no transform.
data_train = datasets.FashionMNIST(root="data",
                                   train=True,
                                   download=True,
                                   transform=ToTensor(),
                                   target_transform=None)

## Setup testing data

# Testing split: same root folder and image transform, but train=False.
data_test = datasets.FashionMNIST(root="data",
                                  train=False,
                                  download=True,
                                  transform=ToTensor())

In [64]:
# Visualising a single sample from the dataset


In [65]:
## Break the data down into batches for training

BATCH_SIZE = 32

# Training batches are reshuffled every time the loader is iterated.
dataloader_train = DataLoader(
    dataset=data_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test batches keep the dataset order (shuffle is left off).
dataloader_test = DataLoader(
    dataset=data_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [66]:
# Sanity check: pull one training batch and look at the shape nn.Flatten
# produces for it, i.e. the feature width the first linear layer must accept.
x, y = next(iter(dataloader_train))
x_flat = nn.Flatten()(x)
x_flat.shape

torch.Size([32, 784])

In [67]:
## Creating the model

class FashionMNISTModel(nn.Module):
    """Baseline classifier: flatten the input, then apply two linear layers.

    No activation function is placed between the two linear layers.

    Args:
        input_shape: Number of features per sample once the input is flattened.
        hidden_units: Output width of the first linear layer.
        output_shape: Number of values produced per sample by the last layer.
    """

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        # Layers are created in the order they are applied.
        flatten = nn.Flatten()
        hidden = nn.Linear(in_features=input_shape, out_features=hidden_units)
        head = nn.Linear(in_features=hidden_units, out_features=output_shape)
        self.layer_stack = nn.Sequential(flatten, hidden, head)

    def forward(self, x: torch.Tensor):
        """Pass ``x`` through the layer stack and return its raw output."""
        stack = self.layer_stack
        return stack(x)

In [68]:
## Creating the model for our target device

# Build the baseline model, then move its parameters onto the chosen device.
model_0 = FashionMNISTModel(
    input_shape=784,  # width of one flattened image (see the nn.Flatten check)
    hidden_units=10,
    output_shape=10,
)
model_0 = model_0.to(device)

In [69]:
## Setting up loss function and optimizer

# Cross-entropy loss, applied to the model's raw outputs and the integer labels.
loss_fn = nn.CrossEntropyLoss()

# Stochastic gradient descent over every parameter of model_0.
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

In [70]:
## Training and testing loop

# Number of full passes over the training data.
epochs = 3

for epoch in tqdm(range(epochs)):
    print(f"epoch: {epoch}")

    # ---- Training: one optimizer step per batch ----
    model_0.train()
    loss_train_cum = 0

    for X, y in dataloader_train:
        X, y = X.to(device), y.to(device)

        logits_train = model_0(X)
        loss_train = loss_fn(logits_train, y)

        # Accumulate a detached value: adding the live loss tensor would chain
        # every batch's autograd history onto the running total for the epoch.
        loss_train_cum += loss_train.detach()

        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

    # Mean training loss per batch for this epoch.
    loss_train_per_batch = loss_train_cum / len(dataloader_train)

    # ---- Testing: no gradients are tracked inside inference_mode ----
    model_0.eval()
    loss_test = 0
    with torch.inference_mode():
        for X, y in dataloader_test:
            X, y = X.to(device), y.to(device)
            logits_test = model_0(X)
            loss_test += loss_fn(logits_test, y)
        # Mean test loss per batch for this epoch.
        loss_test_per_batch = loss_test / len(dataloader_test)

    print(f"train loss: {loss_train_per_batch} || test loss: {loss_test_per_batch}")


  0%|          | 0/3 [00:00<?, ?it/s]

epoch: 0
train loss: 0.5900816917419434 || test loss: 0.49177682399749756
epoch: 1
train loss: 0.4746129512786865 || test loss: 0.5016408562660217
epoch: 2
train loss: 0.45413026213645935 || test loss: 0.48987898230552673
