Here we follow the official PyTorch tutorial to train a model on the `FashionMNIST` dataset so that it can classify images of clothing for us.

In [1]:
import torch
import torchvision


# Download (if not already present under ./data) the training split of
# FashionMNIST; ToTensor converts every image to a tensor when it is accessed.
training_data = torchvision.datasets.FashionMNIST(
    "data",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)


In [2]:
# Download (if not already present under ./data) the held-out test split of
# FashionMNIST, using the same ToTensor transform as the training split.
testing_data = torchvision.datasets.FashionMNIST(
    "data",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)


In [3]:
"""
We pass the Dataset as an argument to DataLoader. This wraps an iterable over our dataset, and supports automatic batching, sampling, shuffling and multiprocess data loading. 
Here we define a batch size of 64, i.e. each element in the dataloader iterable will return a batch of 64 features and labels.
"""

batch_size = 64

#Create the dataloaders
training_dataloader = torch.utils.data.DataLoader(dataset= training_data, batch_size=batch_size)
testing_dataloader = torch.utils.data.DataLoader(dataset=testing_data, batch_size=batch_size)

print("If to look at our test data loader:")
for X, y in testing_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

If to look at our test data loader:
Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [4]:
"""
To define a neural network in PyTorch, we create a class that inherits from nn.Module. We define the layers of the network in the __init__ function and specify how 
data will pass through the network in the 'forward' function. To accelerate operations in the neural network, we move it to an accelerator such as CUDA, MPS, MTIA, or XPU.
If the current accelerator is available, we will use it. Otherwise, we use the CPU.
"""

device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"we're using {device} device")

we're using cuda device


In [5]:
# Define the network: its layers and the forward pass through them.

class NeuralNetwork(torch.nn.Module):
    """Fully connected classifier for 28x28 inputs with 10 output classes.

    The input is flattened (every dimension after the first) and passed
    through three linear layers (784 -> 512 -> 512 -> 10) with a ReLU after
    each of the first two.
    """

    def __init__(self):
        """Create the flatten layer and the linear/ReLU stack."""
        super().__init__()
        nn = torch.nn
        # The layers are created in this order on purpose: it determines the
        # state-dict keys (indices 0, 2, 4) and the order of weight init.
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class scores (logits) for the batch ``x``."""
        return self.linear_relu_stack(self.flatten(x))
    
# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
# Training needs a loss function (cross-entropy over the class logits) and an
# optimizer (plain SGD over all of the model's parameters).
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

In [7]:
"""
In a single training loop, the model makes predictions on the training dataset (fed to it in batches), and
backpropagates the prediction error to adjust the model’s parameters.
"""
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        # compute the prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        
        # backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

In [8]:
# We also check the model’s performance against the test dataset to ensure it is learning.
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [9]:
"""
The training process is conducted over several iterations (epochs). During each epoch, the model learns parameters to make better predictions. We print the
model’s accuracy and loss at each epoch; we’d like to see the accuracy increase and the loss decrease with every epoch.
"""

epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(training_dataloader, model, loss_fn, optimizer)
    test(testing_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.301513 [   64/60000]
loss: 2.288389 [ 6464/60000]
loss: 2.281337 [12864/60000]
loss: 2.278783 [19264/60000]
loss: 2.249014 [25664/60000]
loss: 2.227063 [32064/60000]
loss: 2.235017 [38464/60000]
loss: 2.201739 [44864/60000]
loss: 2.205630 [51264/60000]
loss: 2.188582 [57664/60000]
Test Error: 
 Accuracy: 43.0%, Avg loss: 2.172131 

Epoch 2
-------------------------------
loss: 2.179353 [   64/60000]
loss: 2.165227 [ 6464/60000]
loss: 2.125644 [12864/60000]
loss: 2.137983 [19264/60000]
loss: 2.079976 [25664/60000]
loss: 2.039436 [32064/60000]
loss: 2.058058 [38464/60000]
loss: 1.993897 [44864/60000]
loss: 1.997507 [51264/60000]
loss: 1.935869 [57664/60000]
Test Error: 
 Accuracy: 58.8%, Avg loss: 1.927919 

Epoch 3
-------------------------------
loss: 1.962469 [   64/60000]
loss: 1.922989 [ 6464/60000]
loss: 1.831743 [12864/60000]
loss: 1.852599 [19264/60000]
loss: 1.734272 [25664/60000]
loss: 1.713268 [32064/60000]
loss: 1.714335 [38464/

In [10]:
# A common way to save a model is to serialize its internal state dictionary,
# which contains the model parameters.
torch.save(obj=model.state_dict(), f="model.pth")
print("Saved PyTorch Model State to model.pth")

Saved PyTorch Model State to model.pth


In [11]:
# Loading a model means re-creating the model structure and then loading the
# saved state dictionary into it.
model = NeuralNetwork().to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on an accelerator can still be loaded on a CPU-only machine.
# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(torch.load("model.pth", map_location=device, weights_only=True))

<All keys matched successfully>

In [12]:
# This model can now be used to make predictions.
# Human-readable label names; a label's integer value is its index in this list.
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

model.eval()
# Fetch the first test sample once (indexing the dataset re-applies the transform).
x, y = testing_data[0]
with torch.no_grad():
    x = x.to(device)
    # Add an explicit batch dimension instead of relying on the single
    # channel dimension to act as one.
    pred = model(x.unsqueeze(0))
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "Ankle boot", Actual: "Ankle boot"
