In [1]:
# Optimization is adjusted the learned parameters iteratively to improve performance of the model
# Here will be getting a deeper understanding into how optimization works

In [16]:
import torch
from torch import nn
from torchvision.datasets import FashionMNIST
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor

In [17]:
# First handle retreiving the data
train = FashionMNIST(root="data", train=True, transform=ToTensor(), download=True)
test = FashionMNIST(root="data", train=False, transform=ToTensor(), download=True)

train_data = DataLoader(dataset=train, batch_size=64, shuffle=True)
test_data = DataLoader(dataset=test, batch_size=64, shuffle=True)

In [18]:
# Define Model

class FashionMNISTClassifier(nn.Module):
    def __init__(self):
        super(FashionMNISTClassifier, self).__init__()
        self.flatten = nn.Flatten()
        self.sequential = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.flatten(x)
        logits = self.sequential(x)
        logits_prob = self.softmax(logits)
        return logits_prob

In [19]:
fmnist_model = FashionMNISTClassifier().to("mps")
print(fmnist_model)

FashionMNISTClassifier(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (sequential): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
  (softmax): Softmax(dim=1)
)


In [36]:
# Hyperparameters are adjustable parameters that let you control the optimization process
# ie, hyperparameter allow you adjust the learning environment

# number of epochs
epochs = 5
batch_size = 64
learning_rate = 1e-2

In [9]:
# Defining the optimization loop
# each iteration through the entire optimization loop is called an epoch

# Each epoch consists of the training loop and the validation loop
# The training loop iterates over the training dataset and tries to converge at the optimal parameters
# The validation/test loop iterates over the test dataset to check if the model performance is improving


In [10]:
# Loss function: loss function is the degree of dissimilirity between our
# predicted output and the actual output, you can see this as the test case for the 
# code our model is trying to write. It is the loss function we are trying to 
# minimize during the optimization

# The most common loss functions are MSE for regression, Negative Log likelihood (NLL) for classificaiton,
# CrossEntropyLoss combines the LogSoftmax adn the NLLLoss (negative log likelihood)



In [37]:
# Optimization is the process of adjusting the model parameters ot reduce the model error.
# The optimization algorithm defines how this process is performed. Stochastic gradient descent is the most popular approach
# Different optimization algorithms work better on differnt problems

# we initialize the optimzer be passing it our model paramters
optimizer = torch.optim.SGD(fmnist_model.parameters(),lr=learning_rate)


In [None]:
# In the training loop optimization happens in 2 steps

# We call the zero_grad method on the optimizer to set the gradients of all 
# the tensors to be optimized, this is because backward propagation is culmunative, 
# and the gradients from the previous training step will be added to the gradients
# of the current training step


# Back propagate the predicted loss with loss.backward() and find the gradient of all the loss with respect to all the parameters

# Once we have our gradient we can now perform optimizer.step() to apply the optimization technique to adjust the parameters

In [26]:
# Implementation of the training process

def train(dataloader:DataLoader, model:nn.Module, optimizer, loss_fn):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to("mps"), y.to("mps")
        pred = model(X)
        loss = loss_fn(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1)*len(X)
            print(f"The Loss is : {loss} => [{current}/{size}]")


def test(dataloader:DataLoader, model:nn.Module, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    model.eval()
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to("mps"), y.to("mps")
            pred =  model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss/=num_batches
    correct /=size
    correct *= 100
    print(f"Test Error | Accuracy : {correct} % Avg loss: {test_loss}")

In [24]:
loss_fn = nn.CrossEntropyLoss()

def iterate(epochs, loss_fn, optimizer, model, training_data, testing_data):
    for t in range(epochs):
        print(f"Epoch {t} ...............................")
        train(training_data, model, optimizer, loss_fn)
        test(testing_data, model, loss_fn)
    print("Done !")

In [40]:
iterate(10, loss_fn, optimizer, fmnist_model, train_data, test_data)

Epoch 0 ...............................
The Loss is : 1.701212763786316 => [64/60000]
The Loss is : 1.6316320896148682 => [6464/60000]
The Loss is : 1.6615177392959595 => [12864/60000]
The Loss is : 1.6755821704864502 => [19264/60000]
The Loss is : 1.6724430322647095 => [25664/60000]
The Loss is : 1.6242613792419434 => [32064/60000]
The Loss is : 1.702429175376892 => [38464/60000]
The Loss is : 1.6630449295043945 => [44864/60000]
The Loss is : 1.6485706567764282 => [51264/60000]
The Loss is : 1.6234943866729736 => [57664/60000]
Test Error | Accuracy : 81.57 % Avg loss: 1.6490007130203732
Epoch 1 ...............................
The Loss is : 1.639133334159851 => [64/60000]
The Loss is : 1.6495221853256226 => [6464/60000]
The Loss is : 1.6556569337844849 => [12864/60000]
The Loss is : 1.6231305599212646 => [19264/60000]
The Loss is : 1.6643898487091064 => [25664/60000]
The Loss is : 1.6299853324890137 => [32064/60000]
The Loss is : 1.6796536445617676 => [38464/60000]
The Loss is : 1.6084