In [2]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import numpy as np

In [3]:
from torchvision.transforms import v2

In [94]:
transforms = transforms.Compose([
                                 transforms.ToTensor()
])

In [95]:
train = datasets.MNIST("", train=True, transform=transforms, download=True)
train, valid = random_split(train, [50000, 10000])


In [96]:
trainloader = DataLoader(dataset=train, batch_size=16)
validloader = DataLoader(dataset=valid, batch_size=16)

In [116]:
import torch.nn as nn
import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(28*28, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = x.view(x.shape[0], -1)  # Flatten the images
        x = F.leaky_relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.leaky_relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.leaky_relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.fc4(x)
        return x

model = Network()

if torch.cuda.is_available():
    model = model.cuda()

model

Network(
  (fc1): Linear(in_features=784, out_features=512, bias=True)
  (bn1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=512, out_features=256, bias=True)
  (bn2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=256, out_features=128, bias=True)
  (bn3): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc4): Linear(in_features=128, out_features=10, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [118]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)

In [119]:
from tqdm import tqdm
epochs = 5

for e in range(epochs):
    train_loss = 0.0
    for data, labels in tqdm(trainloader):
        # Transfer Data to GPU if available
        if torch.cuda.is_available():
            data, labels = data.cuda(), labels.cuda()
        
        # Clear the gradients
        optimizer.zero_grad()
        # Forward Pass
        target = model(data)
        # Find the Loss
        loss = criterion(target,labels)
        # Calculate gradients 
        loss.backward()
        # Update Weights
        optimizer.step()
        # Calculate Loss
        train_loss += loss.item()
    
    print(f'Epoch {e+1} \t\t Training Loss: {train_loss / len(trainloader)}')

    valid_loss = 0.0
    total_correct = 0
    total_samples = 0
    model.eval()
    for data, labels in tqdm(validloader):
        if torch.cuda.is_available():
            data, labels = data.cuda(), labels.cuda()
        
        target = model(data)

        loss = criterion(target, labels)
        valid_loss += loss.item()

        _, predicted = torch.max(target, 1)
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)
    
    accuracy = 100 * total_correct / total_samples

    print(f'Epoch {e+1} \t\t Valid Loss: {valid_loss / len(validloader)}')
    print(f'Epoch {e+1} \t\t Valid accuracy: {accuracy:.2f}%')


100%|██████████| 3125/3125 [00:30<00:00, 101.50it/s]


Epoch 1 		 Training Loss: 0.5319087144267559


100%|██████████| 625/625 [00:02<00:00, 282.72it/s]


Epoch 1 		 Valid Loss: 0.163544378516078
Epoch 1 		 Valid accuracy: 95.12%


100%|██████████| 3125/3125 [00:31<00:00, 98.70it/s] 


Epoch 2 		 Training Loss: 0.12712911989283748


100%|██████████| 625/625 [00:01<00:00, 404.77it/s]


Epoch 2 		 Valid Loss: 0.13132612868305296
Epoch 2 		 Valid accuracy: 95.84%


100%|██████████| 3125/3125 [00:31<00:00, 100.59it/s]


Epoch 3 		 Training Loss: 0.08536323610623134


100%|██████████| 625/625 [00:02<00:00, 300.08it/s]


Epoch 3 		 Valid Loss: 0.10315553907565772
Epoch 3 		 Valid accuracy: 96.67%


100%|██████████| 3125/3125 [00:29<00:00, 105.55it/s]


Epoch 4 		 Training Loss: 0.061468556621604366


100%|██████████| 625/625 [00:01<00:00, 388.75it/s]


Epoch 4 		 Valid Loss: 0.11359863578426885
Epoch 4 		 Valid accuracy: 96.87%


100%|██████████| 3125/3125 [00:32<00:00, 96.90it/s] 


Epoch 5 		 Training Loss: 0.04740870903202711


100%|██████████| 625/625 [00:01<00:00, 412.78it/s]

Epoch 5 		 Valid Loss: 0.10214469307471882
Epoch 5 		 Valid accuracy: 97.03%





# train step function

In [None]:
def train_step(model: torch.nn.Module, 
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer) -> float:
    """ Train loop function

    Args:
        model (torch.nn.Module): Model 
        dataloader (torch.utils.data.DataLoader): dataloader (data, label)
        loss_fn (torch.nn.Module): loss function
        optimizer (torch.optim.Optimizer): optimizer for updating weights

    Returns:
        [float, float]: train loss, train accuracy 
    """
    # Put model in train mode
    model.train()
    
    # Setup train loss and train accuracy values
    train_loss, train_acc = 0, 0
    
    # Loop through data loader data batches
    for batch, (X, y) in enumerate(dataloader):
        # Send data to target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate  and accumulate loss
        loss = loss_fn(y_pred, y)
        train_loss += loss.item() 

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Calculate and accumulate accuracy metric across all batches
        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    # Adjust metrics to get average loss and accuracy per batch 
    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_loss, train_acc

# test step function

In [None]:
def test_step(model: torch.nn.Module, 
              dataloader: torch.utils.data.DataLoader, 
              loss_fn: torch.nn.Module) -> [float, float]:
    """Test loop function for evaluate model

    Args:
        model (torch.nn.Module): trained model
        dataloader (torch.utils.data.DataLoader): test dataloader
        loss_fn (torch.nn.Module): loss in test sample

    Returns:
        [float, float]: test_loss, test_accuracy
    """
    # Put model in eval mode
    model.eval() 
    
    # Setup test loss and test accuracy values
    test_loss, test_acc = 0, 0
    
    # Turn on inference context manager
    with torch.inference_mode():
        # Loop through DataLoader batches
        for batch, (X, y) in enumerate(dataloader):
            # Send data to target device
            X, y = X.to(device), y.to(device)
    
            # 1. Forward pass
            test_pred_logits = model(X)

            # 2. Calculate and accumulate loss
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()
            
            # Calculate and accumulate accuracy
            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))
            
    # Adjust metrics to get average loss and accuracy per batch 
    test_loss = test_loss / len(dataloader)
    test_acc = test_acc / len(dataloader)
    return test_loss, test_acc

# train and test loop

In [5]:
from tqdm.auto import tqdm

# 1. Take in various parameters required for training and test steps
def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module = nn.CrossEntropyLoss(),
          epochs: int = 5)-> float:
    """Train and test loop

    Args:
        model (torch.nn.Module): model
        train_dataloader (torch.utils.data.DataLoader): train dataloader
        test_dataloader (torch.utils.data.DataLoader): test dataloader
        optimizer (torch.optim.Optimizer): optimizer. Defaults SGD()
        loss_fn (torch.nn.Module, optional): _description_. Defaults to nn.CrossEntropyLoss().
        epochs (int, optional): _description_. Defaults to 5.

    Returns:
        _type_: 
    """
    
    # 2. Create empty results dictionary
    results = {"train_loss": [],
        "train_acc": [],
        "test_loss": [],
        "test_acc": []
    }
    
    # 3. Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer)
        test_loss, test_acc = test_step(model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn)
        
        # 4. Print out what's happening
        print(
            f"Epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

        # 5. Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    # 6. Return the filled results at the end of the epochs
    return results