Python scripts are required for large-scale projects as they improve reproducibility.

Normally, write a `notebook` for experimentation and visualisation; once it is working, turn the most useful pieces of code into Python `scripts`.

In this notebook, I am going to reproduce `04_custom_datasets.ipynb` but in `script mode`.

#### 1. Get Data
Since the data source may vary from project to project, this step does not need to be turned into a script.

In [1]:
import os
import requests
import zipfile
from pathlib import Path

In [4]:
# data path
data_path = Path("../Dataset/")

# image path
img_path = data_path / "pizza_steak_sushi"

# temporary location of the downloaded archive
zip_path = data_path / "pizza_steak_sushi.zip"

if img_path.is_dir():
    print("Folder already exists, skipping download ...")
else:
    print("Folder doesn't exist, creating one ...")

    # only the parent folder is created up front: the image folder itself is
    # created after a successful download, so a failed request cannot leave an
    # empty folder behind that would make the next run skip the download
    data_path.mkdir(parents=True, exist_ok=True)

    # download the data
    print("Downloading data ...")
    request = requests.get("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                           timeout=60)

    # stop here on HTTP errors instead of writing an error page into the zip file
    request.raise_for_status()

    # write data
    with open(zip_path, "wb") as f:
        f.write(request.content)

    try:
        # create image folder
        img_path.mkdir(parents=True)

        # extract zip file
        with zipfile.ZipFile(zip_path, "r") as f:
            print("Unzipping the file ...")

            # extract file
            f.extractall(img_path)
    finally:
        # delete zipfile (even if the extraction failed)
        os.remove(zip_path)

Folder doesn't exist, creating one ...
Downloading data ...
Unzipping the file ...


In [13]:
# path for saving scripts
# mkdir is an instance method: build the Path first, then create the folder
# (exist_ok=True keeps the cell re-runnable)
Path("turning_modular").mkdir(exist_ok=True)

# 2 Data processing
Note: we skip data exploration as we already have done it in `04` notebook.

### a. Create datasets (convert to tensors and manipulate if required)

### b. Convert datasets to dataloaders

In [15]:
%%writefile turning_modular/data_setup.py
'''
Functionality for creating pyTorch datasets and dataloader for image classification'''
import os
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# number of cores - useful for performing parallel jobs
# os.cpu_count() returns None when the count cannot be determined; fall back
# to 0, which makes a DataLoader load the data in the main process
n_workers = os.cpu_count() or 0

def create_dataset_dataloader(train_dir : str, test_dir : str,
                              transform : transforms.Compose,
                              batch_size : int, n_workers : int = n_workers):
    '''Creates training and testing dataloaders

    Both directories are read with torchvision's ImageFolder and the same
    transform is applied to the training and the testing images.

    Args:
        train_dir : path of training data
        test_dir  : path of testing data
        transform : transformations to be performed
        batch_size: mini-batch size
        n_workers : number of worker processes per dataloader
                    (None is treated as 0, i.e. load in the main process)

    Return:
        A tuple of train_dataloader, test_dataloader, class_names
        train_dataloader : dataloader of training data
        test_dataloader  : dataloader of testing data
        class_names      : list of label class names'''

    # DataLoader needs an integer; guard against an undetermined core count
    n_workers = n_workers or 0

    # imageFolder to create datasets
    # training dataset
    train_data = datasets.ImageFolder(root=train_dir, transform=transform)

    # testing dataset
    test_data = datasets.ImageFolder(root=test_dir, transform=transform)

    # class names (taken from the training folder)
    class_names = train_data.classes

    # dataset to dataloader
    # training dataloader
    train_dataloader = DataLoader(dataset=train_data, batch_size=batch_size,
                                  shuffle=True, num_workers=n_workers,
                                  pin_memory=True) # pin_memory - quicker transfer from cpu to gpu

    # testing dataloader
    test_dataloader = DataLoader(dataset=test_data, batch_size=batch_size,
                                 shuffle=False, num_workers=n_workers,
                                 pin_memory=True) # no need to shuffle

    return train_dataloader, test_dataloader, class_names

Overwriting turning_modular/data_setup.py


# 3. Build Model
Make `TinyVGG()` model created in `03, 04 notebooks` into a script

In [21]:
%%writefile turning_modular/model_builder.py
'''
Functionality to instantiate TinyVGG model'''

import torch
from torch import nn

class TinyVGG(nn.Module):
    '''Creates TinyVGG Architecture by replicating https://poloclub.github.io/cnn-explainer/
    Args:
        in_channels : number of input channels (eg: 3 for RGB)
        hid_units   : number of hidden channels or nodes for each layer
        out_classes : number of classes for classification
        image_shape : height or width of image (square images are assumed)

    Creates:
        model instance that can make predictions: forward() returns one
        logit per class for every image in the batch'''

    def __init__(self, in_channels: int, hid_units: int,
                 out_classes: int, image_shape: int):
        super().__init__()

        PADDING, STRIDE, KERNEL, DILATION = 1, 2, 3, 1 # dilation: 1 is default

        # 1st block
        self.conv_block_1 = nn.Sequential(nn.Conv2d(in_channels=in_channels,
                                                    out_channels=hid_units,
                                                    kernel_size=KERNEL, stride=1,
                                                    padding=PADDING),
                                          nn.ReLU(),
                                          nn.Conv2d(in_channels=hid_units,
                                                    out_channels=hid_units,
                                                    kernel_size=KERNEL, stride=STRIDE,
                                                    padding=PADDING),
                                          nn.ReLU())

        # required to compute input shape of classifier layer
        # (the stride-1 convolution keeps the size with kernel 3 / padding 1,
        #  so only the strided convolution of each block shrinks the image)
        H_out = W_out = self._conv_output_size(image_shape, PADDING, KERNEL,
                                               STRIDE, DILATION)

        # 2nd block
        self.conv_block_2 = nn.Sequential(nn.Conv2d(in_channels=hid_units,
                                                    out_channels=hid_units,
                                                    kernel_size=KERNEL, stride=1,
                                                    padding=PADDING),
                                          nn.ReLU(),
                                          nn.Conv2d(in_channels=hid_units,
                                                    out_channels=hid_units,
                                                    kernel_size=KERNEL, stride=STRIDE,
                                                    padding=PADDING),
                                          nn.ReLU())

        H_out = W_out = self._conv_output_size(H_out, PADDING, KERNEL,
                                               STRIDE, DILATION)

        # output classifier layer
        self.classifier = nn.Sequential(nn.Flatten(),
                                        nn.Linear(in_features=hid_units*H_out*W_out,
                                                  out_features=out_classes))

    @staticmethod
    def _conv_output_size(size: int, padding: int, kernel: int,
                          stride: int, dilation: int = 1) -> int:
        '''Output height (or width) of a Conv2d layer for an input of `size`,
        using the output-shape formula from the nn.Conv2d documentation'''
        return (size + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1

    def forward(self,x):

        # operating w/o storing is faster - operation fusion
        return self.classifier(self.conv_block_2(self.conv_block_1(x)))

Overwriting turning_modular/model_builder.py


# 4. Training

Build functions created in `04`,
1. **train_step**
2. **test_step**

Finally combine both to,
1. **train** - perform training for certain number of epochs

In [23]:
%%writefile turning_modular/engine.py
'''functionality for training the model and evaluating it'''

import torch
from tqdm import tqdm

# training step
def train_step(model: torch.nn.Module, data_loader: torch.utils.data.DataLoader,
             loss_fn : torch.nn.Module, optimizer : torch.optim.Optimizer,
             accuracy_fn, device : torch.device = None):
    '''Trains a model for one epoch by preparing model for training and train the model
    by calculating the loss and computing the gradients of loss function

    Args:
        model      : model to be trained
        data_loader: dataloader instance of data
        loss_fn    : loss function to be minimised
        optimizer  : optimizer to minimize the loss function
        accuracy_fn: callable taking keyword arguments y_true and y_pred
                     (predicted labels) and returning the accuracy of a batch
        device     : cpu (or) cuda; when None the device of the model's
                     parameters is used (cpu for a model without parameters)

    returns:
        Tuple of loss and accuracy
        train_loss    : Average training loss per batch (python float)
        train_accuracy: Average training accuracy per batch'''

    # resolve the target device from the model when it is not given
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # training loss and accuracy initialise
    train_loss, train_acc = 0.0, 0.0

    # model into training mode
    model.train()

    for X, y in data_loader:

        # data on target device
        X, y = X.to(device), y.to(device)

        # 1. forward pass
        y_pred = model(X)

        # 2. calculate loss and accuracy per batch
        loss = loss_fn(y_pred, y)

        # .item() stores a plain number; accumulating the tensor itself would
        # keep the computation graph of every batch alive
        train_loss += loss.item()

        train_acc += accuracy_fn(y_true=y,
                                 y_pred=y_pred.argmax(dim=1)) # Go from logits -> pred labels

        # 3.optimiser zero grad
        optimizer.zero_grad()

        # 4.loss backward
        loss.backward()

        # 5. step
        optimizer.step()

    # average training loss and accuracy over the batches
    train_loss /= len(data_loader)
    train_acc /= len(data_loader)

    return train_loss, train_acc

def test_step(model : torch.nn.Module,
              data_loader : torch.utils.data.DataLoader,
              loss_fn : torch.nn.Module,
              accuracy_fn,
              device : torch.device = device):

    '''Tests model performance for single epoch

    Args:
        model      : model to be trained
        data_loader: dataloader instance of data
        loss_fn    : lost function to be minimised
        device     : cpu (or) cuda
    
    returns:
        Tuple of loss and accuracy
        train_loss    : Average training loss
        train_accuracy: Accuracy of training'''

    # training and test loss initialise
    test_loss, test_acc = 0, 0

    # model in evaluation mode
    model.eval()

    with torch.inference_mode():
        for X, y in data_loader:

            # data on target device
            X, y = X.to(device), y.to(device)

            # 1. forward pass
            test_pred = model(X)

            # calculate loss
            test_loss += loss_fn(test_pred, y)

            test_acc += accuracy(y_true=y, y_pred=test_pred.argmax(dim=1)) # to get the labels and get the accuracy
    
        # avg test loss
        test_loss /= len(data_loader)
    
        # average accuracy
        test_acc /= len(data_loader)

    return test_loss, test_acc

def _default_accuracy_fn(y_true, y_pred):
    '''Percentage of predicted labels in y_pred that equal the labels in y_true'''
    return (torch.eq(y_true, y_pred).sum().item() / len(y_pred)) * 100


def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          epochs:int, device:torch.device,
          loss_fn: torch.nn.Module = None,
          accuracy_fn = None):
    '''Trains and tests model for given number of epochs

    Args:
        model: pytorch model to be trained and tested
        train_dataloader: training dataloader instance of data
        test_dataloader: testing dataloader instance of data
        optimizer  : optimizer to minimize the loss function
        epochs     : number of times a data is to be used for training
        device     : cpu (or) cuda
        loss_fn    : loss function to be minimised
                     (defaults to torch.nn.CrossEntropyLoss())
        accuracy_fn: callable taking keyword arguments y_true and y_pred and
                     returning the accuracy of a batch
                     (defaults to the percentage of correct labels)

    returns:
        dictionary of training and testing loss and accuracy, with one entry
        per epoch under the keys train_loss, train_acc, test_loss, test_acc'''

    # the steps classify with argmax over the logits, so cross entropy is the
    # natural default loss
    if loss_fn is None:
        loss_fn = torch.nn.CrossEntropyLoss()

    if accuracy_fn is None:
        accuracy_fn = _default_accuracy_fn

    results = {"train_loss": [], "train_acc": [],
               "test_loss": [], "test_acc": []}

    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model, data_loader=train_dataloader,
                                           loss_fn=loss_fn, optimizer=optimizer,
                                           accuracy_fn=accuracy_fn, device=device)
        test_loss, test_acc = test_step(model=model, data_loader=test_dataloader,
                                        loss_fn=loss_fn, accuracy_fn=accuracy_fn,
                                        device=device)

        print(f"Model: {model.__class__.__name__} | train loss: {train_loss:.4f} | Train acc: {train_acc:.4f}%")

        print(f"Model: {model.__class__.__name__} | Test loss: {test_loss:.4f}, test accuracy: {test_acc}% \n")

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    return results

Writing turning_modular/engine.py


# 5. Saving the model

It's common practice to store helper functions in `utils.py`.

In [24]:
%%writefile turning_modular/utils.py
'''
Contains various utility function for model training and saving'''

import torch
from pathlib import Path

def save_model(model: torch.nn.Module, target_dir: str,
               model_name: str):
    '''Saves the state_dict of a PyTorch model to a target directory.
    Args:
        model: model to save.
        target_dir: directory for saving the model (created when missing).
        model_name: filename to be given to the model with extension,
                    must end with ".pth".

    Raises:
        ValueError: if model_name does not end with ".pth". Nothing is
                    created or written in that case.'''

    # validate first: a returned message is easily ignored by the caller,
    # which would lose the trained model without anyone noticing
    if not model_name.endswith(".pth"):
        raise ValueError("Not .pth extension. change extension")

    # Create target directory
    target_dir_path = Path(target_dir)
    target_dir_path.mkdir(parents=True, exist_ok=True)

    model_save_path = target_dir_path / model_name

    print("Saving model ...")
    torch.save(obj=model.state_dict(), f= model_save_path)

Writing turning_modular/utils.py


# 6. Perform training at once

In [37]:
%%writefile turning_modular/train.py
"""
Trains a PyTorch image classification model using device-agnostic code.
"""
import argparse
import os
import torch
import data_setup, engine, model_builder, utils

from torchvision import transforms

def parse_args() -> argparse.Namespace:
    """Defines the command-line interface of the script and parses it.

    Exits with a usage error when --model_name does not end with '.pth', so
    that a bad file name is reported before any training time is spent.
    """
    parser = argparse.ArgumentParser(description='Image Classification Data Loader')

    # Add arguments
    parser.add_argument('--train_dir', type=str, required=True,
                        help='Path to the training data directory')
    parser.add_argument('--test_dir', type=str, required=True,
                        help='Path to the testing data directory')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Mini-batch size (default: 32)')
    # os.cpu_count() may return None; 0 loads the data in the main process
    parser.add_argument('--num_workers', type=int, default=os.cpu_count() or 0,
                        help='Number of worker threads for data loading (default: number of CPU cores)')
    parser.add_argument('--n_epochs', type=int, default=5,
                        help='Number of epochs (default: 5)')
    parser.add_argument('--h_units', type=int, default=16,
                        help='Number of hidden units in hidden layer (default: 16)')
    parser.add_argument('--lr', type=float, default=0.1,
                        help='learning rate (default: 0.1)')
    parser.add_argument('--model_name', type=str, help='Model name', required=True)

    # Parse the arguments
    args = parser.parse_args()

    # utils.save_model only accepts '.pth' file names
    if not args.model_name.endswith(".pth"):
        parser.error("--model_name must end with '.pth'")

    return args


def main() -> None:
    """Builds the dataloaders and the model from the command-line arguments,
    trains the model and saves it to the 'models' directory."""
    args = parse_args()

    # Setup hyperparameters (taken from the command line)
    n_epochs = args.n_epochs
    batch_size = args.batch_size
    h_units = args.h_units
    lr = args.lr

    # Setup directories
    train_dir = args.train_dir
    test_dir = args.test_dir

    # Setup target device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Create transforms
    data_transform = transforms.Compose([
      transforms.Resize((64, 64)),
      transforms.ToTensor()
    ])

    # Create DataLoaders with help from data_setup.py
    train_dataloader, test_dataloader, \
    class_names = data_setup.create_dataset_dataloader(train_dir=train_dir,
                                                test_dir=test_dir,
                                                transform=data_transform,
                                                batch_size=batch_size,
                                                n_workers=args.num_workers)

    # Create model with help from model_builder.py
    # image_shape matches the Resize((64, 64)) transform above
    model = model_builder.TinyVGG(in_channels=3, hid_units=h_units,
                                  out_classes=len(class_names), image_shape=64).to(device)

    # Set loss and optimizer
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr)

    # training and testing with help from engine.py
    engine.train(model=model,
                 train_dataloader=train_dataloader,
                 test_dataloader=test_dataloader,
                 loss_fn=loss_fn,
                 optimizer=optimizer,
                 epochs=n_epochs,
                 device=device)

    # Save the model with help from utils.py
    utils.save_model(model=model,
                     target_dir="models",
                     model_name=args.model_name)


# the guard keeps DataLoader worker processes, which re-import this module
# when they are spawned, from starting the training again
if __name__ == "__main__":
    main()

Overwriting turning_modular/train.py


In [6]:
# run as shell script
# (--model_name is passed once; the former extra --model flag was only accepted
#  because argparse expands unambiguous prefixes and it set the same option twice)
!python ./turning_modular/train.py --model_name "TinyVGG_script.pth" --batch_size 32 --lr 0.01 --n_epochs 3 --train_dir "../Dataset/pizza_steak_sushi/train" --test_dir "../Dataset/pizza_steak_sushi/test" --num_workers 2 --h_units 10