<a href="https://colab.research.google.com/github/jo-adithya/computer-vision-experiments/blob/main/notebooks/02_building_computer_vision_models_script_mode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Building Computer Vision Models (Script Mode)

In [1]:
from pathlib import Path

In [2]:
# Setup script folder
script_path = Path("src")
script_path.mkdir(parents=True, exist_ok=True)

## 1. Create `Dataset`s and `DataLoader`s

### 1.1 Create script for creating dataloaders

In [3]:
# Setup path for the data_preparation scripts
data_preparation_path = script_path / "data_preparation"
data_preparation_path.mkdir(parents=True, exist_ok=True)

In [4]:
%%writefile src/data_preparation/create_dataloaders.py
"""
Contains functionality for creating PyTorch DataLoaders.
"""

import os

from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from typing import Tuple


NUM_WORKERS = os.cpu_count()


def create_dataloaders(
    train_path: str,
    test_path: str,
    train_transform: transforms.Compose,
    test_transform: transforms.Compose,
    batch_size: int = 32,
    num_workers: int = NUM_WORKERS
) -> Tuple[DataLoader, DataLoader]:
    """Build the training and testing DataLoaders from two image folders.

    Each directory is wrapped in a ``torchvision.datasets.ImageFolder`` with
    its own transform and then handed to a ``DataLoader``.

    Parameters
    ----------
    train_path: str
        Directory holding the training images.
    test_path: str
        Directory holding the testing images.
    train_transform: torchvision.transforms.Compose
        Transform pipeline applied to every training sample.
    test_transform: torchvision.transforms.Compose
        Transform pipeline applied to every testing sample.
    batch_size: int
        Number of samples in each batch (same for both loaders).
    num_workers: int
        Number of worker processes used by each DataLoader.

    Returns
    -------
    The pair (train_dataloader, test_dataloader).
    Only the training loader shuffles its samples.
    """
    # Options common to both loaders; shuffling is the only difference.
    shared_options = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=True,
    )

    # Wrap the two folders as datasets
    train_dataset = datasets.ImageFolder(train_path, transform=train_transform)
    test_dataset = datasets.ImageFolder(test_path, transform=test_transform)

    # Wrap the datasets as loaders
    train_loader = DataLoader(dataset=train_dataset, shuffle=True, **shared_options)
    test_loader = DataLoader(dataset=test_dataset, shuffle=False, **shared_options)

    return train_loader, test_loader


__all__ = ["create_dataloaders"]

Writing src/data_preparation/create_dataloaders.py


## 2. Building `PyTorch` models

### 2.1 Create script for creating `TinyVGG` model

In [5]:
# Setup path for models script
models_path = script_path / "models"
models_path.mkdir(parents=True, exist_ok=True)

In [6]:
%%writefile src/models/tiny_vgg.py
"""
Contains PyTorch model class to instantiate a TinyVGG model.
"""

from torch import nn


class TinyVGGConvBlock(nn.Module):
    """A single TinyVGG stage: conv -> ReLU -> conv -> ReLU -> max pool.

    Both convolutions use a 3x3 kernel with stride 1 and no padding; the
    pooling layer uses a 2x2 window with stride 2.

    Parameters
    ----------
    in_channels: int
        Number of channels of the incoming tensor.
    out_channels: int
        Number of filters produced by each convolution.
    """
    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()

        # The two convolutions differ only in how many channels they receive.
        conv_options = dict(kernel_size=3, stride=1, padding=0)
        stages = [
            nn.Conv2d(in_channels=in_channels,
                      out_channels=out_channels,
                      **conv_options),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels,
                      out_channels=out_channels,
                      **conv_options),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Pass ``x`` through the stacked layers and return the result."""
        features = self.model(x)
        return features


class TinyVGG(nn.Module):
    """Creates the TinyVGG model architecture.

    Link: https://poloclub.github.io/cnn-explainer/

    Two ``TinyVGGConvBlock`` stages are followed by a flatten + linear
    classifier. The classifier's input width is derived from ``image_size``
    instead of being fixed to the 64x64 case.

    Parameters
    ----------
    input_shape: int
        Number of input channels.
    hidden_units: int
        Number of hidden units between layers.
    output_shape: int
        Number of output units (number of classes).
    image_size: int
        Height and width (in pixels) of the square input images.
        Defaults to 64, which gives the original 13 x 13 feature map.

    Raises
    ------
    ValueError
        If ``image_size`` is too small to survive both convolutional blocks.
    """
    def __init__(
        self,
        input_shape: int,
        hidden_units: int,
        output_shape: int,
        image_size: int = 64,
    ):
        super().__init__()

        # Each block applies two unpadded 3x3 convolutions (2 pixels lost per
        # convolution) and then a 2x2 / stride-2 max pool (floor halving).
        num_blocks = 2
        feature_size = image_size
        for _ in range(num_blocks):
            feature_size = (feature_size - 4) // 2
        if feature_size < 1:
            raise ValueError(
                f"image_size={image_size} is too small for {num_blocks} "
                "TinyVGG convolutional blocks"
            )

        self.conv_blocks = nn.Sequential(
            TinyVGGConvBlock(in_channels=input_shape, out_channels=hidden_units),
            TinyVGGConvBlock(in_channels=hidden_units, out_channels=hidden_units),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(
                in_features=feature_size * feature_size * hidden_units,
                out_features=output_shape
            )
        )

    def forward(self, x):
        """Return the classifier output for the batch ``x``."""
        return self.classifier(self.conv_blocks(x))


__all__ = ["TinyVGG"]

Writing src/models/tiny_vgg.py


## 3. Create core training and testing functions

In [7]:
# Setup path for core function scripts
core_path = script_path / "core"
core_path.mkdir(parents=True, exist_ok=True)

### 3.1 Create script for the training step function

In [8]:
%%writefile src/core/train_step.py
"""
Contains functions for core training loop of a PyTorch model for one epoch.
"""

import torch
from torch import nn
from torch.utils.data import DataLoader

from typing import Tuple


def train_step(
    model: nn.Module,
    dataloader: DataLoader,
    loss_fn: nn.Module,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
    verbose: bool = False,
) -> Tuple[float, float]:
    """Run one training epoch over ``dataloader`` and update ``model``.

    Parameters
    ----------
    model: nn.Module
        The model whose parameters are optimized.
    dataloader: DataLoader
        Source of (inputs, targets) training batches.
    loss_fn: nn.Module
        Loss applied to the model output and the targets of each batch.
    optimizer: torch.optim.Optimizer
        Optimizer that applies the parameter updates.
    device: torch.device
        Device every batch is moved to before the forward pass.
    verbose: bool
        If true, prints the gradient norm of each parameter once the epoch
        has finished.

    Returns
    -------
    The pair (train_loss, train_accuracy), each being the mean of the
    per-batch values.
    """
    # Switch the model to training mode
    model.train()

    # Running sums of the per-batch loss and accuracy
    loss_total, acc_total = 0, 0

    for inputs, targets in dataloader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Forward pass and loss
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)

        # Backward pass and parameter update
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Accumulate the metrics of this batch
        loss_total += batch_loss.item()
        predicted = torch.softmax(logits, dim=1).argmax(dim=1)
        acc_total += (predicted == targets).sum().item() / len(logits)

    # Report the gradients left over from the last batch of the epoch
    if verbose:
        for name, param in model.named_parameters():
            if param.grad is None:
                print(f"Layer: {name} | Grad Norm: None")
                continue
            grad_norm = param.grad.norm().item()
            print(f"Layer: {name} | Grad Norm: {grad_norm:.6f}")
        print()

    # Turn the running sums into per-batch means
    batch_count = len(dataloader)
    return loss_total / batch_count, acc_total / batch_count


__all__ = ["train_step"]

Writing src/core/train_step.py


### 3.2 Create script for the testing step function

In [9]:
%%writefile src/core/test_step.py
"""
Contains functions for core testing loop of a PyTorch model for one epoch.
"""

import torch
from torch import nn
from torch.utils.data import DataLoader

from typing import Tuple


def test_step(
    model: nn.Module,
    dataloader: DataLoader,
    loss_fn: nn.Module,
    device: torch.device
) -> Tuple[float, float]:
    """Tests a PyTorch model for a single epoch.

    Parameters
    ----------
    model: nn.Module
        A PyTorch model to be tested.
    dataloader: DataLoader
        A DataLoader to be used for testing the model.
    loss_fn: nn.Module
        A PyTorch loss function to calculate the loss on the testing data.
    device: torch.device
        Target device to compute on ("cuda", "cpu", etc.)

    Returns
    -------
    A tuple of testing loss and testing accuracy metrics.
    In the form of (test_loss, test_accuracy).
    """
    # Setup the model to be in testing mode
    model.eval()

    # Setup testing metrics
    test_loss, test_acc = 0, 0

    with torch.inference_mode():
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)

            # Forward pass
            y_pred = model(X)

            # Calculate the loss
            loss = loss_fn(y_pred, y)

            # Update the metrics
            test_loss += loss.item()
            y_pred_classes = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
            test_acc += torch.sum(y_pred_classes == y).item() / len(y_pred)

        # Calculate the average metrics across all batches
        test_loss /= len(dataloader)
        test_acc /= len(dataloader)

    return test_loss, test_acc


__all__ = ["test_step"]


Writing src/core/test_step.py


### 3.3 Create script for the core training loop function

In [10]:
%%writefile src/core/train.py
"""
Contains functions for the core training and testing loop of a PyTorch model.
"""

import torch
from torch import nn
from torch.utils.data import DataLoader

from tqdm.auto import tqdm

from typing import Dict, List

from .train_step import train_step
from .test_step import test_step


def train(
    model: nn.Module,
    train_dataloader: DataLoader,
    test_dataloader: DataLoader,
    loss_fn: nn.Module,
    optimizer: torch.optim.Optimizer,
    epochs: int,
    device: torch.device,
    verbose: bool = False,
) -> Dict[str, List[float]]:
    """Alternate training and testing epochs and collect the metrics.

    Every epoch calls ``train_step`` followed by ``test_step``, prints the
    four resulting metrics and appends them to the returned history.

    Parameters
    ----------
    model: nn.Module
        The model to be trained and tested.
    train_dataloader: DataLoader
        Batches passed to ``train_step``.
    test_dataloader: DataLoader
        Batches passed to ``test_step``.
    loss_fn: nn.Module
        Loss function used for both the training and the testing step.
    optimizer: torch.optim.Optimizer
        Optimizer passed to ``train_step``.
    epochs: int
        Number of train/test rounds to run.
    device: torch.device
        Target device to compute on ("cuda", "cpu", etc.)
    verbose: bool
        Forwarded to ``train_step``; if true, gradient norms are printed
        once per epoch.

    Returns
    -------
    A dictionary with one list per metric, holding one value per epoch.
    In the form:
    {
        train_loss: [...],
        train_acc: [...],
        test_loss: [...],
        test_acc: [...]
    }
    """
    # One (initially empty) list per tracked metric
    metric_names = ("train_loss", "train_acc", "test_loss", "test_acc")
    history = {name: [] for name in metric_names}

    for epoch in tqdm(range(epochs)):
        print(f"Epoch: {epoch}")
        print("---------")

        train_loss, train_acc = train_step(
            model=model,
            dataloader=train_dataloader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
            verbose=verbose
        )
        test_loss, test_acc = test_step(
            model=model,
            dataloader=test_dataloader,
            loss_fn=loss_fn,
            device=device
        )

        print(f"Train loss: {train_loss:.4f} | Train acc: {train_acc * 100:.1f}%")
        print(f"Test loss: {test_loss:.4f} | Test acc: {test_acc * 100:.1f}%\n")

        # Record this epoch's metrics
        epoch_values = (train_loss, train_acc, test_loss, test_acc)
        for name, value in zip(metric_names, epoch_values):
            history[name].append(value)

    return history


__all__ = ["train"]

Writing src/core/train.py


## 4. Create a function to save the model

In [11]:
# Setup path for utils script
utils_path = script_path / "utils"
utils_path.mkdir(parents=True, exist_ok=True)

### 4.1 Create script for saving the model

In [12]:
%%writefile src/utils/save_model.py
"""
Contains utility function for saving PyTorch model.
"""

from pathlib import Path

import torch
from torch import nn


def save_model(
    model: nn.Module,
    target_path: str,
    model_name: str,
):
    """Saves a PyTorch model's ``state_dict`` to a target directory.

    Parameters
    ----------
    model: nn.Module
        A PyTorch model to be saved.
    target_path: str
        Directory to save the model into. It is created (including
        parents) if it does not exist yet.
    model_name: str
        File name for the saved model.
        Must end with the ".pth" or ".pt" file extension.

    Raises
    ------
    ValueError
        If ``model_name`` does not end with ".pth" or ".pt".
    """
    # Validate first so an invalid name does not leave a stray directory behind
    if not model_name.endswith((".pth", ".pt")):
        raise ValueError("model_name should end with '.pt' or '.pth'")

    # Create the target directory if not exists
    target_dir = Path(target_path)
    target_dir.mkdir(parents=True, exist_ok=True)
    saved_model_path = target_dir / model_name

    # Save only the parameters/buffers, not the pickled module object
    print(f"[INFO] Saving model to: {saved_model_path}")
    torch.save(obj=model.state_dict(), f=saved_model_path)
    print("[INFO] Successfully saved the model.")


__all__ = ["save_model"]

Writing src/utils/save_model.py


## 5. Create utility functions for visualizations

In [13]:
# Setup path for utils script
utils_path = script_path / "utils"
utils_path.mkdir(parents=True, exist_ok=True)

### 5.1 Create script for viewing a batch of the dataloader

In [14]:
%%writefile src/utils/view_dataloader.py
"""
Contains functionality for viewing a batch of PyTorch DataLoaders.
"""

import math

from torch.utils.data import DataLoader


def view_dataloader(
    dataloader: DataLoader,
    ncols: int = 8
):
    """
    View a batch of images in the PyTorch DataLoader.

    The first batch is drawn on a grid of matplotlib subplots, one image per
    cell, each titled with its label.

    Parameters
    ----------
    dataloader: DataLoader
        The dataloader to be viewed. Its batches must unpack into
        (images, labels), with images laid out channels-first.
    ncols: int
        Number of matplotlib plot columns.
    """
    # Imported here because this module has no top-level matplotlib import
    import matplotlib.pyplot as plt

    images, labels = next(iter(dataloader))
    nrows = math.ceil(len(images) / ncols)

    # Class names belong to the wrapped dataset (torchvision's ImageFolder
    # exposes them as `classes`); the DataLoader itself has no such attribute.
    class_names = getattr(dataloader.dataset, "classes", None)

    for i, image in enumerate(images):
        label = int(labels[i])
        # Fall back to the numeric label when the dataset has no class names
        title = class_names[label] if class_names is not None else str(label)

        plt.subplot(nrows, ncols, i + 1)
        # matplotlib expects channels-last, so move the channel axis to the end
        plt.imshow(image.permute(1, 2, 0))
        plt.title(title)
        plt.axis(False)


__all__ = ["view_dataloader"]

Writing src/utils/view_dataloader.py


### 5.2 Create script for plotting loss curves

In [15]:
%%writefile src/utils/plot_loss_curves.py
"""
Contains utility function for plotting the model loss curves from the result dictionary.
"""

import matplotlib.pyplot as plt

from typing import Dict, List


def plot_loss_curves(results: Dict[str, List[float]]):
    """Draw the loss and accuracy curves side by side.

    The left panel shows training vs. testing loss, the right panel shows
    training vs. testing accuracy, both plotted against the epoch index.

    Parameters
    ----------
    results: Dict[str, List[float]]
        Per-epoch metrics as produced by the training loop.
        In form of:
        {
            train_loss: [...],
            train_acc: [...],
            test_loss: [...],
            test_acc: [...]
        }
    """
    # Look up every metric up front so a missing key fails before any drawing
    metric_keys = ("train_loss", "train_acc", "test_loss", "test_acc")
    curves = {key: results[key] for key in metric_keys}

    epochs = range(len(results["train_loss"]))

    plt.figure(figsize=(12, 4))

    # (panel title, training-curve key, testing-curve key)
    panels = (
        ("Loss", "train_loss", "test_loss"),
        ("Accuracy", "train_acc", "test_acc"),
    )
    for position, (panel_title, train_key, test_key) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for key in (train_key, test_key):
            plt.plot(epochs, curves[key], label=key)
        plt.title(panel_title)
        plt.xlabel("Epochs")
        plt.legend()


__all__ = ["plot_loss_curves"]

Writing src/utils/plot_loss_curves.py


### 5.3 Create script for downloading images from DuckDuckGo

In [16]:
# Setup path for the custom testing utils scripts
custom_testing_utils_path = utils_path / "custom_testing"
custom_testing_utils_path.mkdir(parents=True, exist_ok=True)

In [17]:
%%writefile src/utils/custom_testing/download_images.py
"""
Contains utility function for downloading images from DuckDuckGo.
"""

import requests
from pathlib import Path

from io import BytesIO
from PIL import Image

from duckduckgo_search import DDGS

from typing import List


def download_images(
    target_path: str,
    keywords: List[str],
    filenames: List[str],
    n: int = 1,
    max_retries: int = 3,
    timeout: float = 10.0,
):
    """Download images from DuckDuckGo.

    For every keyword, up to ``n`` image results are fetched and saved as
    JPEG files under ``target_path / filenames[i]``.

    Parameters
    ----------
    target_path: str
        Path for the images to be stored into.
    keywords: List[str]
        List of image keywords to be searched and downloaded.
    filenames: List[str]
        List of filename for the downloaded images.
        Just need one filename per keyword.
        Images are saved as
        filename[0]_0.jpg, filename[0]_1.jpg, filename[1]_0.jpg, etc.
    n: int
        Number of images to be downloaded for each keyword
    max_retries: int
        Maximum number of download attempts per image. An image that still
        fails after this many attempts is skipped.
    timeout: float
        Seconds to wait for each HTTP request before treating it as failed.
    """
    # Accept plain strings as documented; `/` below needs a Path object
    target_dir = Path(target_path)
    ddg = DDGS()

    # Search for image results
    for i, keyword in enumerate(keywords):
        results = ddg.images(keyword, max_results=n)

        # Download all the images
        for j, result in enumerate(results):
            image_url = result["image"]
            filename = f"{filenames[i]}_{j}.jpg"
            target_image_path = target_dir / filenames[i]
            target_image_path.mkdir(parents=True, exist_ok=True)

            # Bounded retries: a permanently broken URL must not loop forever
            for _ in range(max_retries):
                try:
                    response = requests.get(image_url, timeout=timeout)
                except requests.RequestException:
                    # Connection errors and timeouts count as a failed attempt
                    response = None

                if response is not None and response.status_code == 200:
                    image = Image.open(BytesIO(response.content))
                    # JPEG cannot store alpha or palette modes; normalise to RGB
                    image.convert("RGB").save(target_image_path / filename)
                    print(f"Downloaded image {filename}")
                    break

                print(f"Failed to download image {filename}")


__all__ = ["download_images"]

Writing src/utils/custom_testing/download_images.py


### 5.4 Create script for reading custom images for testing

In [18]:
%%writefile src/utils/custom_testing/read_custom_images.py
"""
Contains utility function for reading custom images for testing
"""

from pathlib import Path

import torch
import torchvision

from typing import List


def read_custom_images(image_path: str) -> List[torch.Tensor]:
    """Read custom images and turn them into PyTorch tensors.

    Every file below ``image_path`` (searched recursively) whose name
    contains a dot is read with ``torchvision.io.read_image``, converted to
    float and divided by 255.

    Parameters
    ----------
    image_path: str
        Path to the directory holding the custom images to be read.

    Returns
    -------
    List of float PyTorch tensors, one per image, ordered by file path.
    The list is empty when no matching file is found.
    """
    # Sorted so the order is reproducible across runs and file systems;
    # `is_file()` skips directories whose names happen to contain a dot.
    image_files = sorted(
        candidate
        for candidate in Path(image_path).rglob("*.*")
        if candidate.is_file()
    )

    return [
        torchvision.io.read_image(str(image_file)).type(torch.float) / 255.
        for image_file in image_files
    ]


__all__ = ["read_custom_images"]

Writing src/utils/custom_testing/read_custom_images.py


### 5.5 Create script for predicting custom images

In [19]:
%%writefile src/utils/custom_testing/predict_images.py
"""
Contains utility function for predicting custom images
"""

import math

import torch
from torch import nn
from torchvision import transforms

from .read_custom_images import read_custom_images

from typing import List


def predict_images(
    model: nn.Module,
    image_path: str,
    class_names: List[str],
    transform: transforms.Compose,
    plot_predictions: bool = False,
    device: torch.device = torch.device("cpu"),
) -> List[str]:
    """Predict custom images.

    Parameters
    ----------
    model: nn.Module
        A PyTorch model to make the predictions.
    image_path: str
        Path to the images to be predicted.
    class_names: List[str]
        List of class names associated with the model.
    transform: transforms.Compose
        A transformation function to transform the images into a desired
        format. All transformed images must share one shape so they can be
        stacked into a single batch.
    plot_predictions: bool
        If true, plot the predictions and the images.
    device: torch.device
        Target device to compute on ("cuda", "cpu", etc.). Defaults to CPU.

    Returns
    -------
    A list of string containing the predicted class names, one per image.
    Empty when no image is found under ``image_path``.
    """
    # Load the images
    images_arr = read_custom_images(image_path=image_path)
    if not images_arr:
        # torch.stack cannot build a batch from an empty list
        return []

    # Transform the images
    batch_images = torch.stack([transform(image) for image in images_arr])

    # Make the predictions
    model.to(device)
    model.eval()
    with torch.inference_mode():
        y_pred = model(batch_images.to(device))
        pred_probs = torch.softmax(y_pred, dim=1)
        pred_labels = torch.argmax(pred_probs, dim=1)

    # Plain Python ints/floats from here on, whatever device was used
    predictions = [class_names[label] for label in pred_labels.tolist()]

    # Plot the predictions
    if plot_predictions:
        import matplotlib.pyplot as plt

        confidences = pred_probs.max(dim=1).values.tolist()
        ncols = 2
        nrows = math.ceil(len(images_arr) / ncols)
        for i, image in enumerate(images_arr):
            plt.subplot(nrows, ncols, i + 1)
            # Show the untransformed image, moved to channels-last for matplotlib
            plt.imshow(image.permute(1, 2, 0))
            plt.title(f"{predictions[i]} ({confidences[i] * 100:.1f}%)")
            plt.axis(False)

    return predictions


__all__ = ["predict_images"]

Writing src/utils/custom_testing/predict_images.py


## 6. Create `__init__.py` files for easier imports

In [20]:
%%writefile src/core/__init__.py
from .train import train

__all__ = ["train"]

Writing src/core/__init__.py


In [21]:
%%writefile src/data_preparation/__init__.py
from .create_dataloaders import create_dataloaders

__all__ = ["create_dataloaders"]

Writing src/data_preparation/__init__.py


In [22]:
%%writefile src/models/__init__.py
from .tiny_vgg import TinyVGG

__all__ = ["TinyVGG"]

Writing src/models/__init__.py


In [23]:
%%writefile src/utils/__init__.py
from .plot_loss_curves import plot_loss_curves
from .save_model import save_model
from .view_dataloader import view_dataloader

__all__ = [
    "plot_loss_curves",
    "save_model",
    "view_dataloader",
]

Writing src/utils/__init__.py


In [24]:
%%writefile src/utils/custom_testing/__init__.py
from .download_images import download_images
from .predict_images import predict_images

__all__ = [
    "download_images",
    "predict_images",
]

Writing src/utils/custom_testing/__init__.py
