# 1. Importing Data

In [1]:
import os
import requests
import zipfile
from pathlib import Path

# Locations of the dataset on disk and of its source archive on GitHub
DATA_PATH = Path("data/")
IMAGE_PATH = DATA_PATH / "pizza_steak_sushi"
ZIP_PATH = DATA_PATH / "pizza_steak_sushi.zip"
github_url = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip"

if IMAGE_PATH.is_dir() and any(IMAGE_PATH.iterdir()):
  # The images are already on disk, so re-running this cell must not download them again
  print(f"Directory {IMAGE_PATH} already exists")
else:
  print(f"Creating directory {IMAGE_PATH} ...")
  IMAGE_PATH.mkdir(parents=True, exist_ok=True)

  # Import the data zipfile from GitHub
  print(f"Fetching Image Data Zip File ...")
  request = requests.get(github_url)
  request.raise_for_status()  # Stop here on an HTTP error instead of writing an error page to disk
  with open(ZIP_PATH, "wb") as f:
    f.write(request.content)

  # Open zip file
  with zipfile.ZipFile(ZIP_PATH, "r") as zip_ref:
    print("Opening Zip File ...")
    zip_ref.extractall(IMAGE_PATH)
    print("Zip file succesfully opened")

  # The archive is only needed for extraction, so delete it afterwards
  os.remove(ZIP_PATH)

Creating directory data/pizza_steak_sushi ...
Fetching Image Data Zip File ...
Opening Zip File ...
Zip file succesfully opened


# 2. Creating Python Modules for Model Training

In [2]:
## Creating Parent Directory for Modular Version of Python Model
import os
# exist_ok=True lets this cell be re-run without raising when the directory is already there
os.makedirs("modular_pytorch", exist_ok=True)

In [3]:
# Create dataloader creation function and add it to 'modular_pytorch/data_setup.py'
%%writefile modular_pytorch/data_setup.py
"""
This file contains the functionality for creating PyTorch DataLoaders for
image classification data.
"""


from torchvision import datasets, transforms
from torch.utils.data import DataLoader

def create_dataloaders(train_dir : str, test_dir : str,
                       train_transform : transforms.Compose, test_transform : transforms.Compose,
                       batch_size : int, num_workers : int = 0):
  """
  This function creates dataloaders for training and evaluating a pytorch model

  ------------------------------------------------------------------------------
  Inputs:

  train_dir - The path of the directory containing the training image data
  test_dir - The path of the directory containing the testing image data
  train_transform - The transformation we want to apply to the training image data
  test_transform - The transformation we want to apply to the testing image data
  batch_size - The size of the mini-batches in the dataloaders
  num_workers - The number of worker processes used by each dataloader
                (optional, defaults to 0 so callers may omit it)

  ------------------------------------------------------------------------------
  Outputs:

  train_dataloader - the training dataloader (with shuffling applied)
  test_dataloader - the testing dataloader (no shuffling)
  class_names - the class names found in the training directory
  """

  # Create training & testing datasets using `ImageFolder`
  train_data = datasets.ImageFolder(train_dir,
                                    transform=train_transform)

  test_data = datasets.ImageFolder(test_dir,
                                   transform=test_transform)

  # Get class names (taken from the training dataset)
  class_names = train_data.classes

  # Create training & testing dataloaders
  train_dataloader = DataLoader(train_data,
                                batch_size=batch_size,
                                shuffle=True,
                                num_workers=num_workers)

  # Evaluation data is not shuffled
  test_dataloader = DataLoader(test_data,
                               batch_size=batch_size,
                               shuffle=False,
                               num_workers=num_workers)

  return train_dataloader, test_dataloader, class_names

Writing modular_pytorch/data_setup.py


In [4]:
## Create TinyVGG model class & add it to model_builder.py script
%%writefile modular_pytorch/model_builder.py
"""
This file contains the TinyVGG model class
"""

import torch
import torch.nn as nn


class TinyVGG(nn.Module):
  """
  Creates the TinyVGG CNN architecture

  Replicates the TinyVGG architecture from the CNN explainer website in PyTorch.
  See the original architecture here: https://poloclub.github.io/cnn-explainer/

  ---------------------------------------------------------
  Args:

  input_shape - the number of input channels
  output_shape - the number of output units of the classifier (one logit per class)
  hidden_channels - the number of channels in the hidden layers
  image_size - spatial size of the input images, either a single int for square
               images or a (height, width) pair. Defaults to 64, which gives the
               13x13 feature map the classifier was originally hard-coded for.

  ---------------------------------------------------------
  Raises:

  ValueError - if `image_size` is too small to survive the two convolutional blocks
  """

  def __init__(self, input_shape : int, output_shape : int, hidden_channels : int=10, image_size=64):
    super().__init__()

    # Normalise image_size to an explicit (height, width) pair
    if isinstance(image_size, int):
      height, width = image_size, image_size
    else:
      height, width = image_size

    # Work out the feature-map size reaching the classifier instead of hard-coding 13x13
    feature_height = self._size_after_conv_blocks(height)
    feature_width = self._size_after_conv_blocks(width)
    if feature_height < 1 or feature_width < 1:
      raise ValueError(f"image_size {image_size} is too small for the TinyVGG convolutional blocks")

    self.conv_layer1 = nn.Sequential(
                                    nn.Conv2d(in_channels=input_shape, out_channels=hidden_channels,
                                              kernel_size=3, stride=1, padding=0),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=hidden_channels, out_channels=hidden_channels,
                                              kernel_size=3, stride=1, padding=0),
                                    nn.ReLU(),
                                    nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
                                    )

    self.conv_layer2 = nn.Sequential(
                                    nn.Conv2d(in_channels=hidden_channels, out_channels=hidden_channels,
                                              kernel_size=3, stride=1, padding=0),
                                    nn.ReLU(),
                                    nn.Conv2d(in_channels=hidden_channels, out_channels=hidden_channels,
                                              kernel_size=3, stride=1, padding=0),
                                    nn.ReLU(),
                                    nn.MaxPool2d(kernel_size=2,stride=2,padding=0)
                                    )

    self.classifier_layer = nn.Sequential(
                                          nn.Flatten(),
                                          nn.Linear(in_features=hidden_channels*feature_height*feature_width,
                                                    out_features=output_shape)
                                          )

  @staticmethod
  def _size_after_conv_blocks(size : int) -> int:
    """Returns the length of one spatial dimension after both convolutional blocks."""
    for _ in range(2):
      # Two unpadded 3x3 convolutions remove 4 pixels, then the 2x2 max-pool halves (floor) the rest
      size = (size - 4) // 2
    return size

  def forward(self, x):
    """Passes a batch through both convolutional blocks and the classifier, returning the logits."""
    return self.classifier_layer(self.conv_layer2(self.conv_layer1(x)))

Writing modular_pytorch/model_builder.py


In [5]:
## Create functions for the training and evaluation steps in each epoch, and an overall training function for the entire training process
%%writefile modular_pytorch/engine.py
"""
Contains functions for model training and evaluation within each epoch, and a function for the overall model training across multiple epochs
"""

import torch
import torch.nn as nn

from torch.utils.data import DataLoader

from tqdm.auto import tqdm


def train_step(model : nn.Module,
               train_dataloader : DataLoader,
               device : torch.device,
               optimizer : torch.optim.Optimizer,
               loss_fn):
  """
  Carries out the training step on a pytorch model & calculates training loss & accuracy

  --------------------------------
  Inputs:
  model - the model to be trained
  train_dataloader - the dataloader containing the training data
  device - the device the batches are moved to (the model is expected to already be on it)
  optimizer - the optimizer to use for model training
  loss_fn - the loss function used to evaluate the model's success


  --------------------------------
  Outputs: Tuple[loss, acc]
  loss - the training loss of the model, averaged over the batches (python float)
  acc - the training accuracy of the model, averaged over the batches (python float)
  """

  # Initialize loss & accuracy
  train_loss, train_acc = 0, 0

  # Set model to training mode
  model.train()
  for images, labels in train_dataloader:
    images, labels = images.to(device), labels.to(device)  # Move batch images & labels to device

    y_logits = model(images)   # Carry out forward pass

    loss = loss_fn(y_logits, labels)   # Calculate batch loss
    # Accumulate a plain float: adding the tensor itself would keep every batch's
    # autograd history alive and make this function return a tensor
    train_loss += loss.item()

    # Calculate and accumulate accuracy metric across all batches
    # (softmax does not change which logit is largest, so argmax is taken on the logits directly)
    y_pred_class = y_logits.argmax(dim=1)
    train_acc += (y_pred_class == labels).sum().item()/len(y_logits)

    # Carry out model training given batch loss
    optimizer.zero_grad()  # Zero the optimizer gradient
    loss.backward()  # Carry out backpropagation
    optimizer.step()  # Update weights


  train_loss /= len(train_dataloader)  # Calculate average loss across the batches
  train_acc /= len(train_dataloader)   # Calculate average accuracy across the batches

  return train_loss, train_acc



def eval_step(model : nn.Module,
              test_dataloader : DataLoader,
              device : torch.device,
              loss_fn):
  """
  Evaluates a pytorch model on a dataloader, running in model.eval() mode under
  torch.inference_mode(), and reports the loss & accuracy averaged over the batches

  --------------------------------
  Inputs:
  model - the model to be evaluated
  test_dataloader - the dataloader containing the unseen testing data
  device - the device each batch is moved to before the forward pass
  loss_fn - the loss function used to evaluate the model


  --------------------------------
  Outputs: Tuple[loss, acc]
  test_loss - the per-batch loss averaged over all batches
  test_acc - the per-batch accuracy averaged over all batches
  """

  # Put the model in evaluation mode
  model.eval()

  # Running totals over the batches
  running_loss, running_acc = 0, 0

  with torch.inference_mode():
    for batch_images, batch_labels in test_dataloader:
      # Move the batch onto the target device
      batch_images = batch_images.to(device)
      batch_labels = batch_labels.to(device)

      # Forward pass and batch loss
      logits = model(batch_images)
      running_loss += loss_fn(logits, batch_labels).item()

      # Fraction of this batch classified correctly
      predicted = logits.argmax(dim=1)
      num_correct = (predicted == batch_labels).sum().item()
      running_acc += num_correct / len(predicted)

  # Average the totals over the number of batches
  num_batches = len(test_dataloader)
  return running_loss / num_batches, running_acc / num_batches



def train_model(model : nn.Module, num_epochs : int,
                train_dataloader : DataLoader, test_dataloader : DataLoader,
                optimizer : torch.optim.Optimizer,
                device : torch.device,
                loss_fn):
  """
  Trains the model for a given number of epochs & calculates the training and testing loss & accuracy of the model at each epoch in the training process

  ------------------------------------------------------------------------
  Inputs:
  model - model to be trained
  num_epochs - the number of epochs for which we want to train the model
  train_dataloader - dataloader containing the training data in batched format
  test_dataloader - dataloader containing the testing data in batched format
  optimizer - the optimizer to use for model training
  device - device passed on to the training & evaluation steps
  loss_fn - the loss function used to evaluate the model

  ------------------------------------------------------------------------
  Outputs:
  Tuple of lists in the form (train_losses, train_accs, eval_losses, eval_accs)
  Each element of the tuple is a list with one float per epoch:
  train_losses - contains the training loss observed at each epoch
  train_accs - contains the training accuracies observed at each epoch
  eval_losses - contains the testing loss observed at each epoch
  eval_accs - contains the testing accuracies observed at each epoch
  """


  # Create lists in which to store model training & evaluation results
  train_losses, train_accs = [], []
  eval_losses, eval_accs = [], []

  for epoch in tqdm(range(num_epochs)):
    train_loss, train_acc = train_step(model,
                                       train_dataloader,
                                       device, optimizer,
                                       loss_fn)

    eval_loss, eval_acc = eval_step(model,
                                    test_dataloader,
                                    device,
                                    loss_fn)

    # Store plain floats so the history does not hold on to tensors
    train_loss, train_acc = float(train_loss), float(train_acc)
    eval_loss, eval_acc = float(eval_loss), float(eval_acc)

    # Print results every 10 epochs
    if epoch % 10 == 0:
      print(f"Epoch : {epoch} | Train Loss = {train_loss:.4f}, Train Acc = {train_acc:.2f} | Test Loss = {eval_loss:.4f}, Test Acc = {eval_acc:.2f}")

    # Append training & evaluation results to lists
    train_losses.append(train_loss)
    train_accs.append(train_acc)

    eval_losses.append(eval_loss)
    eval_accs.append(eval_acc)

  return train_losses, train_accs, eval_losses, eval_accs

Writing modular_pytorch/engine.py


# 3. Training Model Using Modules Created Above

In [6]:
try:
  import torchvision
except:
  !pip install torchvision
  import torchvision
# Create training & testing set dataloaders
from modular_pytorch import data_setup

BATCH_SIZE = 8
NUM_WORKERS = 0
standard_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=(64,64)),
    torchvision.transforms.ToTensor()
])

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir = "data/pizza_steak_sushi/train",
                                                                               test_dir = "data/pizza_steak_sushi/train",
                                                                               train_transform = standard_transform, test_transform = standard_transform,
                                                                               batch_size = BATCH_SIZE, num_workers = NUM_WORKERS)

In [7]:
# Pull the first batch from the training dataloader and unpack it into images and labels
img, label = next(iter(train_dataloader))

In [8]:
# Create instance of Model
from modular_pytorch import model_builder

# Size the output layer from the dataset's classes instead of hard-coding the class count
model0 = model_builder.TinyVGG(input_shape = 3, output_shape = len(class_names), hidden_channels = 10)

In [9]:
# Train model for 100 epochs
import torch
import torch.nn as nn
from modular_pytorch import engine

NUM_EPOCHS = 100

# Define `train_model` function arguments
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training model on {device}")

# The training loop moves every batch to `device`, so the model has to live there too;
# do this before building the optimizer
model0 = model0.to(device)

optimizer = torch.optim.SGD(model0.parameters(),
                            lr=0.001)
loss_fn = nn.CrossEntropyLoss()


# Carry out model training
train_losses, train_accs, eval_losses, eval_accs = engine.train_model(model = model0, num_epochs = NUM_EPOCHS,
                                                                      train_dataloader = train_dataloader, test_dataloader = test_dataloader,
                                                                      device = device, optimizer = optimizer,
                                                                      loss_fn = loss_fn)

Training model on cpu


  0%|          | 0/100 [00:00<?, ?it/s]

Epoch : 0 | Train Loss = 1.0995, Train Acc = 0.340517 | Test Loss = 1.0993, Test Acc = 0.34
Epoch : 10 | Train Loss = 1.0984, Train Acc = 0.362069 | Test Loss = 1.0979, Test Acc = 0.41
Epoch : 20 | Train Loss = 1.0964, Train Acc = 0.336207 | Test Loss = 1.0956, Test Acc = 0.34
Epoch : 30 | Train Loss = 1.0927, Train Acc = 0.349138 | Test Loss = 1.0921, Test Acc = 0.36
Epoch : 40 | Train Loss = 1.0875, Train Acc = 0.405172 | Test Loss = 1.0866, Test Acc = 0.40
Epoch : 50 | Train Loss = 1.0777, Train Acc = 0.413793 | Test Loss = 1.0773, Test Acc = 0.42
Epoch : 60 | Train Loss = 1.0654, Train Acc = 0.443966 | Test Loss = 1.0623, Test Acc = 0.43
Epoch : 70 | Train Loss = 1.0427, Train Acc = 0.456897 | Test Loss = 1.0378, Test Acc = 0.49
Epoch : 80 | Train Loss = 1.0008, Train Acc = 0.543103 | Test Loss = 1.0002, Test Acc = 0.51
Epoch : 90 | Train Loss = 0.9625, Train Acc = 0.530172 | Test Loss = 0.9511, Test Acc = 0.58


# 4. Creating Modules to Save the Trained Model

In [10]:
### Create utils.py script
%%writefile modular_pytorch/utils.py
"""
This script contains various utility functions for PyTorch model training & saving
"""
import torch
from pathlib import Path

def save_model(model : torch.nn.Module,
               save_dir : str,
               model_name : str):
  """
  Saves a PyTorch model's state_dict to a target directory
  --------------------------------------------------------
  Inputs:
  model - the model to be saved
  save_dir - the directory under which we want to save the model (created if it does not exist)
  model_name - the name we want to assign to the file containing the saved model (should include '.pth' or '.pt' file extension name)

  --------------------------------------------------------
  Raises:
  AssertionError - if model_name does not end with '.pth' or '.pt'
  """

  # Validate the file name first so that a bad name does not leave an empty directory behind
  assert model_name.endswith('.pth') or model_name.endswith('.pt'), "model_name should end with '.pth' or '.pt' file extension"

  # Create saving directory
  save_dir_path = Path(save_dir)
  save_dir_path.mkdir(parents=True, exist_ok=True)

  # Create model save path
  model_save_path = save_dir_path / model_name

  # Save the model's state_dict()
  print(f"[INFO] Saving model to {model_save_path}")
  torch.save(obj=model.state_dict(), f=model_save_path)

Writing modular_pytorch/utils.py


In [11]:
### Create train.py file which carries out entire training, evaluating & saving process
%%writefile modular_pytorch/train.py
"""
Trains a PyTorch image classification model using device-agnostic code.
"""
import os
import torch
from modular_pytorch import data_setup, model_builder, engine, utils

from torchvision import transforms

# Setup Hyperparameters
NUM_EPOCHS = 100
BATCH_SIZE = 8
HIDDEN_CHANNELS = 10
LEARNING_RATE = 0.001
NUM_WORKERS = 0

# Setup directories (must match the folder the dataset archive is extracted into)
train_dir = 'data/pizza_steak_sushi/train'
test_dir = 'data/pizza_steak_sushi/test'

# Setup target device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training Model on {device}")

# Create transforms
std_transform = transforms.Compose([
    transforms.Resize(size=(64,64)),
    transforms.ToTensor()
])

# Create Dataloaders using data_setup.py module
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(train_dir = train_dir, test_dir = test_dir,
                                                                               train_transform = std_transform, test_transform = std_transform,
                                                                               batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

# Create TinyVGG model using model_builder.py
# (the class lives in the model_builder module; the output size follows the dataset's classes)
model = model_builder.TinyVGG(input_shape=3, output_shape=len(class_names), hidden_channels=HIDDEN_CHANNELS).to(device)

# Define loss function and optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
loss_fn = torch.nn.CrossEntropyLoss().to(device)  # `nn` is not imported in this script, so go through `torch.nn`


# Carry out model training using engine.py
train_losses, train_accs, eval_losses, eval_accs = engine.train_model(model=model, num_epochs=NUM_EPOCHS,
                                                                      train_dataloader=train_dataloader, test_dataloader=test_dataloader,
                                                                      optimizer=optimizer, device=device,
                                                                      loss_fn=loss_fn)

# Save the model using utils.py
utils.save_model(model=model,
                 save_dir = 'saved_models/',
                 model_name = 'TinyVGG_V1.pth')


Writing modular_pytorch/train.py


# 5. Section 5 'Going Modular' Exercises

In [12]:
# 1. Turn 'Importing Data' Section into a python script

# Answer:
%%writefile modular_pytorch/get_data.py
"""
Import training & testing data from github
"""

from pathlib import Path
import zipfile
import os
import requests


def import_data(data_dir : str, img_dir : str,
                raw_data_url : str,
                zipfile_name : str):
  """
  Downloads a zip archive of image data and extracts it under the data directory
  ------------------------------------------------------------------------------
  Inputs:
  data_dir - the directory in which the data is stored (created if it does not exist)
  img_dir - the name of the sub-directory of data_dir into which the archive is extracted
  raw_data_url - the url from which the zip archive is downloaded
  zipfile_name - the temporary file name for the downloaded archive (must end in '.zip')

  ------------------------------------------------------------------------------
  Raises:
  AssertionError - if zipfile_name does not end in '.zip'
  An HTTP error raised by requests if the download does not succeed
  """

  # Validate the arguments before touching the file system or the network
  assert zipfile_name.endswith('.zip'), "'zipfile_name' argument must end in '.zip'"

  # Create dataset directory
  data_path = Path(data_dir)
  image_path = data_path / img_dir
  zip_path = data_path / zipfile_name

  if data_path.is_dir():
    print(f"Directory {data_path} already exists")
  else:
    print(f"Creating directory {data_path}")
    data_path.mkdir(parents=True, exist_ok=True)

  # Import image datasets
  print(f"Importing raw data zipfile to {data_path}")
  request = requests.get(raw_data_url)
  request.raise_for_status()  # Stop here on an HTTP error instead of saving an error page as the zip
  with open(zip_path, "wb") as f:
    f.write(request.content)

  # Unzip the raw image file
  with zipfile.ZipFile(zip_path, "r") as zip_ref:
    print(f"Unzipping {zip_path}")
    zip_ref.extractall(image_path)
    print(f"Successfully unzipped data")

  os.remove(zip_path) # Remove zipfile from data directory

Writing modular_pytorch/get_data.py


In [13]:
# Testing answer
# from modular_pytorch import get_data

# get_data.import_data(data_dir = 'data/', img_dir = 'pizza_sushi_steak',
#                      raw_data_url = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
#                      zipfile_name = "pizza_sushi_steak.zip")