In [39]:
import torch
import torchvision

print(torch.__version__)
print(torchvision.__version__)

2.8.0
0.23.0


In [40]:
import os
os.getcwd()

'/Users/sauravkrishna/Documents/Python_stuffs/projects/deep_learning_ninja/pytorch-deep-learning-main/my_notebooks'

In [41]:
%cd pytorch-deep-learning-main/my_notebooks/

[Errno 2] No such file or directory: 'pytorch-deep-learning-main/my_notebooks/'
/Users/sauravkrishna/Documents/Python_stuffs/projects/deep_learning_ninja/pytorch-deep-learning-main/my_notebooks


  bkms = self.shell.db.get('bookmarks', {})


In [42]:
os.getcwd()

'/Users/sauravkrishna/Documents/Python_stuffs/projects/deep_learning_ninja/pytorch-deep-learning-main/my_notebooks'

In [43]:
from going_modular import engine, data_setup

In [44]:
# Set seeds
def set_seeds(seed: int=42):
    """Sets random sets for torch operations.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # Set the seed for general torch operations
    torch.manual_seed(seed)
    # Set the seed for CUDA torch operations (ones that happen on the GPU)
    torch.cuda.manual_seed(seed)

## 1. get data

- download the pizza, sushi, steak data 
- turn into a function

In [45]:
from pathlib import Path

In [46]:
Path("https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip").name

'pizza_steak_sushi_20_percent.zip'

In [47]:
import os
import zipfile
from pathlib import Path
import requests

def download_data(source: str,
                  destination: str,
                  remove_source: bool=True) -> Path:
    """Downloads data from a source URL and unzips it to a destination."""
    # setup paths to data folder
    data_path = Path("data/")
    image_path = data_path / destination

    if image_path.is_dir():
        print(f"[INFO] {image_path} directory already exists, skipping download.")
    else:
        print(f"[INFO] {image_path} directory does not exist, creating one...")
        image_path.mkdir(parents=True, exist_ok=True)

    # download the target data 
    target_file = Path(source).name
    with open(data_path / target_file, "wb") as f:
        request = requests.get(source)
        print(f"[INFO] Downloading target file from {source}....")
        f.write(request.content)

    with zipfile.ZipFile(data_path / target_file, "r") as zip_ref:
        print(f"[INFO] Unzipping {target_file} data")
        zip_ref.extractall(image_path)
    
    if remove_source:
        os.remove(data_path / target_file)
    
    return image_path






In [48]:
url = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip"
image_path = download_data(
    url,
    destination="pizza_steak_sushi"
)
image_path

[INFO] data/pizza_steak_sushi directory already exists, skipping download.
[INFO] Downloading target file from https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip....
[INFO] Unzipping pizza_steak_sushi_20_percent.zip data


PosixPath('data/pizza_steak_sushi')

## 2. Create Datasets and Dataloaders


### 2.1 Create DataLoaders with manual transforms

the goal with transforms is to ensure your custom data is formatted in a reproducible way as well as way that will suit the pretrained model.


In [49]:
# setup the directiories
train_dir = image_path / "train"
test_dir = image_path / "test"
train_dir, test_dir

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi/test'))

In [50]:
from torchvision import transforms

In [51]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
normalize

Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

In [52]:
# create transform pipeline manualluy
from torchvision import transforms
manual_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])
print(f"manually created transforms: {manual_transforms}")

# create dataloaders
from going_modular import data_setup

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transforms,
    batch_size=32,
    num_workers=0
)


train_dataloader, test_dataloader, class_names

manually created transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


(<torch.utils.data.dataloader.DataLoader at 0x107b74e80>,
 <torch.utils.data.dataloader.DataLoader at 0x1084aa370>,
 ['pizza', 'steak', 'sushi'])

### 2.1 Create DataLoaders with auto transforms

In [53]:
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
weights

EfficientNet_B0_Weights.IMAGENET1K_V1

In [54]:
auto_transforms = weights.transforms()
auto_transforms

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [55]:
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=auto_transforms,
    batch_size=32,
    num_workers=0   
)
train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x1084aa3a0>,
 <torch.utils.data.dataloader.DataLoader at 0x1084aab80>,
 ['pizza', 'steak', 'sushi'])

## 3. Getting a pretrained model, freeze the base layers and change the classifier head


In [56]:
# older medthod
#model = torchvision.models.efficientnet_b0(pretrained=True)

# get the weights first
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
# pass the weights to the model
model = torchvision.models.efficientnet_b0(weights=weights)
model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [57]:
for param in model.parameters():
    #print(param)
    param.requires_grad = False


In [58]:
## adjust the classifier head
from torch import nn
set_seeds()
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names))
)
model.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=3, bias=True)
)

In [59]:
from torchinfo import summary

summary(model,
input_size=(32, 3, 224, 224),
verbose=0,
col_names=["input_size", "output_size", "num_params", "trainable"],
col_width=20,
row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

## 4. Train the model

In [60]:
## define loss and optimizer
from torch import nn, optim
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),
lr=0.001)
loss_fn, optimizer

(CrossEntropyLoss(),
 Adam (
 Parameter Group 0
     amsgrad: False
     betas: (0.9, 0.999)
     capturable: False
     decoupled_weight_decay: False
     differentiable: False
     eps: 1e-08
     foreach: None
     fused: None
     lr: 0.001
     maximize: False
     weight_decay: 0
 ))

In [61]:
!pip install tensorboard



In [62]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(log_dir="runs/efficientnet_experiment_1")
writer

<torch.utils.tensorboard.writer.SummaryWriter at 0x10839ee80>

In [63]:
from going_modular.engine import train_step, test_step
from typing import Dict, List
from tqdm.auto import tqdm

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          writer: SummaryWriter,
          device: torch.device) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]} 
    For example if training for epochs=2: 
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]} 
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        # new: experimental tracking with tensorboard
        writer.add_scalars(
            main_tag="Loss",
            tag_scalar_dict={
                "train_loss": train_loss,
                "test_loss": test_loss
            },
            global_step=epoch
        )
        writer.add_scalars(
            main_tag="Accuracy",
            tag_scalar_dict={
                "train_acc": train_acc,
                "test_acc": test_acc
            },
            global_step=epoch
        )
        writer.add_graph(
            model=model,
            input_to_model=torch.randn(32, 3, 224, 224).to(device)
        )

        writer.close()

    # Return the filled results at the end of the epochs
    return results


In [64]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [65]:
# train the model
set_seeds()
results = train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=5,
    writer=writer,
    device=device
)

  0%|          | 0/5 [00:00<?, ?it/s]



Epoch: 1 | train_loss: 0.9363 | train_acc: 0.6042 | test_loss: 0.6657 | test_acc: 0.8926
Epoch: 2 | train_loss: 0.6483 | train_acc: 0.8604 | test_loss: 0.5250 | test_acc: 0.8943
Epoch: 3 | train_loss: 0.5283 | train_acc: 0.8583 | test_loss: 0.4478 | test_acc: 0.8926
Epoch: 4 | train_loss: 0.4383 | train_acc: 0.8625 | test_loss: 0.3900 | test_acc: 0.9152
Epoch: 5 | train_loss: 0.3870 | train_acc: 0.8917 | test_loss: 0.3661 | test_acc: 0.8971


In [83]:
%load_ext tensorboard
%tensorboard --logdir runs --port 6006

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 10412), started 0:00:21 ago. (Use '!kill 10412' to kill it.)

## 6. Create a function to prepare a `SummaryWriter` instance

By default a summary writer saves to the `runs/` directory. 
what if we want to save different experiments to different folders?

for examples we'd like to track:
    * experiment date/timestamp
    * experiment name
    * model name
    * experiment hyperparameters

Create  function to create a `SummaryWriter()` instance to take all of these things into account.

So ideally we end up tracking experiments to a dir:
`runs/YYYY-MM-DD/experiment_name/model_name/extra`


In [67]:
def create_writer(experiment_name: str,
model_name: str,
extra: str=None) -> SummaryWriter:
    """Creates a torch.utils.tensorboard.SummaryWriter instance tracking to a specific directory"""
    from datetime import datetime
    import os

    # get timestamp of current date in reverse order
    timestamp = datetime.now().strftime("%Y-%m-%d")

    if extra:
        # create log direc path
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name)
    print(f"[INFO] Creating SummaryWriter at: {log_dir}")
    return SummaryWriter(log_dir=log_dir)     

In [68]:
example_writer = create_writer(
    experiment_name="data_10_percent",
    model_name="effnetb0",
    extra="epochs_5")
example_writer

[INFO] Creating SummaryWriter at: runs/2026-01-08/data_10_percent/effnetb0/epochs_5


<torch.utils.tensorboard.writer.SummaryWriter at 0x11835e9d0>

### 6.1 Update the `train()` to include a `writer()`

In [69]:
from going_modular.engine import train_step, test_step
from typing import Dict, List
from tqdm.auto import tqdm

def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          writer: torch.utils.tensorboard.SummaryWriter,
          device: torch.device) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]} 
    For example if training for epochs=2: 
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]} 
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        # new: experimental tracking with tensorboard
        if writer:
            writer.add_scalars(
                main_tag="Loss",
                tag_scalar_dict={
                    "train_loss": train_loss,
                    "test_loss": test_loss
                },
                global_step=epoch
            )
            writer.add_scalars(
                main_tag="Accuracy",
                tag_scalar_dict={
                    "train_acc": train_acc,
                    "test_acc": test_acc
                },
                global_step=epoch
            )
            writer.add_graph(
                model=model,
                input_to_model=torch.randn(32, 3, 224, 224).to(device)
            )

            writer.close()

    # Return the filled results at the end of the epochs
    return results


## 7. Setting up a series of modeeling experiments


### 7.1  what kind odf expeiments to run:

- different models - limitless, but one cant test everything ofc
SO what should we be testigng - at minimum atleast?
- no. of epochs
- no. of hidden layers/units
- change the amount of data
- different model architectures
- different optimizers
- differnt hyperparameter setting ons the model and of the learning algorithm

This is why transfer learning is so powerful!


### 7.2 What are the experiments we are going to test now:
- model size, - EffnetB0 v/s EffnetB2
- Dataset size - 10% vs 20%
- training time - no. of epochs 5 v/s 10


### 7.3 Downloading datasets 
1. "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip"
2. "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip"

In [70]:
# Download 10 percent and 20 percent training data (if necessary)
data_10_percent_path = download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                                     destination="pizza_steak_sushi")

data_20_percent_path = download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip",
                                     destination="pizza_steak_sushi_20_percent")

[INFO] data/pizza_steak_sushi directory already exists, skipping download.
[INFO] Downloading target file from https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip....
[INFO] Unzipping pizza_steak_sushi.zip data
[INFO] data/pizza_steak_sushi_20_percent directory already exists, skipping download.
[INFO] Downloading target file from https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip....
[INFO] Unzipping pizza_steak_sushi_20_percent.zip data


In [71]:
import os
os.getcwd()

'/Users/sauravkrishna/Documents/Python_stuffs/projects/deep_learning_ninja/pytorch-deep-learning-main/my_notebooks'

### 7.4 Transform the datasets and create dataloaders

1. Resize the images to (224, 224)
2. Totensor
3. Normalise so that the inputs have the same data disribution as the pretrained model.

In [72]:
# Setup training directory paths
train_dir_10_percent = data_10_percent_path / "train"
train_dir_20_percent = data_20_percent_path / "train"

# Setup testing directory paths (note: use the same test dataset for both to compare the results)
test_dir = data_10_percent_path / "test"

# Check the directories
print(f"Training directory 10%: {train_dir_10_percent}")
print(f"Training directory 20%: {train_dir_20_percent}")
print(f"Testing directory: {test_dir}")

Training directory 10%: data/pizza_steak_sushi/train
Training directory 20%: data/pizza_steak_sushi_20_percent/train
Testing directory: data/pizza_steak_sushi/test


In [73]:
from torchvision import transforms

# Create a transform to normalize data distribution to be inline with ImageNet
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], # values per colour channel [red, green, blue]
                                 std=[0.229, 0.224, 0.225]) # values per colour channel [red, green, blue]

# Compose transforms into a pipeline
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)), # 1. Resize the images
    transforms.ToTensor(), # 2. Turn the images into tensors with values between 0 & 1
    normalize # 3. Normalize the images so their distributions match the ImageNet dataset 
])

In [74]:
BATCH_SIZE = 32

# Create 10% training and test DataLoaders
train_dataloader_10_percent, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir_10_percent,
    test_dir=test_dir, 
    transform=simple_transform,
    batch_size=BATCH_SIZE
)

# Create 20% training and test data DataLoders
train_dataloader_20_percent, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir_20_percent,
    test_dir=test_dir,
    transform=simple_transform,
    batch_size=BATCH_SIZE
)

# Find the number of samples/batches per dataloader (using the same test_dataloader for both experiments)
print(f"Number of batches of size {BATCH_SIZE} in 10 percent training data: {len(train_dataloader_10_percent)}")
print(f"Number of batches of size {BATCH_SIZE} in 20 percent training data: {len(train_dataloader_20_percent)}")
print(f"Number of batches of size {BATCH_SIZE} in testing data: {len(test_dataloader)} (all experiments will use the same test set)")
print(f"Number of classes: {len(class_names)}, class names: {class_names}")

Number of batches of size 32 in 10 percent training data: 15
Number of batches of size 32 in 20 percent training data: 15
Number of batches of size 32 in testing data: 7 (all experiments will use the same test set)
Number of classes: 3, class names: ['pizza', 'steak', 'sushi']


In [75]:
train_dataloader_20_percent, test_dataloader_20_percent, class_names_20_percent

(<torch.utils.data.dataloader.DataLoader at 0x1066727c0>,
 <torch.utils.data.dataloader.DataLoader at 0x10666a580>,
 ['pizza', 'steak', 'sushi'])

In [76]:
len(train_dataloader_10_percent), len(train_dataloader_20_percent)

(15, 15)

In [84]:
len(train_dataloader_10_percent.dataset), len(train_dataloader_20_percent.dataset)

(480, 450)

In [85]:
train_dataloader_10_percent.dataset.root, train_dataloader_20_percent.dataset.root

(PosixPath('data/pizza_steak_sushi/train'),
 PosixPath('data/pizza_steak_sushi_20_percent/train'))

In [86]:
print(train_dir_10_percent)
print(train_dir_20_percent)
print(train_dataloader_10_percent.dataset.root)
print(train_dataloader_20_percent.dataset.root)
print(len(train_dataloader_10_percent.dataset), len(train_dataloader_20_percent.dataset))


data/pizza_steak_sushi/train
data/pizza_steak_sushi_20_percent/train
data/pizza_steak_sushi/train
data/pizza_steak_sushi_20_percent/train
480 450


In [87]:
import torchvision
from torchinfo import summary

# 1. Create an instance of EffNetB2 with pretrained weights
effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT # "DEFAULT" means best available weights
effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights)

# # 2. Get a summary of standard EffNetB2 from torchvision.models (uncomment for full output)
# summary(model=effnetb2, 
#         input_size=(32, 3, 224, 224), # make sure this is "input_size", not "input_shape"
#         # col_names=["input_size"], # uncomment for smaller output
#         col_names=["input_size", "output_size", "num_params", "trainable"],
#         col_width=20,
#         row_settings=["var_names"]
# ) 

# 3. Get the number of in_features of the EfficientNetB2 classifier layer
print(f"Number of in_features to final layer of EfficientNetB2: {len(effnetb2.classifier.state_dict()['1.weight'][0])}")

Number of in_features to final layer of EfficientNetB2: 1408


In [88]:
summary(model=effnetb2, 
        input_size=(32, 3, 224, 224), # make sure this is "input_size", not "input_shape"
        # col_names=["input_size"], # uncomment for smaller output
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   True
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1408, 7, 7]     --                   True
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   True
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   864                  True
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   64                   True
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 16, 112

In [89]:
import torchvision
from torch import nn

# Get num out features (one for each class pizza, steak, sushi)
OUT_FEATURES = len(class_names)

# Create an EffNetB0 feature extractor
def create_effnetb0():
    # 1. Get the base model with pretrained weights and send to target device
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights).to(device)

    # 2. Freeze the base model layers
    for param in model.features.parameters():
        param.requires_grad = False

    # 3. Set the seeds
    set_seeds()

    # 4. Change the classifier head
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2),
        nn.Linear(in_features=1280, out_features=OUT_FEATURES)
    ).to(device)

    # 5. Give the model a name
    model.name = "effnetb0"
    print(f"[INFO] Created new {model.name} model.")
    return model

# Create an EffNetB2 feature extractor
def create_effnetb2():
    # 1. Get the base model with pretrained weights and send to target device
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    model = torchvision.models.efficientnet_b2(weights=weights).to(device)

    # 2. Freeze the base model layers
    for param in model.features.parameters():
        param.requires_grad = False

    # 3. Set the seeds
    set_seeds()

    # 4. Change the classifier head
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.3),
        nn.Linear(in_features=1408, out_features=OUT_FEATURES)
    ).to(device)

    # 5. Give the model a name
    model.name = "effnetb2"
    print(f"[INFO] Created new {model.name} model.")
    return model

In [None]:
effnetb0 = create_effnetb0() 
effnetb2 = create_effnetb2()

### 7.6 Create experiments and set up training code

We've prepared our data and prepared our models, the time has come to setup some experiments!

We'll start by creating two lists and a dictionary:
1. A list of the number of epochs we'd like to test (`[5, 10]`)
2. A list of the models we'd like to test (`["effnetb0", "effnetb2"]`)
3. A dictionary of the different training DataLoaders

In [91]:
# 1. Create epochs list
num_epochs = [5, 10]

# 2. Create models list (need to create a new model for each experiment)
models = ["effnetb0", "effnetb2"]

# 3. Create dataloaders dictionary for various dataloaders
train_dataloaders = {"data_10_percent": train_dataloader_10_percent,
                     "data_20_percent": train_dataloader_20_percent}

In [None]:
%%time
from going_modular.utils import save_model

# 1. Set the random seeds
set_seeds(seed=42)

# 2. Keep track of experiment numbers
experiment_number = 0

# 3. Loop through each DataLoader
for dataloader_name, train_dataloader in train_dataloaders.items():

    # 4. Loop through each number of epochs
    for epochs in num_epochs: 

        # 5. Loop through each model name and create a new model based on the name
        for model_name in models:

            # 6. Create information print outs
            experiment_number += 1
            print(f"[INFO] Experiment number: {experiment_number}")
            print(f"[INFO] Model: {model_name}")
            print(f"[INFO] DataLoader: {dataloader_name}")
            print(f"[INFO] Number of epochs: {epochs}")  

            # 7. Select the model
            if model_name == "effnetb0":
                model = create_effnetb0() # creates a new model each time (important because we want each experiment to start from scratch)
            else:
                model = create_effnetb2() # creates a new model each time (important because we want each experiment to start from scratch)
            
            # 8. Create a new loss and optimizer for every model
            loss_fn = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

            # 9. Train target model with target dataloaders and track experiments
            train(model=model,
                  train_dataloader=train_dataloader,
                  test_dataloader=test_dataloader, 
                  optimizer=optimizer,
                  loss_fn=loss_fn,
                  epochs=epochs,
                  device=device,
                  writer=create_writer(experiment_name=dataloader_name,
                                       model_name=model_name,
                                       extra=f"{epochs}_epochs"))
            
            # 10. Save the model to file so we can get back the best model
            save_filepath = f"07_{model_name}_{dataloader_name}_{epochs}_epochs.pth"
            save_model(model=model,
                       target_dir="models",
                       model_name=save_filepath)
            print("-"*50 + "\n")

[INFO] Experiment number: 1
[INFO] Model: effnetb0
[INFO] DataLoader: data_10_percent
[INFO] Number of epochs: 5
[INFO] Created new effnetb0 model.
[INFO] Creating SummaryWriter at: runs/2026-01-08/data_10_percent/effnetb0/5_epochs


  0%|          | 0/5 [00:00<?, ?it/s]



Epoch: 1 | train_loss: 0.9681 | train_acc: 0.5875 | test_loss: 0.6783 | test_acc: 0.8419
Epoch: 2 | train_loss: 0.6719 | train_acc: 0.8479 | test_loss: 0.5097 | test_acc: 0.8957
Epoch: 3 | train_loss: 0.5350 | train_acc: 0.8646 | test_loss: 0.4291 | test_acc: 0.9091
Epoch: 4 | train_loss: 0.4335 | train_acc: 0.8958 | test_loss: 0.3773 | test_acc: 0.9135
Epoch: 5 | train_loss: 0.3823 | train_acc: 0.9000 | test_loss: 0.3434 | test_acc: 0.9180
[INFO] Saving model to: models/07_effnetb0_data_10_percent_5_epochs.pth
--------------------------------------------------

[INFO] Experiment number: 2
[INFO] Model: effnetb2
[INFO] DataLoader: data_10_percent
[INFO] Number of epochs: 5
[INFO] Created new effnetb2 model.
[INFO] Creating SummaryWriter at: runs/2026-01-08/data_10_percent/effnetb2/5_epochs


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9845 | train_acc: 0.5375 | test_loss: 0.7641 | test_acc: 0.8642
Epoch: 2 | train_loss: 0.7038 | train_acc: 0.8417 | test_loss: 0.6132 | test_acc: 0.9001
Epoch: 3 | train_loss: 0.5658 | train_acc: 0.8583 | test_loss: 0.5175 | test_acc: 0.8912
Epoch: 4 | train_loss: 0.4506 | train_acc: 0.9146 | test_loss: 0.4670 | test_acc: 0.9046
Epoch: 5 | train_loss: 0.4070 | train_acc: 0.9042 | test_loss: 0.4259 | test_acc: 0.9046
[INFO] Saving model to: models/07_effnetb2_data_10_percent_5_epochs.pth
--------------------------------------------------

[INFO] Experiment number: 3
[INFO] Model: effnetb0
[INFO] DataLoader: data_10_percent
[INFO] Number of epochs: 10
[INFO] Created new effnetb0 model.
[INFO] Creating SummaryWriter at: runs/2026-01-08/data_10_percent/effnetb0/10_epochs


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9681 | train_acc: 0.5875 | test_loss: 0.6783 | test_acc: 0.8419
Epoch: 2 | train_loss: 0.6719 | train_acc: 0.8479 | test_loss: 0.5097 | test_acc: 0.8957
Epoch: 3 | train_loss: 0.5350 | train_acc: 0.8646 | test_loss: 0.4291 | test_acc: 0.9091
Epoch: 4 | train_loss: 0.4335 | train_acc: 0.8958 | test_loss: 0.3773 | test_acc: 0.9135
Epoch: 5 | train_loss: 0.3823 | train_acc: 0.9000 | test_loss: 0.3434 | test_acc: 0.9180
Epoch: 6 | train_loss: 0.3637 | train_acc: 0.9021 | test_loss: 0.3161 | test_acc: 0.9359
Epoch: 7 | train_loss: 0.3366 | train_acc: 0.8958 | test_loss: 0.2930 | test_acc: 0.9269
Epoch: 8 | train_loss: 0.2979 | train_acc: 0.9250 | test_loss: 0.2815 | test_acc: 0.9269
Epoch: 9 | train_loss: 0.2729 | train_acc: 0.9313 | test_loss: 0.2642 | test_acc: 0.9314
Epoch: 10 | train_loss: 0.2795 | train_acc: 0.9208 | test_loss: 0.2520 | test_acc: 0.9269
[INFO] Saving model to: models/07_effnetb0_data_10_percent_10_epochs.pth
------------------------------------

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9845 | train_acc: 0.5375 | test_loss: 0.7641 | test_acc: 0.8642
Epoch: 2 | train_loss: 0.7038 | train_acc: 0.8417 | test_loss: 0.6132 | test_acc: 0.9001
Epoch: 3 | train_loss: 0.5658 | train_acc: 0.8583 | test_loss: 0.5175 | test_acc: 0.8912
Epoch: 4 | train_loss: 0.4506 | train_acc: 0.9146 | test_loss: 0.4670 | test_acc: 0.9046
Epoch: 5 | train_loss: 0.4070 | train_acc: 0.9042 | test_loss: 0.4259 | test_acc: 0.9046
Epoch: 6 | train_loss: 0.3649 | train_acc: 0.9271 | test_loss: 0.3961 | test_acc: 0.9135
Epoch: 7 | train_loss: 0.3336 | train_acc: 0.9104 | test_loss: 0.3799 | test_acc: 0.9046
Epoch: 8 | train_loss: 0.3148 | train_acc: 0.9229 | test_loss: 0.3584 | test_acc: 0.9180
Epoch: 9 | train_loss: 0.3063 | train_acc: 0.9146 | test_loss: 0.3365 | test_acc: 0.9314
Epoch: 10 | train_loss: 0.2744 | train_acc: 0.9333 | test_loss: 0.3400 | test_acc: 0.8926
[INFO] Saving model to: models/07_effnetb2_data_10_percent_10_epochs.pth
------------------------------------

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9679 | train_acc: 0.5583 | test_loss: 0.6544 | test_acc: 0.8600
Epoch: 2 | train_loss: 0.6778 | train_acc: 0.8417 | test_loss: 0.5436 | test_acc: 0.9135
Epoch: 3 | train_loss: 0.5493 | train_acc: 0.8750 | test_loss: 0.4431 | test_acc: 0.9001
Epoch: 4 | train_loss: 0.4622 | train_acc: 0.8938 | test_loss: 0.3995 | test_acc: 0.9091
Epoch: 5 | train_loss: 0.4618 | train_acc: 0.8792 | test_loss: 0.3843 | test_acc: 0.9001
[INFO] Saving model to: models/07_effnetb0_data_20_percent_5_epochs.pth
--------------------------------------------------

[INFO] Experiment number: 6
[INFO] Model: effnetb2
[INFO] DataLoader: data_20_percent
[INFO] Number of epochs: 5
[INFO] Created new effnetb2 model.
[INFO] Creating SummaryWriter at: runs/2026-01-08/data_20_percent/effnetb2/5_epochs


  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9805 | train_acc: 0.5354 | test_loss: 0.7724 | test_acc: 0.8285
Epoch: 2 | train_loss: 0.7483 | train_acc: 0.7917 | test_loss: 0.6445 | test_acc: 0.8703
Epoch: 3 | train_loss: 0.5664 | train_acc: 0.8958 | test_loss: 0.5609 | test_acc: 0.9091
Epoch: 4 | train_loss: 0.5192 | train_acc: 0.8812 | test_loss: 0.4837 | test_acc: 0.9016
Epoch: 5 | train_loss: 0.5245 | train_acc: 0.8479 | test_loss: 0.4661 | test_acc: 0.8882
[INFO] Saving model to: models/07_effnetb2_data_20_percent_5_epochs.pth
--------------------------------------------------

[INFO] Experiment number: 7
[INFO] Model: effnetb0
[INFO] DataLoader: data_20_percent
[INFO] Number of epochs: 10
[INFO] Created new effnetb0 model.
[INFO] Creating SummaryWriter at: runs/2026-01-08/data_20_percent/effnetb0/10_epochs


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.9679 | train_acc: 0.5583 | test_loss: 0.6544 | test_acc: 0.8600
Epoch: 2 | train_loss: 0.6778 | train_acc: 0.8417 | test_loss: 0.5436 | test_acc: 0.9135


In [None]:
# Viewing TensorBoard in Jupyter and Google Colab Notebooks (uncomment to view full TensorBoard instance)
%load_ext tensorboard
%tensorboard --logdir runs