# 07. PyTorch Experiment Tracking

In [1]:
import torchvision
import torch
from torch import nn
from torchvision import transforms
from torchinfo import summary
from going_modular import data_setup, engine

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
def set_seeds(seed: int=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

In [4]:
set_seeds()

## 1. Get data


In [5]:
import os
import zipfile
from pathlib import Path
import requests

def download_data(
    source: str,
    destination: str,
    remove_source: True
) -> Path:
    data_path = Path("data/")
    image_path = data_path / destination

    if image_path.is_dir():
        print(f"[INFO] {image_path} directory already exists. Skipping download.")
    else:
        print(f"[INFO] Did not find {image_path} directory. Creating now.")
        image_path.mkdir(parents=True, exist_ok=True)

        target_file = Path(source).name
        with open(data_path / target_file, "wb") as f:
            request = requests.get(source)
            print(f"[INFO] Downloading {source} to {data_path / target_file}")
            f.write(request.content)

        with zipfile.ZipFile(data_path / target_file, "r") as zip_ref:
            print(f"[INFO] Extracting {data_path / target_file} to {image_path}")
            zip_ref.extractall(image_path)

        if remove_source:
            os.remove(data_path / target_file)

    return image_path

In [6]:
image_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/refs/heads/main/data/pizza_steak_sushi.zip",
    destination="pizza_steak_sushi",
    remove_source=True
)


[INFO] data\pizza_steak_sushi directory already exists. Skipping download.


## 2. Create datasets and dataloaders

## 2.1 Create DataLoaders with manual transforms

The goal with transforms is to ensure your custom data is formatted in a reproducile way as well as a way that will suit pretrained models.

In [7]:
train_dir = image_path / "train"
test_dir = image_path / "test"

In [8]:
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

manual_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])

print(f"Manually created transforms: {manual_transforms}")

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir, 
    test_dir, 
    manual_transforms, 
    batch_size=32
)

train_dataloader, test_dataloader, class_names

Manually created transforms: Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


(<torch.utils.data.dataloader.DataLoader at 0x201387f6240>,
 <torch.utils.data.dataloader.DataLoader at 0x201386f3560>,
 ['pizza', 'steak', 'sushi'])

## 2.2 Create dataloaders using automatically created transforms

In [9]:
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

automatic_transforms = weights.transforms()

train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir, 
    test_dir, 
    automatic_transforms, 
    batch_size=32
)

train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x20138819d90>,
 <torch.utils.data.dataloader.DataLoader at 0x20138819d30>,
 ['pizza', 'steak', 'sushi'])

## 3. Getting a pretrained model, freeze the base layers and change the classifier head

In [10]:
model = torchvision.models.efficientnet_b0(weights=weights).to(device)

In [11]:
for param in model.features.parameters():
    param.requires_grad = False

In [12]:
model.classifier = nn.Sequential(
    nn.Dropout(0.2, inplace=True),
    nn.Linear(1280, len(class_names))
).to(device)

In [13]:
summary(
    model,
    input_size=(32, 3, 224, 224),
    col_names=[
        "input_size",
        "output_size",
        "num_params",
        "trainable"],
    col_width=20,
    row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

## 4. Train a single model and track results

In [14]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [15]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [16]:
from going_modular.engine import train_step, test_step
from tqdm.auto import tqdm
from typing import List, Dict, Tuple

In [17]:
def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]} 
    For example if training for epochs=2: 
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]} 
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

    ### NEW: Experiment tracking with TensorBoard
        writer.add_scalars(
            main_tag="Loss", 
            tag_scalar_dict={
                "train_loss": train_loss, 
                "test_loss": test_loss},
            global_step=epoch
        )


        writer.add_scalars(
            main_tag="Accuracy", 
            tag_scalar_dict={
                "train_acc": train_acc, 
                "test_acc": test_acc},
            global_step=epoch
        )

        writer.add_graph(
            model=model,
            input_to_model=torch.randn(32, 3, 224, 224).to(device)
        )

    writer.close()
    ### END NEW

    # Return the filled results at the end of the epochs
    return results

In [18]:
set_seeds()
results = train(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=10,
    device=device
)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0853 | train_acc: 0.4219 | test_loss: 0.8532 | test_acc: 0.7737


 10%|█         | 1/10 [00:11<01:46, 11.83s/it]

Epoch: 2 | train_loss: 0.9059 | train_acc: 0.6758 | test_loss: 0.8085 | test_acc: 0.7121


 20%|██        | 2/10 [00:23<01:33, 11.65s/it]

Epoch: 3 | train_loss: 0.7440 | train_acc: 0.7773 | test_loss: 0.6371 | test_acc: 0.9062


 30%|███       | 3/10 [00:34<01:20, 11.48s/it]

Epoch: 4 | train_loss: 0.6754 | train_acc: 0.7812 | test_loss: 0.6129 | test_acc: 0.8655


 40%|████      | 4/10 [00:46<01:08, 11.46s/it]

Epoch: 5 | train_loss: 0.6366 | train_acc: 0.8047 | test_loss: 0.6095 | test_acc: 0.8352


 50%|█████     | 5/10 [00:57<00:57, 11.50s/it]

Epoch: 6 | train_loss: 0.5816 | train_acc: 0.8203 | test_loss: 0.5073 | test_acc: 0.9167


 60%|██████    | 6/10 [01:09<00:45, 11.48s/it]

Epoch: 7 | train_loss: 0.5461 | train_acc: 0.7812 | test_loss: 0.5336 | test_acc: 0.8769


 70%|███████   | 7/10 [01:21<00:35, 11.79s/it]

Epoch: 8 | train_loss: 0.4973 | train_acc: 0.8008 | test_loss: 0.4874 | test_acc: 0.8873


 80%|████████  | 8/10 [01:33<00:24, 12.02s/it]

Epoch: 9 | train_loss: 0.4313 | train_acc: 0.9141 | test_loss: 0.4426 | test_acc: 0.8759


 90%|█████████ | 9/10 [01:45<00:11, 12.00s/it]

Epoch: 10 | train_loss: 0.5542 | train_acc: 0.7734 | test_loss: 0.4370 | test_acc: 0.8655


100%|██████████| 10/10 [01:57<00:00, 11.75s/it]


In [19]:
%load_ext tensorboard
%tensorboard --logdir=runs

Reusing TensorBoard on port 6006 (pid 27860), started 5 days, 17:03:34 ago. (Use '!kill 27860' to kill it.)

## 6. Create a function to prepare a `SummaryWriter()` instance

By default the `SummaryWriter()` class saves to `log_dir`.

Save different experiments to different folders? In essence, one experiment = one folder.

For example, track:
* Experiment date / timestamp
* Experiment name
* Model name
* Extra - is there anything else, that should be tracked?

Create a function to create a `SummaryWriter()` instance to take all of things into account. So ideally we end up tracking experiments to a directory:

`runs/YYYY-MM-DD/experiment_name/model_name/extra`

In [23]:
from torch.utils.tensorboard import SummaryWriter

def create_writer(
        experiment_name: str,
        model_name: str,
        extra: str = None
    ):
    """
    Create a TensorBoard SummaryWriter instance.    
    """

    from datetime import datetime
    import os
    
    timestamp = datetime.now().strftime("%Y-%m-%d")

    if extra:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name)

    print(f"[INFO] TensorBoard logging directory: {log_dir}")

    return SummaryWriter(log_dir=log_dir)
    

In [25]:
example_writer = create_writer(
    "data_10_per",
    "effnetb0",
    "5_epochs")
example_writer

[INFO] TensorBoard logging directory: runs\2025-02-17\data_10_per\effnetb0\5_epochs


<torch.utils.tensorboard.writer.SummaryWriter at 0x201412c1ee0>

### 6.1 Update `train()` function to include use create_writer

In [28]:
def train(model: torch.nn.Module, 
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
          writer: torch.utils.tensorboard.writer.SummaryWriter
          ) -> Dict[str, List[float]]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for 
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]} 
    For example if training for epochs=2: 
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]} 
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                          dataloader=train_dataloader,
                                          loss_fn=loss_fn,
                                          optimizer=optimizer,
                                          device=device)
        test_loss, test_acc = test_step(model=model,
          dataloader=test_dataloader,
          loss_fn=loss_fn,
          device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        if writer:

        ### NEW: Experiment tracking with TensorBoard
            writer.add_scalars(
                main_tag="Loss", 
                tag_scalar_dict={
                    "train_loss": train_loss, 
                    "test_loss": test_loss},
                global_step=epoch
            )


            writer.add_scalars(
                main_tag="Accuracy", 
                tag_scalar_dict={
                    "train_acc": train_acc, 
                    "test_acc": test_acc},
                global_step=epoch
            )

            writer.add_graph(
                model=model,
                input_to_model=torch.randn(32, 3, 224, 224).to(device)
            )

        writer.close()
    else:
        pass
    
    ### END NEW

    # Return the filled results at the end of the epochs
    return results

## 7. Setting up a series of modelling experiments

### 7.1 What kind of experiments should one run?

The number of machine learning experiments you can run, is like the number of different models you can build... almost limitless.

However, one can't test everything...

What should one test?
* Change the number of epochs
* Change the number of hidden layers/units
* Change the amount of data (at this moment i am using 10%)
* Change learning rates
* Try defferent kinds of data augmentation
* Choose a different model architecture

This is why transfer learning is so powerful, because, it's a working model that one can apply to one's own problem.

### 7.2 What experiment are we running?

I am going to turn three dials:
1. Model size - EffnetB0 vs EffnetB2 (in terms of number of parameters)
2. Dataset size - 10% of pizza, steak, sushi images vs 20% (generally more data = better results)
3. Training time - 5 epochs vs 10 epochs (generally longer training time = better results, up to a point)

To begin, we're still keeping things relatively small, so that the experiments run quickly.

The goal is a model that is well performing, but still small enough to run on a mobile device or web browser, so FoodVision Mini can come to life.

If one had infinit time + compute, you should basically always choose the biggest model and biggest dataset you can.


### 7.3 Download different datasets

We want two datasets:
1. Pizza, steak, sushi 10% - https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip
2. Pizza, steak, sushi 20% - https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip

In [31]:
# Download 10% first (for completeness)
data_10_percent_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    destination="pizza_steak_sushi_10_per",
    remove_source=False
)

data_20_percent_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip",
    destination="pizza_steak_sushi_20_per",
    remove_source=False
)


[INFO] Did not find data\pizza_steak_sushi_10_per directory. Creating now.
[INFO] Downloading https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip to data\pizza_steak_sushi.zip
[INFO] Extracting data\pizza_steak_sushi.zip to data\pizza_steak_sushi_10_per
[INFO] Did not find data\pizza_steak_sushi_20_per directory. Creating now.
[INFO] Downloading https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip to data\pizza_steak_sushi_20_percent.zip
[INFO] Extracting data\pizza_steak_sushi_20_percent.zip to data\pizza_steak_sushi_20_per


### 7.4 Transform datasets and create `DataLoaders`

1. Resize the images to (224, 224)
2. Make sure image tensor values are between [0, 1]
3. Normalize the images so they have the same data distribution as ImageNet

In [35]:
# Setup training directory paths
train_dir_10 = data_10_percent_path / "train"
train_dir_20 = data_20_percent_path / "train"

# Setup testing directory path
test_dir_10 = data_10_percent_path / "test"

train_dir_10, train_dir_20, test_dir_10

(WindowsPath('data/pizza_steak_sushi_10_per/train'),
 WindowsPath('data/pizza_steak_sushi_20_per/train'),
 WindowsPath('data/pizza_steak_sushi_10_per/test'))

In [36]:
# Setup ImageNet normalization levels
# See here https://pytorch.org/vision/0.12/models.html
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)

# Compose transforms into a pipeline
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize
])

In [37]:
BATCH_SIZE = 32

In [42]:
# Create dataloaders
train_dataloader_10, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir_10, 
    test_dir_10, 
    simple_transform, 
    batch_size=BATCH_SIZE
)

train_dataloader_20, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir_20, 
    test_dir_10, 
    simple_transform, 
    batch_size=BATCH_SIZE
)

print(f"Number of batches of size {BATCH_SIZE} in 10% train data: {len(train_dataloader_10)}")
print(f"Number of batches of size {BATCH_SIZE} in 20% train data: {len(train_dataloader_20)}")
print(f"Number of batches of size {BATCH_SIZE} in test data: {len(test_dataloader)}")
print(f"Class names: {class_names}")

Number of batches of size 32 in 10% train data: 8
Number of batches of size 32 in 20% train data: 15
Number of batches of size 32 in test data: 3
Class names: ['pizza', 'steak', 'sushi']


### 7.5 Create featuer extractor models

We want two functions:
1. Creates a `torchvision.models.efficientnet_b0()` feature extractor with a frozen backbone/base layers and a custom classifier head.
2. Creates a `torchvision.models.efficientnet_b2()` feature extractor with a frozen backbone/base layers and a custom classifier head.

In [46]:
import torchvision

# Create an EffnetB2
effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights).to(device)
effnetb2

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [52]:
summary(
    model=effnetb2,
    input_size=(32, 3, 224, 224),
    verbose=0,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 1000]           --                   True
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1408, 7, 7]     --                   True
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   True
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   864                  True
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   64                   True
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 16, 112

In [50]:
import torchvision
from torch import nn

OUT_FEATURES = len(class_names)

# Create a EffNetB0 feature extractor
def create_effnetb0():
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights).to(device)

    for param in model.parameters():
        param.requires_grad = False

    set_seeds()
    model.classifier = nn.Sequential(
        nn.Dropout(0.2, inplace=True),
        nn.Linear(1280, OUT_FEATURES)
    ).to(device)

    model.name = "effnetb0"
    print(f"[INFO] Created model: {model.name}")
    return model

In [None]:
# Create a EffNetB0 feature extractor
def create_effnetb2():
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    model = torchvision.models.efficientnet_b2(weights=weights).to(device)

    for param in model.parameters():
        param.requires_grad = False

    set_seeds()
    model.classifier = nn.Sequential(
        nn.Dropout(0.3, inplace=True),
        nn.Linear(1408, OUT_FEATURES)
    ).to(device)

    model.name = "effnetb2"
    print(f"[INFO] Created model: {model.name}")
    return model

In [57]:
created_model_test_effnetb2 = create_effnetb2()
created_model_test_effnetb0 = create_effnetb0()

[INFO] Created model: effnetb2
[INFO] Created model: effnetb0


In [None]:
summary(
    model=created_model_test_effnetb2,
    input_size=(32, 3, 224, 224),
    verbose=0,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1408, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 

In [58]:
summary(
    model=created_model_test_effnetb0,
    input_size=(32, 3, 224, 224),
    verbose=0,
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"]
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224]    [32, 3]              --                   Partial
├─Sequential (features)                                      [32, 3, 224, 224]    [32, 1280, 7, 7]     --                   False
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224]    [32, 32, 112, 112]   --                   False
│    │    └─Conv2d (0)                                       [32, 3, 224, 224]    [32, 32, 112, 112]   (864)                False
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112]   [32, 32, 112, 112]   (64)                 False
│    │    └─SiLU (2)                                         [32, 32, 112, 112]   [32, 32, 112, 112]   --                   --
│    └─Sequential (1)                                        [32, 32, 112, 112]   [32, 