# Experiment tracking

Experiment tracking can be summed up as what did work, and what didn't work. There are plenty of ways to track experiments, such as with python dictionaries, csv files, printouts, as we've been using so far, but also _[TensorBoard](https://www.tensorflow.org/tensorboard)_, _[Weights & Biases](https://wandb.ai)_, and _[MlFlow](https://mlflow.org)_.

All of these different tools are ways of testing whether an experiment is worth pursuing.


## Setup


In [1]:
import random
import matplotlib.pyplot as plt

from pathlib import Path
from timeit import default_timer as timer

import torch
import torchinfo
import torchvision

import torch.nn as nn
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

from modules import data_setup, engine, utils

### Setting device agnostic code


In [2]:
# Setting mps/cuda as device
if torch.cuda.is_available():
    device_type = "cuda"
else:
    device_type = "mps" if torch.mps.is_available() else "cpu"

device = torch.device(device_type)

## Functions


In [3]:
def create_effnetb0(num_classes: int, device: torch.device | str) -> nn.Module:
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights).to(device)

    for param in model.features.parameters():
        param.requires_grad = False

    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=False),
        nn.Linear(in_features=1280, out_features=num_classes, bias=True),
    )

    return model

In [4]:
def create_effnetb2(num_classes: int, device: torch.device | str) -> nn.Module:
    weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    model = torchvision.models.efficientnet_b2(weights=weights).to(device)

    for param in model.features.parameters():
        param.requires_grad = False

    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=False),
        nn.Linear(in_features=1408, out_features=num_classes, bias=True),
    )

    return model

## Preparing data


In [5]:
# Setting up paths
data_path = Path("../data/")
image_path = data_path / "pizza_steak_sushi"

# Setting up dirs
train_dir = image_path / "train"
test_dir = image_path / "test"

In [6]:
# Getting EfficientNet's weights
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Getting transform from weights
automatic_transforms = weights.transforms()

# Creating DataLoaders
train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    batch_size=32,
    train_transform=automatic_transforms,
)

## Getting and setting up a pre-trained model


## Performing different experiments

Ideally, you perform various experiments with a same instance of `SummaryWriter` and save them into different folders within the `runs` directory.


### Import different data splits


In [7]:
# Setting train dir paths
train_dir_10_percent = data_path / "pizza_steak_sushi_10_percent" / "train"
train_dir_20_percent = data_path / "pizza_steak_sushi_20_percent" / "train"

# Setting up test dir paths
test_dir = data_path / "pizza_steak_sushi_10_percent" / "test"

### Creating DataLoaders


In [8]:
# Setting batch size
BATCH_SIZE = 32

# Creating DataLoaders for 10% data
train_dataloader_10p, test_dataloader_10p, class_names = data_setup.create_dataloaders(
    train_dir=train_dir_10_percent,
    test_dir=test_dir,
    batch_size=BATCH_SIZE,
    train_transform=automatic_transforms,
)

# Creating DataLoaders for 10% data
train_dataloader_20p, test_dataloader_20p, class_names = data_setup.create_dataloaders(
    train_dir=train_dir_20_percent,
    test_dir=test_dir,
    batch_size=BATCH_SIZE,
    train_transform=automatic_transforms,
)

### Training different experiments


In [9]:
# Different values of epochs
num_epochs = [15, 30]

# Different model names
model_names = ["effnetb0", "effnetb2"]

# DataLoaders dict
train_dataloaders = {
    "10_p": test_dataloader_10p,
    "20_p": test_dataloader_20p,
}

# Creating experiment counter
experiment_number = 0

In [10]:
for dataloader_name, train_dataloader in train_dataloaders.items():
    for epochs in num_epochs:
        for model_name in model_names:
            experiment_number += 1
            print("#" * 40)
            print(f"[INFO] Experiment number: |        {experiment_number} ")
            print("-" * 40)
            print(f"[INFO] Model:             |   {model_name} ")
            print("-" * 40)
            print(f"[INFO] DataLoader:        | {dataloader_name} ")
            print("-" * 40)
            print(f"[INFO] Number of epochs:  |         {epochs} ")
            print("#" * 40, end="\n\n")

            if model_name == "effnetb0":
                model = create_effnetb0(num_classes=3, device=device)
            else:
                model = create_effnetb2(num_classes=3, device=device)

            # Creating tensorboard writer
            writer = utils.create_writer(
                experiment_name=f"experiment_{experiment_number}",
                model_name=model_name,
                extra=f"{dataloader_name}-{epochs}_epochs",
            )

            loss_fn = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

            # Ensuring model is in device
            model.to(device)

            results = engine.train(
                model=model,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                writer=writer,
                epochs=epochs,
                device=device,
            )

            save_filepath = f"{model_name}-{dataloader_name}-{epochs}.pth"
            utils.save_model(
                model=model,
                target_dir="models",
                model_name=save_filepath,
            )

########################################
[INFO] Experiment number: |        1 
----------------------------------------
[INFO] Model:             |   effnetb0 
----------------------------------------
[INFO] DataLoader:        | 10_p 
----------------------------------------
[INFO] Number of epochs:  |         15 
########################################



  0%|          | 0/15 [00:00<?, ?it/s]


Epoch:                       |   1   |  
Train Loss:                  | 1.177 |

Train Accuracy:              | 0.186 |            
--------------------------------------
Test Loss:                   | 1.102 |

Test Accuracy:               | 0.266 |
              

Epoch:                       |   2   |  
Train Loss:                  | 0.999 |

Train Accuracy:              | 0.462 |            
--------------------------------------
Test Loss:                   | 1.001 |

Test Accuracy:               | 0.459 |
              

Epoch:                       |   3   |  
Train Loss:                  | 0.944 |

Train Accuracy:              | 0.533 |            
--------------------------------------
Test Loss:                   | 0.929 |

Test Accuracy:               | 0.522 |
              

Epoch:                       |   4   |  
Train Loss:                  | 0.894 |

Train Accuracy:              | 0.532 |            
--------------------------------------
Test Loss:                   |

  0%|          | 0/15 [00:00<?, ?it/s]


Epoch:                       |   1   |  
Train Loss:                  | 1.172 |

Train Accuracy:              | 0.125 |            
--------------------------------------
Test Loss:                   | 1.062 |

Test Accuracy:               | 0.522 |
              

Epoch:                       |   2   |  
Train Loss:                  | 1.003 |

Train Accuracy:              | 0.556 |            
--------------------------------------
Test Loss:                   | 0.978 |

Test Accuracy:               | 0.574 |
              

Epoch:                       |   3   |  
Train Loss:                  | 0.918 |

Train Accuracy:              | 0.626 |            
--------------------------------------
Test Loss:                   | 0.909 |

Test Accuracy:               | 0.635 |
              

Epoch:                       |   4   |  
Train Loss:                  | 0.893 |

Train Accuracy:              | 0.604 |            
--------------------------------------
Test Loss:                   |

  0%|          | 0/30 [00:00<?, ?it/s]


Epoch:                       |   1   |  
Train Loss:                  | 1.187 |

Train Accuracy:              | 0.238 |            
--------------------------------------
Test Loss:                   | 1.100 |

Test Accuracy:               | 0.352 |
              

Epoch:                       |   2   |  
Train Loss:                  | 1.046 |

Train Accuracy:              | 0.483 |            
--------------------------------------
Test Loss:                   | 0.971 |

Test Accuracy:               | 0.577 |
              

Epoch:                       |   3   |  
Train Loss:                  | 0.957 |

Train Accuracy:              | 0.514 |            
--------------------------------------
Test Loss:                   | 0.889 |

Test Accuracy:               | 0.647 |
              

Epoch:                       |   4   |  
Train Loss:                  | 0.900 |

Train Accuracy:              | 0.594 |            
--------------------------------------
Test Loss:                   |

  0%|          | 0/30 [00:00<?, ?it/s]


Epoch:                       |   1   |  
Train Loss:                  | 1.241 |

Train Accuracy:              | 0.146 |            
--------------------------------------
Test Loss:                   | 1.112 |

Test Accuracy:               | 0.395 |
              

Epoch:                       |   2   |  
Train Loss:                  | 1.058 |

Train Accuracy:              | 0.443 |            
--------------------------------------
Test Loss:                   | 1.006 |

Test Accuracy:               | 0.597 |
              

Epoch:                       |   3   |  
Train Loss:                  | 0.961 |

Train Accuracy:              | 0.563 |            
--------------------------------------
Test Loss:                   | 0.917 |

Test Accuracy:               | 0.668 |
              

Epoch:                       |   4   |  
Train Loss:                  | 0.924 |

Train Accuracy:              | 0.583 |            
--------------------------------------
Test Loss:                   |

  0%|          | 0/15 [00:00<?, ?it/s]


Epoch:                       |   1   |  
Train Loss:                  | 1.200 |

Train Accuracy:              | 0.188 |            
--------------------------------------
Test Loss:                   | 1.148 |

Test Accuracy:               | 0.270 |
              

Epoch:                       |   2   |  
Train Loss:                  | 1.039 |

Train Accuracy:              | 0.301 |            
--------------------------------------
Test Loss:                   | 0.996 |

Test Accuracy:               | 0.596 |
              

Epoch:                       |   3   |  
Train Loss:                  | 0.917 |

Train Accuracy:              | 0.584 |            
--------------------------------------
Test Loss:                   | 0.895 |

Test Accuracy:               | 0.596 |
              

Epoch:                       |   4   |  
Train Loss:                  | 0.907 |

Train Accuracy:              | 0.552 |            
--------------------------------------
Test Loss:                   |

  0%|          | 0/15 [00:00<?, ?it/s]


Epoch:                       |   1   |  
Train Loss:                  | 1.216 |

Train Accuracy:              | 0.248 |            
--------------------------------------
Test Loss:                   | 1.082 |

Test Accuracy:               | 0.423 |
              

Epoch:                       |   2   |  
Train Loss:                  | 1.043 |

Train Accuracy:              | 0.394 |            
--------------------------------------
Test Loss:                   | 0.985 |

Test Accuracy:               | 0.688 |
              

Epoch:                       |   3   |  
Train Loss:                  | 0.969 |

Train Accuracy:              | 0.574 |            
--------------------------------------
Test Loss:                   | 0.905 |

Test Accuracy:               | 0.719 |
              

Epoch:                       |   4   |  
Train Loss:                  | 0.912 |

Train Accuracy:              | 0.522 |            
--------------------------------------
Test Loss:                   |

  0%|          | 0/30 [00:00<?, ?it/s]


Epoch:                       |   1   |  
Train Loss:                  | 1.200 |

Train Accuracy:              | 0.238 |            
--------------------------------------
Test Loss:                   | 1.037 |

Test Accuracy:               | 0.546 |
              

Epoch:                       |   2   |  
Train Loss:                  | 1.009 |

Train Accuracy:              | 0.514 |            
--------------------------------------
Test Loss:                   | 0.915 |

Test Accuracy:               | 0.688 |
              

Epoch:                       |   3   |  
Train Loss:                  | 0.930 |

Train Accuracy:              | 0.554 |            
--------------------------------------
Test Loss:                   | 0.853 |

Test Accuracy:               | 0.615 |
              

Epoch:                       |   4   |  
Train Loss:                  | 0.896 |

Train Accuracy:              | 0.583 |            
--------------------------------------
Test Loss:                   |

  0%|          | 0/30 [00:00<?, ?it/s]


Epoch:                       |   1   |  
Train Loss:                  | 1.261 |

Train Accuracy:              | 0.188 |            
--------------------------------------
Test Loss:                   | 1.174 |

Test Accuracy:               | 0.186 |
              

Epoch:                       |   2   |  
Train Loss:                  | 1.087 |

Train Accuracy:              | 0.454 |            
--------------------------------------
Test Loss:                   | 1.062 |

Test Accuracy:               | 0.288 |
              

Epoch:                       |   3   |  
Train Loss:                  | 0.959 |

Train Accuracy:              | 0.595 |            
--------------------------------------
Test Loss:                   | 0.969 |

Test Accuracy:               | 0.562 |
              

Epoch:                       |   4   |  
Train Loss:                  | 0.904 |

Train Accuracy:              | 0.583 |            
--------------------------------------
Test Loss:                   |

## Viewing results with `tensorboard`


In [12]:
%load_ext tensorboard
%tensorboard --logdir runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 10653), started 0:00:08 ago. (Use '!kill 10653' to kill it.)