# Experiment Tracing
Experiment Tracking helps figure out what works and what doesn't

## 0. Getting Setup

In [2]:
import torch
import torchvision
import matplotlib.pyplot as plt
from torch import nn
from torchvision import transforms
from torchinfo import summary
from going_modular import data_setup, engine

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [4]:
def set_seeds(seed: int=42):
    """Sets random seed for torch operations"""
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

## 1. Get Data

In [5]:
import os
import zipfile
from pathlib import Path
import requests

def download_data(source: str, destination: str, remove_source: bool = True) -> Path:
    """Downloads a zipped dataset from the source and unzips to destination"""
    data_path = Path("data")
    image_path = data_path/destination
    if image_path.is_dir():
        print(f"[INFO] {image_path} directory exists. skipping download...")
        target_file = Path(source).name
    else:
        print(f"[INFO] Did not find {image_path} directory, creating one...")
        image_path.mkdir(parents=True, exist_ok=True)
        target_file = Path(source).name
        with open(data_path/target_file,"wb") as f:
            request = requests.get(source)
            print(f"[INFO] Downloading {target_file} from {source}...")
            f.write(request.content)
        with zipfile.ZipFile(data_path/target_file,"r") as zip_ref:
            print(f"[INFO] Unzipping {target_file} data...")
            zip_ref.extractall(image_path)
        if remove_source:
            os.remove(data_path/target_file)
    return image_path

image_path = download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                           destination="pizza_steak_sushi")
image_path

[INFO] data\pizza_steak_sushi directory exists. skipping download...


WindowsPath('data/pizza_steak_sushi')

## 2. Create Datasets and DataLoaders

In [7]:
train_dir = image_path/"train"
test_dir = image_path/"test"
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
automatic_transforms = weights.transforms()
print(f"Automatically created transforms: {automatic_transforms}")
train_dataloader, test_dataloader, class_names = data_setup.create_dataloader(train_dir=train_dir,
                                                                               test_dir=test_dir,
                                                                               transform=automatic_transforms,
                                                                               batch_size=32)
train_dataloader, test_dataloader

Automatically created transforms: ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)


(<torch.utils.data.dataloader.DataLoader at 0x189f71b8d00>,
 <torch.utils.data.dataloader.DataLoader at 0x189ef8592a0>)

## 3. Getting a pretrained model, freezing the base layers and changing the classifier head

In [8]:
model = torchvision.models.efficientnet_b0(weights=weights).to(device)

In [14]:
summary(model, 
        input_size=(32,3,224,224),
        col_names=["input_size","output_size","num_params","trainable"],
        col_width=10,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape Output Shape Param #    Trainable
EfficientNet (EfficientNet)                                  [32, 3, 224, 224] [32, 1000] --         True
├─Sequential (features)                                      [32, 3, 224, 224] [32, 1280, 7, 7] --         True
│    └─Conv2dNormActivation (0)                              [32, 3, 224, 224] [32, 32, 112, 112] --         True
│    │    └─Conv2d (0)                                       [32, 3, 224, 224] [32, 32, 112, 112] 864        True
│    │    └─BatchNorm2d (1)                                  [32, 32, 112, 112] [32, 32, 112, 112] 64         True
│    │    └─SiLU (2)                                         [32, 32, 112, 112] [32, 32, 112, 112] --         --
│    └─Sequential (1)                                        [32, 32, 112, 112] [32, 16, 112, 112] --         True
│    │    └─MBConv (0)                                       [32, 32, 112, 112] [32, 16, 112, 112] 1,448

In [15]:
for param in model.features.parameters():
    param.requires_grad = False

set_seeds()
model.classifier = torch.nn.Sequential(nn.Dropout(p=0.2,inplace=True),
                                       nn.Linear(in_features=1280, out_features=len(class_names),bias=True)).to(device)


## 4. Train model and track results

In [16]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [17]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter()

In [20]:
from typing import Dict, List
from tqdm import tqdm
from going_modular.engine import train_step, test_step

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device=device) -> Dict[str,List]:
    results = {"train_loss": [], "train_acc": [], "test_loss": [], "test_acc": []}
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)
        print(f"Epoch: {epoch+1} | train_loss: {train_loss:.4f} | train_acc: {train_acc:.4f} | test_loss: {test_loss:.4f} | test_acc: {test_acc:.4f}")

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        writer.add_scalars(main_tag="Accuracy",
                           tag_scalar_dict={"train_acc": train_acc,
                                            "test_acc": test_acc},
                           global_step=epoch)
        writer.add_graph(model=model,
                         input_to_model=torch.randn(32,3,224,224).to(device))

    writer.close()
    return results

In [21]:
set_seeds()
results = train(model=model,
                train_dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                optimizer=optimizer,
                loss_fn=loss_fn,
                epochs=5,
                device=device)

  0%|                                                                                            | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 1.0924 | train_acc: 0.3984 | test_loss: 0.9132 | test_acc: 0.5398


 20%|████████████████▊                                                                   | 1/5 [00:17<01:11, 17.94s/it]

Epoch: 2 | train_loss: 0.8975 | train_acc: 0.6562 | test_loss: 0.7837 | test_acc: 0.8561


 40%|█████████████████████████████████▌                                                  | 2/5 [00:34<00:51, 17.27s/it]

Epoch: 3 | train_loss: 0.8038 | train_acc: 0.7461 | test_loss: 0.6723 | test_acc: 0.8864


 60%|██████████████████████████████████████████████████▍                                 | 3/5 [00:51<00:34, 17.07s/it]

Epoch: 4 | train_loss: 0.6770 | train_acc: 0.8516 | test_loss: 0.6698 | test_acc: 0.8049


 80%|███████████████████████████████████████████████████████████████████▏                | 4/5 [01:09<00:17, 17.28s/it]

Epoch: 5 | train_loss: 0.7065 | train_acc: 0.7188 | test_loss: 0.6746 | test_acc: 0.7737


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:26<00:00, 17.31s/it]


In [23]:
results

{'train_loss': [1.0923891514539719,
  0.8974885493516922,
  0.8037762567400932,
  0.6769649833440781,
  0.7064977586269379],
 'train_acc': [0.3984375, 0.65625, 0.74609375, 0.8515625, 0.71875],
 'test_loss': [0.9132375717163086,
  0.7837276458740234,
  0.672258714834849,
  0.6698037981987,
  0.6745620767275492],
 'test_acc': [0.5397727272727273,
  0.8560606060606061,
  0.8863636363636364,
  0.8049242424242425,
  0.7736742424242425]}

## 5. View our model's results in TensorBoard

In [25]:
%load_ext tensorboard
%tensorboard --logdir runs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## 6. Create a helper function to build `SummaryWriter()` instances

In [28]:
def create_writer(experiment_name: str,
                  model_name: str,
                  extra: str=None) -> torch.utils.tensorboard.writer.SummaryWriter():
    from datetime import datetime
    import os
    timestamp = datetime.now().strftime("%Y-%m-%d")
    if extra:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name, extra)
    else:
        log_dir = os.path.join("runs", timestamp, experiment_name, model_name)
    print(f"[INFO] Created SummaryWriter, saving to: {log_dir}...")
    return SummaryWriter(log_dir=log_dir)