# 07 PyTorch Experiment Tracking

Machine learning is very experimental.

To figure out which experiments are worth pursuing, **experiment tracking** comes in: it helps you keep track of what works and what doesn't.

In this notebook, we're going to see an example of programmatically tracking experiments.

Resources:  
- https://www.learnpytorch.io/07_pytorch_experiment_tracking/
- Extra: https://madewithml.com/courses/mlops/experiment-tracking/


In [None]:
import torch
import torchvision

print(torch.__version__)
print(torchvision.__version__)

In [None]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
    from going_modular.going_modular import data_setup, engine
except:
    # Get the going_modular scripts
    print("[INFO] Couldn't find going_modular scripts... downloading them from GitHub.")
    !git clone https://github.com/mrdbourke/pytorch-deep-learning
    !mv pytorch-deep-learning/going_modular .
    !rm -rf pytorch-deep-learning
    from going_modular.going_modular import data_setup, engine

In [None]:
# Set up device-agnostic code: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

In [None]:
# Set seeds
def set_seeds(seed: int=42):
    """Seeds the torch random number generators.

    Args:
        seed (int, optional): Random seed to set. Defaults to 42.
    """
    # First the seed for general torch operations, then the seed for
    # CUDA torch operations (ones that happen on the GPU)
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

## 1. Getting the data

Want to get the pizza, steak and sushi images.

So we can run experiments building FoodVision Mini and see which model performs best.


In [None]:
import os
import requests
from pathlib import Path
import zipfile

def download_data(source: str, destination: str,
                  remove_source: bool=True)->Path:
    """Downloads a zip file from a URL and extracts it to data/<destination>.

    If data/<destination> already exists as a directory, nothing is
    downloaded and the existing path is returned.

    Args:
        source (str): URL of the zip file to download, e.g.
            https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip
        destination (str): Name of the directory inside data/ to extract to.
        remove_source (bool, optional): Whether to delete the downloaded zip
            file after extracting it. Defaults to True.

    Returns:
        Path: data/<destination>.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # Set up path to data folder
    data_path = Path("data/")
    image_path = data_path / destination

    # The target is a directory, so check with is_dir() (is_file() is never true for it)
    if image_path.is_dir():
        print(f"[INFO] {image_path} directory already exists, skipping download")
        return image_path

    # Download first and fail loudly on an HTTP error, so that a failed
    # download never leaves an empty target directory behind that a later
    # call would mistake for existing data
    target_file = Path(source).name
    zip_path = data_path / target_file
    print(f"[INFO] Downloading {target_file} from {source}...")
    response = requests.get(source)
    response.raise_for_status()
    data_path.mkdir(parents=True, exist_ok=True)
    zip_path.write_bytes(response.content)

    # Opening the archive validates it before the target directory is created
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        print(f"[INFO] Extracting {target_file}...")
        image_path.mkdir(parents=True, exist_ok=True)
        zip_ref.extractall(image_path)

    # Remove zip file if needed
    if remove_source:
        os.remove(zip_path)

    return image_path



In [None]:
image_path=download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                        destination="pizza_steak_sushi")

### 2.1 Create DataLoaders with manual transforms
The goal with transforms is to ensure your custom data is formatted in a reproducible way as well as a way that will be used by your model.

In [None]:
# Build the train and test directory paths from the downloaded image folder
train_dir, test_dir = (image_path / split for split in ("train", "test"))

train_dir, test_dir

In [None]:
# ImageNet normalization levels
# (see the 06 notebook for an explanation of normalization)
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

from torchvision import datasets, transforms

# Manual pipeline: resize, convert to tensor, then normalize
manual_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]
)

print (f"manually created transforms: {manual_transform}")
from going_modular.going_modular import data_setup

# Create training and testing DataLoaders and get the list of class names
(train_dataloader,
 test_dataloader,
 class_names) = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=manual_transform,  # resize, convert images to between 0 & 1 and normalize them
    batch_size=32,  # set mini-batch size to 32
)

train_dataloader, test_dataloader, class_names

### 2.2 Create DataLoaders using automatically created transforms

The same principle applies for automatic transforms: we want our custom data in the same format as a pretrained model was trained on.

In [None]:

# Get a set of pretrained model weights (plenty of these are available in torchvision.models)
# DEFAULT = best available weights
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT

# Get the transforms that belong to these pretrained weights
auto_transforms = weights.transforms()

print (f"automatically created transforms: {auto_transforms}")

# Recreate the DataLoaders, this time with the automatically created transforms
(train_dataloader,
 test_dataloader,
 class_names) = data_setup.create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=auto_transforms,  # resize, convert images to between 0 & 1 and normalize them
    batch_size=32,  # set mini-batch size to 32
)

train_dataloader, test_dataloader, class_names

## 3. Getting a pretrained model, freeze base layers and change classifier head

In [None]:
weights=torchvision.models.EfficientNet_B0_Weights.DEFAULT
model=torchvision.models.efficientnet_b0(weights=weights)
model

In [None]:
#print with torchinfo
from torchinfo import summary
summary(model, input_size=(1,3,224,224), #example of [batch_size, color_channels, height, width]
        col_names=["input_size","output_size","num_params","trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
# Freeze the base layers: switch off gradient tracking for every parameter in model.features
for param in model.features.parameters():
    param.requires_grad_(False)

In [None]:
# Update the classifier head of our model to suit our problem
from torch import nn

# Seed before creating the new layers
set_seeds()

# Dropout - https://pytorch.org/docs/stable/generated/torch.nn.Dropout.html
model.classifier = nn.Sequential(
    nn.Dropout(p=0.3),
    nn.Linear(
        in_features=1280,  # feature vector coming in
        out_features=len(class_names),  # how many classes do we have
    ),
)
model.classifier.to(device)

model.classifier

## 4. Train a single model and track results

To track experiments, we're using TensorBoard: https://www.tensorflow.org/tensorboard

And to interact with TensorBoard, we can use `SummaryWriter`: https://pytorch.org/docs/stable/tensorboard.html


In [None]:
#Define loss function and optimizer
loss_fn=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
#set up summarywriter:-https://pytorch.org/tutorials/recipes/recipes/tensorboard_with_pytorch.html
from torch.utils.tensorboard import SummaryWriter
writer=SummaryWriter()

In [None]:
from going_modular.going_modular.engine import train_step, test_step

from tqdm.auto import tqdm
from typing import Dict, List, Tuple

#take train step from engine.py #need to update so it uses a summary writer

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device) -> Dict[str, List]:
    """Trains and tests a PyTorch model and logs the results to TensorBoard.

    Passes a target PyTorch model through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout. The loss
    and accuracy of every epoch are also written to the notebook-level
    `writer` (a SummaryWriter created in an earlier cell), followed by the
    model graph once training has finished.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]}
    For example if training for epochs=2:
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]}
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Make sure model on target device
    model.to(device)

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        ### New: experiment tracking ###
        # Add this epoch's loss results to the SummaryWriter
        writer.add_scalars(main_tag="Loss",
                           tag_scalar_dict={"train_loss": train_loss,
                                            "test_loss": test_loss},
                           global_step=epoch)
        # Add this epoch's accuracy results to the SummaryWriter
        writer.add_scalars(main_tag="Accuracy",
                           tag_scalar_dict={"train_acc": train_acc,
                                            "test_acc": test_acc},
                           global_step=epoch)

    # The architecture does not change between epochs, so trace the graph once
    # (with an example input) instead of once per epoch
    writer.add_graph(model=model,
                     input_to_model=torch.zeros((1, 3, 224, 224)).to(device))

    # Close the writer only after the last epoch has been logged
    writer.close()
    ### End new ###

    # Return the filled results at the end of the epochs
    return results


In [None]:
#train model
#note: not using engine.py since we've updated the train function
set_seeds()
train_results=train(model=model,
                   train_dataloader=train_dataloader,
                   test_dataloader=test_dataloader,
                   optimizer=optimizer,
                   loss_fn=loss_fn,
                   epochs=5,
                   device=device)

train_results

## 5. View our model's results with TensorBoard

There are a few ways to view TensorBoard results: https://www.learnpytorch.io/07_pytorch_experiment_tracking/#5-view-our-models-results-in-tensorboard

In [None]:
#Let's view our experiments within the notebook
# Load the TensorBoard notebook extension
%load_ext tensorboard
%tensorboard --logdir runs

## 6. Create a function to prepare a `SummaryWriter()` instance

By default our `SummaryWriter()` class saves to the `log_dir` `runs/CURRENT_DATETIME_HOSTNAME`.

How about if we wanted to save different experiments to different folders?

In essence, one experiment means one folder.

For example we'd like to track:
* Experiment date/timestamp
* Experiment name
* Model name
* Extra - is there anything else that should be tracked?

Let's create a function to create a `SummaryWriter()` instance to take all of these things into account.

So ideally we end up tracking experiments to a directory:

`runs/YYYY-MM-DD/experiment_name/model_name/extra`

In [None]:
from torch.utils.tensorboard import SummaryWriter

def create_writer(experiment_name: str,
                  model_name: str,
                  extra: str =None):
    """Creates a torch.utils.tensorboard.SummaryWriter() instance saving to a specific log_dir.

    log_dir is runs/YYYY-MM-DD/experiment_name/model_name/extra, where the
    date is the current date and the trailing extra part is only added when
    `extra` is given (and not empty).

    Args:
        experiment_name (str): Name of the experiment.
        model_name (str): Name of the model.
        extra (str, optional): Anything extra to add to the directory. Defaults to None.

    Returns:
        The SummaryWriter() instance saving to log_dir.
    """
    from datetime import datetime
    import os

    # Path components, starting with the current date as year-month-day
    path_parts = ["runs",
                  datetime.now().strftime("%Y-%m-%d"),
                  experiment_name,
                  model_name]
    if extra:
        path_parts.append(extra)

    # Create log directory path
    log_dir = os.path.join(*path_parts)

    print(f"[INFO] Created SummaryWriter saving to {log_dir}")

    return SummaryWriter(log_dir=log_dir)

In [None]:
example_writer=create_writer(experiment_name="data_10_percent",
                           model_name="effnet_b0",
                           extra="5_epochs")

### 6.1 Update the `train()` function to include `writer` parameter

In [None]:
#update train() to use create_writer...

from going_modular.going_modular.engine import train_step, test_step

from tqdm.auto import tqdm
from typing import Dict, List, Tuple

#take train step from engine.py #need to update so it uses a summary writer

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
          writer: torch.utils.tensorboard.writer.SummaryWriter = None) -> Dict[str, List]:
    """Trains and tests a PyTorch model, optionally logging to TensorBoard.

    Passes a target PyTorch model through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout. When a
    writer is given, the loss and accuracy of every epoch are written to it,
    followed by the model graph once training has finished.

    Args:
    model: A PyTorch model to be trained and tested.
    train_dataloader: A DataLoader instance for the model to be trained on.
    test_dataloader: A DataLoader instance for the model to be tested on.
    optimizer: A PyTorch optimizer to help minimize the loss function.
    loss_fn: A PyTorch loss function to calculate loss on both datasets.
    epochs: An integer indicating how many epochs to train for.
    device: A target device to compute on (e.g. "cuda" or "cpu").
    writer: A SummaryWriter() instance to log the results to. Pass None
      (the default) to train without experiment tracking.

    Returns:
    A dictionary of training and testing loss as well as training and
    testing accuracy metrics. Each metric has a value in a list for
    each epoch.
    In the form: {train_loss: [...],
              train_acc: [...],
              test_loss: [...],
              test_acc: [...]}
    For example if training for epochs=2:
             {train_loss: [2.0616, 1.0537],
              train_acc: [0.3945, 0.3945],
              test_loss: [1.2641, 1.5706],
              test_acc: [0.3400, 0.2973]}
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []
    }

    # Make sure model on target device
    model.to(device)

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)

        # Print out what's happening
        print(
          f"Epoch: {epoch+1} | "
          f"train_loss: {train_loss:.4f} | "
          f"train_acc: {train_acc:.4f} | "
          f"test_loss: {test_loss:.4f} | "
          f"test_acc: {test_acc:.4f}"
        )

        # Update results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        ### New: experiment tracking ###
        if writer is not None:
            # Add this epoch's loss results to the SummaryWriter
            writer.add_scalars(main_tag="Loss",
                               tag_scalar_dict={"train_loss": train_loss,
                                                "test_loss": test_loss},
                               global_step=epoch)
            # Add this epoch's accuracy results to the SummaryWriter
            writer.add_scalars(main_tag="Accuracy",
                               tag_scalar_dict={"train_acc": train_acc,
                                                "test_acc": test_acc},
                               global_step=epoch)

    if writer is not None:
        # The architecture does not change between epochs, so trace the graph
        # once (with an example input) instead of once per epoch
        writer.add_graph(model=model,
                         input_to_model=torch.zeros((1, 3, 224, 224)).to(device))

        # Close the writer only after the last epoch has been logged
        writer.close()
    ### End new ###

    # Return the filled results at the end of the epochs
    return results


## 7. Setting up a series of modelling experiments

* Setup 2x modelling experiments with pizza, steak, sushi data and train one model for 5 epochs and another model for 10 epochs.

In [None]:
# Train one fresh model per experiment so the runs are independent and comparable.
# Reusing the notebook's existing `model`/`optimizer` would start each run from
# weights that earlier training has already updated.
epochs = [5, 10]
for epoch in epochs:
    print(f"training model for {epoch} epochs")

    # Same seed and same pretrained starting point for every experiment
    set_seeds()
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights)

    # Freeze the base layers and attach a new classifier head
    for param in model.features.parameters():
        param.requires_grad = False
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.3),
        nn.Linear(in_features=1280,
                  out_features=len(class_names))).to(device)

    # New loss function and optimizer for the new model
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

    # One SummaryWriter (and therefore one log directory) per experiment
    epoch_writer = create_writer(experiment_name=f"data_at_{epoch}_epochs",
                                 model_name="effnet_b0",
                                 extra=f"{epoch}_epochs")

    # Note: not using engine.py since we've updated the train function
    train_results = train(model=model,
                          train_dataloader=train_dataloader,
                          test_dataloader=test_dataloader,
                          optimizer=optimizer,
                          loss_fn=loss_fn,
                          epochs=epoch,
                          device=device,
                          writer=epoch_writer)

    print(f"{epoch} epochs complete")

### 7.1 What kind of experiments should you run?

The number of ML experiments you can run is like the number of models you can build...almost limitless.

However, you can't test everything...

So what do we test?
* Change the number of epochs
* Change the number of hidden layers/units
* Change the amount of data (right now we're using 10% of the food 101 dataset for Pizza steak and sushi)
* Change learning rate
* Change optimizer
* Change different model architecture
* Try different data augmentation

This is why transfer learning is so powerful, it's a working model that you can apply to your own work                                                                                       

### 7.2 What experiment are we going to run?

We're looking to turn three dials:
1. Model size - EffnetB0 vs EffnetB2 (in terms of number of params)
2. Dataset size - 10% of PSS images vs 20% (generally more data = better results)
3. Training time - 5 epochs vs 10 epochs (generally longer training time = better results, up to a point)

To begin we're keeping things relatively small so our experiments run quickly

Our goal: a model that is well performing but still small enough to run on a mobile device or web browser, so FoodVisionMini can come to life.

If you had infinite compute + time you should basically always choose the biggest model and biggest dataset you can.

See:
http://www.incompleteideas.net/IncIdeas/BitterLesson.html

### 7.3 Download different datasets

We want two datasets:

1. 10% of pizza, steak, sushi: https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip
2. 20% of pizza, steak, sushi: https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip

They were created with: https://github.com/mrdbourke/pytorch-deep-learning/blob/main/04_pytorch_custom_datasets.ipynb

In [None]:
#Download 10% and 20% data
data_10_percent_path=download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                                   destination="pizza_steak_sushi")

data_20_percent_path=download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip",
                                   destination="pizza_steak_sushi_20_percent")

### 7.4 Transform Datasets and Create DataLoaders

We'll need to transform our data in a few ways:

1. Resize the images to (224,224)
2. Make sure image tensor values are between [0,1]
3. Normalize the images so they have the same data distribution as ImageNet

When using pretrained models/transfer learning, you must transform your data in the same way as the data the model was trained on.

In [None]:
# Set up the training directory paths (one per dataset size)
train_dir_10_percent, train_dir_20_percent = (
    dataset_path / "train"
    for dataset_path in (data_10_percent_path, data_20_percent_path)
)

# Set up the test directory (taken from the 10% dataset)
test_dir = data_10_percent_path / "test"

train_dir_10_percent, train_dir_20_percent, test_dir

In [None]:
from torchvision import transforms

# Set up ImageNet normalization levels
# See here: https://pytorch.org/vision/0.12/models.html
normalize = torchvision.transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Compose the transform: resize, convert to tensor, then normalize
simple_transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]
)

In [None]:
BATCH_SIZE = 32

from going_modular.going_modular.data_setup import create_dataloaders

# Create the 10% training DataLoader (together with the test DataLoader and class names)
(train_dataloader_10_percent,
 test_dataloader,
 class_names) = create_dataloaders(train_dir=train_dir_10_percent,
                                   test_dir=test_dir,
                                   transform=simple_transform,
                                   batch_size=BATCH_SIZE)

# Create the 20% training DataLoader (the test directory is the same as above)
(train_dataloader_20_percent,
 test_dataloader,
 class_names) = create_dataloaders(train_dir=train_dir_20_percent,
                                   test_dir=test_dir,
                                   transform=simple_transform,
                                   batch_size=BATCH_SIZE)

# Report how many batches each DataLoader holds
print(f"number of batches of size {BATCH_SIZE} in training dataloader 10% percent: {len(train_dataloader_10_percent)}")
print(f"number of batches of size {BATCH_SIZE} in test dataloader 10% percent: {len(test_dataloader)}")
print(f"number of batches of size {BATCH_SIZE} in training dataloader 20% percent: {len(train_dataloader_20_percent)}")

### 7.5 Create feature extraction models

We want two functions:
1. Create a `torchvision.models.efficientnet_b0()` feature extractor with a frozen backbone/base layers and a custom classifier head (EffNetB0)
2. Create a `torchvision.models.efficientnet_b2()` feature extractor with a frozen backbone/base layers and a custom classifier head (EffNetB2)

In [None]:
import torchvision

#create an EffNetB2
effnetb2_weights=torchvision.models.EfficientNet_B2_Weights.DEFAULT
effnetb2=torchvision.models.efficientnet_b2(weights=effnetb2_weights)
effnetb2

In [None]:
#print with torchinfo
from torchinfo import summary
summary(effnetb2, input_size=(1,3,224,224), #example of [batch_size, color_channels, height, width]
        col_names=["input_size","output_size","num_params","trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
import torchvision
from torch import nn

OUT_FEATURES=len(class_names)

#create an EffNetB0 feature extractor

def create_effnetb0():
    """Creates an EfficientNet-B0 model with frozen pretrained layers and a new classifier head.

    Returns:
        The model, with its `name` attribute set to "effnetb0".
    """
    # Get the default pretrained weights and set up a model on the target device
    pretrained_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    effnet = torchvision.models.efficientnet_b0(weights=pretrained_weights).to(device)

    # Freeze every parameter of the pretrained model
    effnet.requires_grad_(False)

    # Change the classifier head (seeded first); the new layers are
    # created after the freeze, so they are not frozen by it
    set_seeds()
    output_layer = nn.Linear(in_features=1280,
                             out_features=OUT_FEATURES).to(device)
    effnet.classifier = nn.Sequential(nn.Dropout(p=0.2), output_layer)

    # Give the model a name
    effnet.name = "effnetb0"
    print(f"[INFO] Create {effnet.name} model...")

    return effnet

In [None]:
created_model_test=create_effnetb0()

In [None]:
import torchvision
from torch import nn

OUT_FEATURES=len(class_names)

#create an EffNetB2 feature extractor

def create_effnetb2():
    """Creates an EfficientNet-B2 model with frozen pretrained layers and a new classifier head.

    Returns:
        The model, with its `name` attribute set to "effnetb2".
    """
    # Get the default pretrained weights and set up a model on the target device
    pretrained_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    effnet = torchvision.models.efficientnet_b2(weights=pretrained_weights).to(device)

    # Freeze every parameter of the pretrained model
    effnet.requires_grad_(False)

    # Change the classifier head (seeded first); the new layers are
    # created after the freeze, so they are not frozen by it
    set_seeds()
    output_layer = nn.Linear(in_features=1408,
                             out_features=OUT_FEATURES).to(device)
    effnet.classifier = nn.Sequential(nn.Dropout(p=0.3), output_layer)

    # Give the model a name
    effnet.name = "effnetb2"
    print(f"[INFO] Create {effnet.name} model...")

    return effnet

In [None]:
created_model_test_effnetb2=create_effnetb2()

### 7.6 Create experiments and set up training code

In [None]:
#Create epoch list
num_epochs=[5,10]

#create model list (need to create a new model for each experiment)
models=["effnetb0", "effnetb2"]

#Create a DataLoaders dictionary
train_dataloaders={
    "data_10_percent":train_dataloader_10_percent,
    "data_20_percent":train_dataloader_20_percent
}

In [None]:
%%time
from going_modular.going_modular.utils import save_model

# Set seeds
set_seeds()

# Keep track of experiment numbers
experiment_number = 0

# Run every combination of DataLoader, number of epochs and model
for dataloader_name, train_dataloader in train_dataloaders.items():
    for epochs in num_epochs:
        for model_name in models:

            # Print out info
            experiment_number += 1
            print(f"[INFO] Experiment number: {experiment_number}")
            print(f"[INFO] Model: {model_name} ")
            print(f"[INFO] DataLoader: {dataloader_name}")
            print(f"[INFO] Epochs: {epochs}")

            # Select and create a new model instance for this experiment
            if model_name == "effnetb0":
                model = create_effnetb0()
            else:
                model = create_effnetb2()

            # Create a new loss and optimizer for every model
            loss_fn = torch.nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(params=model.parameters(),
                                         lr=0.001)

            # Train target model with target dataloader and track experiments.
            # The log directory uses "<n>_epochs" (no space), matching the
            # saved model filename below and the earlier experiments.
            train(model=model,
                  train_dataloader=train_dataloader,
                  test_dataloader=test_dataloader,
                  optimizer=optimizer,
                  loss_fn=loss_fn,
                  epochs=epochs,
                  device=device,
                  writer=create_writer(experiment_name=dataloader_name,
                                       model_name=model_name,
                                       extra=f"{epochs}_epochs"))

            # Save the model to file so we can import it later
            save_filepath = f"07_{model_name}_{dataloader_name}_{epochs}_epochs.pth"
            save_model(model=model,
                       target_dir="models",
                       model_name=save_filepath)

            print("-"*50 + "\n")

## 8. View experiments in TensorBoard

We've followed the principle experiment, experiment, experiment...

Now let's visualise, visualise, visualise...

In [None]:
#Let's view experiments within TensorBoard from within the notebook
%load_ext tensorboard
%tensorboard --logdir=runs

The best performing model was:
* Model: EffNetB2
* DataLoader: 20% of pizza, steak, sushi
* Epochs: 10

And the overall trend of all the results was that more data, bigger model and longer training time generally led to better results.

In [None]:
# # Upload the results to TensorBoard.dev (uncomment to try it out)
# !tensorboard dev upload --logdir runs \
#     --name "07. PyTorch Experiment Tracking: FoodVision Mini model results" \
#     --description "Comparing results of different model size, training data amount and training time."



Running the cell above results in the experiments from this notebook being publicly viewable at: https://tensorboard.dev/experiment/VySxUYY7Rje0xREYvCvZXA/

    Note: Beware that anything you upload to tensorboard.dev is publicly available for anyone to see. So if you do upload your experiments, make sure they don't contain sensitive information.



## 9. Load in the best Model and make predictions with it

This is our best model filepath: `models/07_effnetb2_data_20_percent_10_epochs.pth`

In [None]:
# Setup the best model filepath
best_model_path = "models/07_effnetb2_data_20_percent_10_epochs.pth"

# Instantiate a new instance of EffNetB2 (to load the saved state_dict() to)
best_model = create_effnetb2()

# Load the saved best model state_dict(). map_location moves the saved tensors
# onto the current device, so a checkpoint saved on a GPU also loads on a
# CPU-only machine.
best_model.load_state_dict(torch.load(best_model_path, map_location=device))

Our goal is to create a FoodVision Mini model that performs well enough and is able to run on a mobile device/web browser.

In [None]:
# Check the model file size
from pathlib import Path

# Size in bytes -> whole kilobytes -> whole megabytes (rounded down)
effnetb2_model_size = Path(best_model_path).stat().st_size // 1024 // 1024
print(f"Model size: {effnetb2_model_size} MB")

In [None]:
#Import function to make predictions on images

In [None]:
from going_modular.going_modular.predictions import pred_and_plot_image

# Pick a random sample of 3 image paths from the 20% test dataset
import random
num_images_to_plot = 3
random_image_paths = [*Path(data_20_percent_path/"test").glob("*/*.jpg")]
test_image_path_sample = random.sample(random_image_paths,
                                       k=num_images_to_plot)

# Make and plot a prediction with the best model on every sampled image
for image_path in test_image_path_sample:
    pred_and_plot_image(
        model=best_model,
        image_path=image_path,
        class_names=class_names,
        image_size=(224, 224),
    )

### 9.1 Predict on a custom image with best model

In [None]:
# Download custom image
import requests

# Setup custom image path
custom_image_path = Path("data/04-pizza-dad.jpeg")

# Download the image if it doesn't already exist
if not custom_image_path.is_file():
    print(f"Downloading {custom_image_path}...")
    # When downloading from GitHub, need to use the "raw" file link
    request = requests.get("https://raw.githubusercontent.com/mrdbourke/pytorch-deep-learning/main/images/04-pizza-dad.jpeg")
    # Only write the file after a successful download; otherwise a failed
    # request would leave a broken image that later runs skip over
    request.raise_for_status()
    custom_image_path.parent.mkdir(parents=True, exist_ok=True)
    custom_image_path.write_bytes(request.content)
else:
    print(f"{custom_image_path} already exists, skipping download.")

# Predict on our own custom image
pred_and_plot_image(model=best_model,
                    image_path=custom_image_path,
                    class_names=class_names)

## Exercises

In [None]:
# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+
try:
    import torch
    import torchvision
    assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
    assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")
except:
    print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
    !pip3 install -U --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu113
    import torch
    import torchvision
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")

In [None]:
device="cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
#get regular imports
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

try:
  from torchinfo import summary
except:
  print(f"[INFO] Couldn't import torchinfo...installing")
  !pip install torchinfo
  from torchinfo import summary

# try to import going modular file from github
try:
  from going_modular import data_setup, engine
except:
  print(f"[INFO] Couldn't import going_modular...downloading")
  !git clone https://github.com/jjandmoreletters/JJ-s-Code
  !mv JJ-s-Code/going_modular .
  !rm -rf JJ-s-Code
  from going_modular import data_setup, engine

In [None]:
# Getting setup
import torch
import torchvision

# This notebook requires torch v1.12+ and torchvision v0.13+
print(torch.__version__)
print(torchvision.__version__)

In [None]:
#set seeds
def set_seeds(seed:int=77):
    """Seeds torch's random number generators.

    Args:
        seed (int, optional): Value passed to each seeding function. Defaults to 77.
    """
    # Seed general torch operations first, then CUDA operations
    for apply_seed in (torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)

In [None]:
import os
import zipfile

from pathlib import Path

import requests
def download_data(source:str,
                  destination:str,
                  remove_source:bool=None)->Path:
    """Downloads a zipped dataset from source and unzips to destination.

    If ``data/<destination>`` already exists the download is skipped and the
    existing directory is returned untouched.

    Args:
        source (str): A link to a zipped file containing data.
        destination (str): A target directory (created under ``data/``) to unzip data to.
        remove_source (bool): Whether to remove the downloaded zip file after
            extracting. Any falsy value (including the default None) keeps it.

    Returns:
        pathlib.Path to downloaded data.

    Raises:
        requests.HTTPError: If the server answers with an error status.

    Example usage:
        download_data(source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
                      destination="pizza_steak_sushi")
    """
    # Setup data paths
    data_path = Path("data/")
    image_path = data_path / destination

    # Nothing to do when the dataset directory is already there
    if image_path.is_dir():
        print(f"{image_path} already exists, skipping download.")
        return image_path

    print(f"Did not find {image_path}...creating")
    data_path.mkdir(parents=True, exist_ok=True)

    # Download the archive; request first so a failed download
    # does not leave an empty zip file behind
    target_file = Path(source).name
    archive_path = data_path / target_file
    print(f"Downloading {target_file} from {source}...")
    request = requests.get(source)
    request.raise_for_status()
    with open(archive_path, "wb") as f:
        f.write(request.content)

    # Create the dataset directory only once there is something to extract,
    # so a failed download is retried on the next call instead of being skipped
    image_path.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(archive_path, "r") as zip_ref:
        print(f"Unzipping {target_file}...")
        zip_ref.extractall(image_path)

    # Remove the archive if specified
    if remove_source:
        os.remove(archive_path)

    return image_path


# Fetch the pizza/steak/sushi dataset; remove_source is not passed,
# so the downloaded zip archive is kept
image_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    destination="pizza_steak_sushi",
)

# Display the directory the images were extracted to
image_path

In [None]:
from torch.utils.tensorboard import SummaryWriter
def create_writer(experiment_name: str,
                  model_name: str,
                  extra: str=None):
    """Builds a SummaryWriter that logs under a dated, per-experiment directory.

    The log directory is runs/<timestamp>/<experiment_name>/<model_name>[/<extra>],
    where <timestamp> is today's date formatted as YYYY-MM-DD, so every
    experiment run on the same day ends up under the same date folder.

    Args:
        experiment_name (str): Name of experiment.
        model_name (str): Name of model.
        extra (str, optional): Extra trailing path component. Skipped when
            empty or None. Defaults to None.

    Returns:
        torch.utils.tensorboard.writer.SummaryWriter(): Writer saving to the log directory.

    Example usage:
        # Logs to "runs/2022-06-04/data_10_percent/effnetb2/5_epochs/"
        writer = create_writer(experiment_name="data_10_percent",
                               model_name="effnetb2",
                               extra="5_epochs")
    """
    import os
    from datetime import datetime

    # Current date in YYYY-MM-DD format
    today = datetime.now().strftime("%Y-%m-%d")

    # Collect the path components, appending the optional suffix last
    path_parts = ["runs", today, experiment_name, model_name]
    if extra:
        path_parts.append(extra)
    log_dir = os.path.join(*path_parts)

    print(f"[INFO] Created SummaryWriter, saving to: {log_dir}...")
    return SummaryWriter(log_dir=log_dir)

In [None]:
from tqdm.auto import tqdm
from typing import List,Dict

from going_modular.engine import train_step, test_step

def train(model: torch.nn.Module,
          train_dataloader: torch.utils.data.DataLoader,
          test_dataloader: torch.utils.data.DataLoader,
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device,
          writer: torch.utils.tensorboard.writer.SummaryWriter = None # optional writer for experiment tracking
          ) -> Dict[str, List]:
    """Trains and tests a PyTorch model.

    Passes a target PyTorch models through train_step() and test_step()
    functions for a number of epochs, training and testing the model
    in the same epoch loop.

    Calculates, prints and stores evaluation metrics throughout.

    Stores metrics to specified writer log_dir if present.

    Args:
      model: A PyTorch model to be trained and tested.
      train_dataloader: A DataLoader instance for the model to be trained on.
      test_dataloader: A DataLoader instance for the model to be tested on.
      optimizer: A PyTorch optimizer to help minimize the loss function.
      loss_fn: A PyTorch loss function to calculate loss on both datasets.
      epochs: An integer indicating how many epochs to train for.
      device: A target device to compute on (e.g. "cuda" or "cpu").
      writer: An optional SummaryWriter() instance to log model results to.
        When given, it is closed once all epochs have finished. Defaults
        to None (no logging).

    Returns:
      A dictionary of training and testing loss as well as training and
      testing accuracy metrics. Each metric has a value in a list for
      each epoch.
      In the form: {train_loss: [...],
                train_acc: [...],
                test_loss: [...],
                test_acc: [...]}
      For example if training for epochs=2:
              {train_loss: [2.0616, 1.0537],
                train_acc: [0.3945, 0.3945],
                test_loss: [1.2641, 1.5706],
                test_acc: [0.3400, 0.2973]}
    """
    # Create empty results dictionary
    results = {"train_loss": [],
               "train_acc": [],
               "test_loss": [],
               "test_acc": []}

    # Loop through training and testing steps for a number of epochs
    for epoch in tqdm(range(epochs)):
        train_loss, train_acc = train_step(model=model,
                                           dataloader=train_dataloader,
                                           loss_fn=loss_fn,
                                           optimizer=optimizer,
                                           device=device)
        test_loss, test_acc = test_step(model=model,
                                        dataloader=test_dataloader,
                                        loss_fn=loss_fn,
                                        device=device)

        # Print out what's happening
        print(f"Epoch: {epoch+1} | Train loss: {train_loss:.4f} | Train acc: {train_acc:.4f} | Test loss: {test_loss:.4f} | Test acc: {test_acc:.4f}")

        # Append losses and accuracies to the results dictionary
        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)

        ### New: Use the writer parameter to track experiments ###
        # See if there's a writer, if so, log this epoch's metrics to it
        if writer:
            writer.add_scalars(main_tag="Loss",
                               tag_scalar_dict={"train_loss": train_loss,
                                                "test_loss": test_loss},
                               global_step=epoch)
            writer.add_scalars(main_tag="Accuracy",
                               tag_scalar_dict={"train_acc": train_acc,
                                                "test_acc": test_acc},
                               global_step=epoch)

    # Close the writer once, after the final epoch, rather than after every epoch
    if writer:
        writer.close()
    ### End new ###

    # Return the filled results at the end of the epochs
    return results

In [None]:
# Download 10 percent and 20 percent training data (if necessary)
data_10_percent_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip",
    destination="pizza_steak_sushi",
)

data_20_percent_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip",
    destination="pizza_steak_sushi_20_percent",
)

In [None]:
# Training directories, one per dataset size
train_dir_10_percent = data_10_percent_path / "train"
train_dir_20_percent = data_20_percent_path / "train"

# Testing directory (note: the same test data is used for both models)
test_dir = data_10_percent_path / "test"

# Check the directories
for label, directory in (("train_dir_10_percent", train_dir_10_percent),
                         ("train_dir_20_percent", train_dir_20_percent),
                         ("testing directory", test_dir)):
    print(f"{label}: {directory}")

In [None]:
from torchvision import transforms

# Normalize with the ImageNet per-channel statistics, ordered [red, green, blue]
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Transform pipeline: resize -> tensor (image values between 0 & 1) -> normalize
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

In [None]:
BATCH_SIZE = 32

# Create 10% training and test DataLoaders
train_dataloader_10_percent, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir_10_percent,
                                                                                          test_dir=test_dir,
                                                                                          transform=simple_transform,
                                                                                          batch_size=BATCH_SIZE)

# Create 20% training and test DataLoaders
# (test_dataloader and class_names are rebound here; both calls use the same test_dir)
train_dataloader_20_percent, test_dataloader, class_names = data_setup.create_dataloaders(train_dir=train_dir_20_percent,
                                                                                          test_dir=test_dir,
                                                                                          transform=simple_transform,
                                                                                          batch_size=BATCH_SIZE)

# Find the number of samples/batches per dataloader (using the same test_dataloader for both experiments)
print(f"Number of batches of size {BATCH_SIZE} in 10 percent training data: {len(train_dataloader_10_percent)}")
print(f"Number of batches of size {BATCH_SIZE} in 20 percent training data: {len(train_dataloader_20_percent)}")
# Report the test DataLoader's length here, not the 10% training DataLoader's
print(f"Number of batches of size {BATCH_SIZE} in testing data: {len(test_dataloader)} (all experiments will use the same test set)")
print(f"Number of classes: {len(class_names)}, class names: {class_names}")

### Exercise 1: Pick a larger model from torchvision.models to add to the list of experiments (for example, EffNetB3 or higher)

Tried out EffNetB0 and EffNetB2, with ~4M and ~9M parameters respectively. Now trying a bigger model.

EfficientNet_V2_S: https://pytorch.org/vision/main/models/generated/torchvision.models.efficientnet_v2_s.html#torchvision.models.EfficientNet_V2_S_Weights

We'll compare EffNetB2 to EffNetV2_S.

In [None]:
import torchvision.models as models
# DEFAULT = best available pretrained weights
effnetv2_s_weights = models.EfficientNet_V2_S_Weights.DEFAULT

# Build EfficientNetV2-S with those weights and display its architecture
effnetv2_s = models.efficientnet_v2_s(weights=effnetv2_s_weights)
effnetv2_s

In [None]:
def _create_frozen_effnet(builder, weights, dropout: float, out_features: int, name: str):
  """Builds a pretrained EfficientNet feature extractor with a fresh classifier head.

  Args:
    builder: torchvision model constructor accepting a `weights` keyword.
    weights: Pretrained weights to load into the model.
    dropout: Dropout probability used in the new classifier head.
    out_features: Number of output classes for the new classifier head.
    name: Value stored on the model's `name` attribute.

  Returns:
    The model on `device`, with `features` frozen and a new trainable head.
  """
  model = builder(weights=weights).to(device)

  # torchvision's EfficientNet head is Sequential(Dropout, Linear), so the
  # Linear layer at index 1 gives the input width the new head needs
  in_features = model.classifier[1].in_features

  # Freeze base layers
  for param in model.features.parameters():
    param.requires_grad = False

  # Seed right before creating the head so its initial weights are repeatable
  set_seeds()

  # Replace the classifier head
  model.classifier = nn.Sequential(nn.Dropout(p=dropout),
                                   nn.Linear(in_features=in_features,
                                             out_features=out_features)).to(device)

  # Set model name
  model.name = name
  print(f"Model name: {model.name}")
  return model


def create_effnetv2_s(out_features: int=len(class_names)):
  """Creates an EfficientNetV2-S feature extractor named "effnetv2_s".

  Args:
    out_features: Number of output classes. Defaults to the number of
      entries in `class_names` at the time this function is defined.

  Returns:
    The model with frozen base layers and a new head (dropout p=0.2).
  """
  return _create_frozen_effnet(builder=torchvision.models.efficientnet_v2_s,
                               weights=torchvision.models.EfficientNet_V2_S_Weights.DEFAULT, # DEFAULT = best available weights
                               dropout=0.2,
                               out_features=out_features,
                               name="effnetv2_s")


def create_effnetb2(out_features: int=len(class_names)):
  """Creates an EfficientNet-B2 feature extractor named "effnetb2".

  Args:
    out_features: Number of output classes. Defaults to the number of
      entries in `class_names` at the time this function is defined.

  Returns:
    The model with frozen base layers and a new head (dropout p=0.3).
  """
  return _create_frozen_effnet(builder=torchvision.models.efficientnet_b2,
                               weights=torchvision.models.EfficientNet_B2_Weights.DEFAULT, # DEFAULT = best available weights
                               dropout=0.3,
                               out_features=out_features,
                               name="effnetb2")

In [None]:
# Display the classifier head of effnetv2_s
effnetv2_s.classifier

In [None]:
from torchinfo import summary

# Rebuild effnetv2_s as a feature extractor (frozen base, new classifier head)
effnetv2_s = create_effnetv2_s()

# Summarize layers for a single 3x224x224 input
summary(
    model=effnetv2_s,
    input_size=(1, 3, 224, 224),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

### Set up modelling experiments

Same code as used in https://www.learnpytorch.io/07_pytorch_experiment_tracking/#76-create-experiments-and-set-up-training-code

In [None]:
# Epoch counts to try
num_epochs = [5, 10]

# Names of the models to try
models = ["effnetb2", "effnetv2_s"]

# Training DataLoaders to try, keyed by experiment name
train_dataloaders = {
    "data_10_percent": train_dataloader_10_percent,
    "data_20_percent": train_dataloader_20_percent,
}

In [None]:
%%time
from going_modular.utils import save_model

# Set seeds
set_seeds()

# Keep track of experiment numbers
experiment_number = 0

# Loop through each DataLoader
for dataloader_name, train_dataloader in train_dataloaders.items():
    # Loop through the epochs
    for epochs in num_epochs:
        # Loop through each model name and create a new model instance
        for model_name in models:

            # Print out info
            experiment_number += 1
            print(f"[INFO] Experiment number: {experiment_number}")
            print(f"[INFO] Model: {model_name} ")
            print(f"[INFO] DataLoader: {dataloader_name}")
            print(f"[INFO] Epochs: {epochs}")

            # Select and create the model
            if model_name == "effnetb2":
                model = create_effnetb2()
            else:
                model = create_effnetv2_s()

            # Create a new loss and optimizer for every model
            loss_fn = torch.nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(params=model.parameters(),
                                         lr=0.001)

            # Train target model with target dataloader and track experiments.
            # The log directory suffix uses an underscore ("5_epochs") so the
            # path has no spaces and matches the create_writer() example.
            train(model=model,
                  train_dataloader=train_dataloader,
                  test_dataloader=test_dataloader,
                  optimizer=optimizer,
                  loss_fn=loss_fn,
                  epochs=epochs,
                  device=device,
                  writer=create_writer(experiment_name=dataloader_name,
                                       model_name=model_name,
                                       extra=f"{epochs}_epochs"))

            # Save the model to file so we can import it later
            save_filepath = f"07_{model_name}_{dataloader_name}_{epochs}_epochs.pth"
            save_model(model=model,
                       target_dir="models",
                       model_name=save_filepath)

            print("-"*50 + "\n")

### Inspect model results

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs

EffNetV2_S with 10 epochs and 20% of the data gets the best performance (lowest test loss and highest test accuracy).

### Exercise 2. Introduce data augmentation to the list of experiments using the 20% pizza, steak, sushi training and test datasets, does this change anything?
* For example, you could have one training DataLoader that uses data augmentation (e.g. `train_dataloader_20_percent_aug` and `train_dataloader_20_percent_no_aug`) and then compare the results of two of the same model types training on these two DataLoaders.
  * **Note**: You may need to alter the `create_dataloaders()` function to be able to take a transform for the training data and the testing data (because you don't need to perform data augmentation on the test data)

In [None]:
from torchvision import transforms

# Training pipeline *with* augmentation: resize -> TrivialAugmentWide -> tensor -> normalize
train_transform_data_aug = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.TrivialAugmentWide(),
    transforms.ToTensor(), # get image values between 0 & 1
    normalize,
])

# Pipeline *without* augmentation: resize -> tensor -> normalize
no_data_aug_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

In [None]:
# Have to update `create_dataloaders()` to handle different augmentations
import os
from torch.utils.data import DataLoader
from torchvision import datasets

NUM_WORKERS = os.cpu_count() # use maximum number of CPUs for workers to load data

# Note: this is an updated version of data_setup.create_dataloaders to handle
# different train and test transforms.
def create_dataloaders(
    train_dir,
    test_dir,
    train_transform, # add parameter for train transform (transforms on train dataset)
    test_transform,  # add parameter for test transform (transforms on test dataset)
    batch_size: int = 32, num_workers: int = NUM_WORKERS
  ):
    """Creates training and testing DataLoaders.

    Takes in a training directory and testing directory path and turns
    them into PyTorch Datasets and then into PyTorch DataLoaders.

    Args:
      train_dir: Path to training directory.
      test_dir: Path to testing directory.
      train_transform: torchvision transforms to perform on the training data.
      test_transform: torchvision transforms to perform on the testing data.
      batch_size: Number of samples per batch in each of the DataLoaders.
      num_workers: An integer for number of workers per DataLoader.

    Returns:
      A tuple of (train_dataloader, test_dataloader, class_names).
      Where class_names is a list of the target classes.

    Example usage:
      train_dataloader, test_dataloader, class_names = \
          create_dataloaders(train_dir=path/to/train_dir,
                             test_dir=path/to/test_dir,
                             train_transform=some_train_transform,
                             test_transform=some_test_transform,
                             batch_size=32,
                             num_workers=4)
    """
    # Use ImageFolder to create dataset(s), each with its own transform
    train_data = datasets.ImageFolder(train_dir, transform=train_transform)
    test_data = datasets.ImageFolder(test_dir, transform=test_transform)

    # Get class names
    class_names = train_data.classes

    # Turn images into data loaders
    train_dataloader = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
    )
    test_dataloader = DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=False, # keep a fixed order for the test data; shuffling is only needed for training
        num_workers=num_workers,
        pin_memory=True,
    )

    return train_dataloader, test_dataloader, class_names

In [None]:
# Create train and test directories from the 20% dataset
# (image_path points at the "pizza_steak_sushi" download, not the 20% one)
train_20_percent_dir = data_20_percent_path / "train"
test_20_percent_dir = data_20_percent_path / "test"

BATCH_SIZE = 32

# Create train dataloader *with* data augmentation
train_dataloader_20_percent_with_aug, test_dataloader_20_percent, class_names = create_dataloaders(train_dir=train_20_percent_dir,
                                                                                                   test_dir=test_20_percent_dir,
                                                                                                   train_transform=train_transform_data_aug,
                                                                                                   test_transform=no_data_aug_transform,
                                                                                                   batch_size=BATCH_SIZE)

# Create train dataloader *without* data augmentation
# (uses the plain transform for training so the two loaders actually differ)
train_dataloader_20_percent_without_aug, test_dataloader_20_percent, class_names = create_dataloaders(train_dir=train_20_percent_dir,
                                                                                                   test_dir=test_20_percent_dir,
                                                                                                   train_transform=no_data_aug_transform,
                                                                                                   test_transform=no_data_aug_transform,
                                                                                                   batch_size=BATCH_SIZE)

#### Create a function for viewing different images

In [None]:
# Visualize different samples from both dataloaders (aug and no aug)
def view_dataloader_images(dataloader, n=10):
    """Plots up to n images from the first batch of a DataLoader in a single row.

    Each image is min-max scaled for display and titled with its class name,
    looked up in the global `class_names`.

    Args:
        dataloader: DataLoader yielding (images, labels) batches.
        n: Number of images to show. Values above 10 are lowered to 10, and
            the count is further limited to the number of images in the batch.
    """
    if n > 10:
        print("Having n higher than 10 will create messy plots, lowering to 10.")
        n = 10
    imgs, labels = next(iter(dataloader))

    # Don't index past the end of a batch smaller than n
    n = min(n, len(imgs))

    plt.figure(figsize=(16, 8))
    for i in range(n):
        # Min max scale the image for display purposes
        targ_image = imgs[i]
        sample_min, sample_max = targ_image.min(), targ_image.max()
        sample_scaled = (targ_image - sample_min)/(sample_max - sample_min)

        # Plot images with appropriate axes information
        plt.subplot(1, 10, i+1)
        plt.imshow(sample_scaled.permute(1, 2, 0)) # move the first dimension last for Matplotlib
        plt.title(class_names[labels[i]])
        plt.axis(False)

In [None]:
# Check out samples with data augmentation
# (plots up to 10 images from the first batch of this DataLoader)
view_dataloader_images(train_dataloader_20_percent_with_aug)

In [None]:
# Check out samples without data augmentation
# (plots up to 10 images from the first batch of this DataLoader)
view_dataloader_images(train_dataloader_20_percent_without_aug)

#### Augmentation v no augmentation

In [None]:
# Epoch counts to try
num_epochs = [5, 10]

# Training DataLoaders to compare, keyed by experiment name
train_dataloaders = {
    "data_20_percent_with_aug": train_dataloader_20_percent_with_aug,
    "data_20_percent_without_aug": train_dataloader_20_percent_without_aug,
}

# Names of the models to train
models = ["effnetv2_s"]

In [None]:
%%time
from going_modular.utils import save_model

# Seed once up front and start counting experiments
set_seeds(seed=42)
experiment_number = 0

# One experiment per (DataLoader, epoch count, model name) combination
for dataloader_name, train_dataloader in train_dataloaders.items():
    for epochs in num_epochs:
        for model_name in models:

            # Report which experiment is about to run
            experiment_number += 1
            print(f"[INFO] Experiment number: {experiment_number}")
            print(f"[INFO] Model: {model_name}")
            print(f"[INFO] DataLoader: {dataloader_name}")
            print(f"[INFO] Number of epochs: {epochs}")

            # "effnetb2" gets EffNetB2; any other name gets EffNetV2-S
            build_model = create_effnetb2 if model_name == "effnetb2" else create_effnetv2_s
            model = build_model()

            # Fresh loss function and optimizer for every model
            loss_fn = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

            # Writer for this experiment, then train and track
            writer = create_writer(experiment_name=dataloader_name,
                                   model_name=model_name,
                                   extra=f"{epochs}_epochs")
            train(model=model,
                  train_dataloader=train_dataloader,
                  test_dataloader=test_dataloader_20_percent, # evaluate on the 20 percent test DataLoader
                  optimizer=optimizer,
                  loss_fn=loss_fn,
                  epochs=epochs,
                  device=device,
                  writer=writer)

            # Save the trained model so the best one can be loaded later
            save_filepath = f"07_{model_name}_{dataloader_name}_{epochs}_epochs.pth"
            save_model(model=model,
                       target_dir="models",
                       model_name=save_filepath)
            print("-"*50 + "\n")

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs



Looks like EffNetV2_S without data augmentation performed the best on average for test loss and test accuracy.



### Exercise 3. Scale up the dataset to turn FoodVision Mini into FoodVision Big using the entire Food101 dataset from `torchvision.datasets`

In [None]:
#get Food 101 Dataset
import torchvision
from torchvision import transforms

# Normalize with the ImageNet per-channel statistics, ordered [red, green, blue]
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Resize -> tensor (image values between 0 & 1) -> normalize
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

# Build the train split first, then the test split, with identical settings
train_data, test_data = (
    torchvision.datasets.Food101(root="data",
                                 split=split_name,
                                 transform=simple_transform,
                                 download=True)
    for split_name in ("train", "test")
)

len(train_data), len(test_data)

### Create Food101 DataLoaders

In [None]:
# Create DataLoaders
import os
BATCH_SIZE = 512 # use a big batch size to get through all the images (100,000+ in Food101)

# Settings shared by both Food101 DataLoaders
_food101_loader_settings = {
    "batch_size": BATCH_SIZE,
    "num_workers": os.cpu_count(),
    "pin_memory": True, # avoid copies of the data into and out of memory, where possible (for speed ups)
}

# Only the training data is shuffled
train_dataloader_big = torch.utils.data.DataLoader(train_data,
                                                   shuffle=True,
                                                   **_food101_loader_settings)

test_dataloader_big = torch.utils.data.DataLoader(test_data,
                                                  shuffle=False,
                                                  **_food101_loader_settings)

### Create FoodVision Big model and train it

In [None]:
effnetv2_s_weights = torchvision.models.EfficientNet_V2_S_Weights.DEFAULT # DEFAULT = best available weights

# Move the whole model to the target device, not just the new classifier head;
# otherwise the backbone stays on the CPU and a GPU run hits a device mismatch
foodvision_big_model = torchvision.models.efficientnet_v2_s(weights=effnetv2_s_weights).to(device)

# Freeze base layers
for param in foodvision_big_model.features.parameters():
    param.requires_grad = False

# Change the classifier to have 101 output classes (one per Food101 class)
foodvision_big_model.classifier = nn.Sequential(nn.Dropout(p=0.2),
                                                nn.Linear(in_features=1280,
                                                          out_features=101)).to(device)
summary(model=foodvision_big_model,
        input_size=(1, 3, 224, 224))

In [None]:
# Keep the epoch count in one place so the log directory name matches the
# number of epochs actually trained (the global `epochs` left over from the
# earlier experiment loops holds a different value)
foodvision_big_epochs = 5

foodvision_big_results = train(model=foodvision_big_model,
                               train_dataloader=train_dataloader_big,
                               test_dataloader=test_dataloader_big,
                               optimizer=torch.optim.Adam(params=foodvision_big_model.parameters(), lr=0.001),
                               loss_fn=torch.nn.CrossEntropyLoss(),
                               epochs=foodvision_big_epochs,
                               device=device,
                               writer=create_writer(experiment_name="food101_all_data",
                                                    model_name="foodvision_big",
                                                    extra=f"{foodvision_big_epochs}_epochs"))

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs