# Model Creation & Testing Notebook

In this notebook we create and test different models for food label prediction.

### Setup - Import Necessary Libraries & Data for Model Training

In [28]:
# Import basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import torch libraries
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import torchvision

# Import extra libraries
from tqdm.auto import tqdm
import os
import requests
from pathlib import Path

In [29]:
# Set up MLFlow configuration: runs are tracked in a local, file-based store
import mlflow

MODEL_REGISTRY = Path("/tmp/mlflow")
MODEL_REGISTRY.mkdir(parents=True, exist_ok=True)

# Path.as_uri() produces a well-formed file URI on every platform, whereas
# concatenating "file://" with str(path) yields a malformed URI for Windows
# paths. as_uri() requires an absolute path, hence the .absolute() call.
MLFLOW_TRACKING_URI = MODEL_REGISTRY.absolute().as_uri()
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
print(mlflow.get_tracking_uri())

# Left as the last expression so the cell displays the selected experiment
mlflow.set_experiment("FoodVision_Replica")

file:///tmp/mlflow


<Experiment: artifact_location='file:///tmp/mlflow/394942238179406615', creation_time=1730383309214, experiment_id='394942238179406615', last_update_time=1730383309214, lifecycle_stage='active', name='FoodVision_Replica', tags={}>

In [30]:
# Fetch the Food101 train and test splits from torchvision into the 'data' folder.
# Each split is built with its own ToTensor transform, train first and then test.
data_dir = Path("data")

train_data, test_data = (
    torchvision.datasets.Food101(root=data_dir, split=split_name, download=True,
                                 transform=torchvision.transforms.ToTensor())
    for split_name in ("train", "test")
)

### Define Utility Functions

In [31]:
def train_step(model, train_dataloader, optimizer, loss_fn):
    """
    Train the model for a single epoch and report the epoch's mean loss and accuracy

    Args:
    model : module called on each batch of images; its output is softmaxed over dim=1
    train_dataloader : iterable yielding (images, labels) batches
    optimizer : optimizer whose zero_grad() and step() are called once per batch
    loss_fn : callable invoked as loss_fn(logits, labels), returning a scalar tensor

    Returns:
    (loss, acc) : mean over batches of the batch loss and of the batch accuracy;
                  both are 0.0 when the dataloader yields no batches
    """

    epoch_loss = 0.0
    epoch_acc = 0.0

    model.train()
    for batch_num, (images, labels) in enumerate(train_dataloader, start=1):

        # Forward pass and predicted class for every sample in the batch
        logits = model(images)
        predicted_labels = logits.softmax(dim=1).argmax(dim=1)

        # Loss and fraction of correct predictions for this batch
        batch_loss = loss_fn(logits, labels)
        num_correct = (predicted_labels == labels).sum().item()
        batch_acc = num_correct / len(logits)

        # Clear stale gradients, back-propagate, then update the weights
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Incremental mean: fold this batch into the running epoch averages
        epoch_loss += (batch_loss.item() - epoch_loss) / batch_num
        epoch_acc += (batch_acc - epoch_acc) / batch_num

    return epoch_loss, epoch_acc


In [32]:
def eval_step(model, val_dataloader, loss_fn):
    """
    Evaluate the model on one pass over the validation data, without updating it

    Args:
    model : module called on each batch of images; its output is softmaxed over dim=1
    val_dataloader : iterable yielding (images, labels) batches
    loss_fn : callable invoked as loss_fn(logits, labels), returning a scalar tensor

    Returns:
    (loss, acc) : mean over batches of the batch loss and of the batch accuracy;
                  both are 0.0 when the dataloader yields no batches
    """
    mean_loss = 0.0
    mean_acc = 0.0

    model.eval()
    with torch.inference_mode():
        for batch_num, (images, labels) in enumerate(val_dataloader, start=1):

            # Forward pass and predicted class for every sample in the batch
            logits = model(images)
            predicted_labels = logits.softmax(dim=1).argmax(dim=1)

            # Loss and fraction of correct predictions for this batch
            batch_loss = loss_fn(logits, labels).detach().item()
            batch_acc = (predicted_labels == labels).sum().item() / len(logits)

            # Incremental mean: fold this batch into the running averages
            mean_loss += (batch_loss - mean_loss) / batch_num
            mean_acc += (batch_acc - mean_acc) / batch_num

    return mean_loss, mean_acc


In [33]:
def train_model(model, num_epochs, train_dataloader, val_dataloader, optimizer, loss_fn):
    """
    Train the model for num_epochs epochs, validating after every epoch

    Args:
    model : model passed on to train_step and eval_step
    num_epochs (int) : number of train/validate cycles to run
    train_dataloader : batches passed to train_step each epoch
    val_dataloader : batches passed to eval_step each epoch
    optimizer : optimizer passed to train_step
    loss_fn : loss function passed to both train_step and eval_step

    Returns:
    dict with keys "training_losses", "validation_losses", "training_accuracies"
    and "validation_accuracies", each a list holding one value per epoch
    """

    # Per-epoch history, filled in as training progresses
    history = {"training_losses" : [], "validation_losses" : [],
               "training_accuracies" : [], "validation_accuracies" : []}

    for _ in tqdm(range(num_epochs)):

        # One pass over the training data (updates the model)
        epoch_train_loss, epoch_train_acc = train_step(model, train_dataloader, optimizer, loss_fn)
        history["training_losses"].append(epoch_train_loss)
        history["training_accuracies"].append(epoch_train_acc)

        # One pass over the validation data (no model updates)
        epoch_val_loss, epoch_val_acc = eval_step(model, val_dataloader, loss_fn)
        history["validation_losses"].append(epoch_val_loss)
        history["validation_accuracies"].append(epoch_val_acc)

    return history


In [34]:
def plot_loss_curves(train_losses : list, val_losses : list):
    """
    Draw the training and validation losses as two side-by-side line plots

    Args:
    train_losses (list) : training loss recorded for each epoch
    val_losses (list) : validation loss recorded for each epoch
    """

    fig, axes = plt.subplots(1,2, figsize=(6,4))

    # Left panel shows training, right panel shows validation
    panels = (("Training Loss Curve", train_losses),
              ("Validation Loss Curve", val_losses))
    for axis, (title, losses) in zip(axes, panels):
        axis.plot(losses)
        axis.set_title(title)
        axis.set_ylabel("Loss")
        axis.set_xlabel("Epoch")

    fig.tight_layout()
    plt.show()

def plot_accuracy_curves(train_accs : list, val_accs : list):
    """
    Plot the training and validation accuracy curves

    Args:
    train_accs (list) : list containing the training accuracy for each epoch
    val_accs (list) : list containing the validation accuracy for each epoch
    """

    fig, axes = plt.subplots(1,2, figsize=(6,4))

    # Left panel shows training, right panel shows validation
    panels = (("Training Accuracy Curve", train_accs),
              ("Validation Accuracy Curve", val_accs))
    for axis, (title, accuracies) in zip(axes, panels):
        axis.plot(accuracies)
        axis.set_title(title)
        axis.set_ylim(0,1)  # Same fixed y-range on both panels so they are comparable
        axis.set_ylabel("Accuracy")
        axis.set_xlabel("Epoch")

    fig.tight_layout()
    plt.show()

In [35]:
def create_effnetb0_classifier(out_features):
    """
    Build an EfficientNet B0 with pretrained, frozen weights and a new trainable classifier head

    Args:
    out_features (int) : number of outputs of the new classifier layer (one per class)

    Returns:
    The EfficientNet B0 model; only the parameters of the replaced classifier are trainable
    """
    # Load the default pretrained weights. Calling efficientnet_b0() without
    # `weights` gives a randomly initialised network, and freezing that would
    # leave the new head training on top of random features.
    weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
    model = torchvision.models.efficientnet_b0(weights=weights)

    for param in model.parameters():  # Freeze internal parameters
        param.requires_grad = False

    # Read the head's input width from the model instead of hard-coding it
    in_features = model.classifier[1].in_features

    # Replace the classifier; the new layers are created unfrozen, so they are trainable
    model.classifier = nn.Sequential(nn.Dropout(p=0.2),
                                     nn.Linear(in_features=in_features, out_features=out_features))
    return model

def effnet_mlflow_run(name, num_classes, train_params,
                      train_dataloader, val_dataloader):
    """
    Train an EfficientNet B0 classifier inside an MLflow run and log its parameters, metrics and model

    Args:
    name (str) : name given to the MLflow run
    num_classes (int) : number of outputs of the model's classifier layer
    train_params (dict) : training configuration, logged as the run's parameters. Must contain:
        "optimizer" : callable invoked as optimizer(params=..., lr=...)
        "lr" : learning rate handed to the optimizer
        "num_epochs" : number of epochs to train for
        "loss_fn" : callable invoked with no arguments to build the loss function
    train_dataloader : dataloader used for the training steps
    val_dataloader : dataloader used for the evaluation steps

    Returns:
    The trained model
    """

    with mlflow.start_run(run_name=name):
        print(f"'{name}' run started")

        # Create model
        model = create_effnetb0_classifier(out_features=num_classes)

        # Set up optimizer & loss function
        optimizer = train_params["optimizer"](params=model.parameters(), lr=train_params["lr"])
        loss_fn = train_params["loss_fn"]()

        mlflow.set_tag("model_name", "EfficientNetB0")
        mlflow.log_params(train_params)

        # Carry out model training
        results = train_model(model=model, num_epochs=train_params["num_epochs"],
                              train_dataloader=train_dataloader, val_dataloader=val_dataloader,
                              optimizer=optimizer, loss_fn=loss_fn)

        # Summarise the run by its best epoch values: lowest loss, highest accuracy.
        # Skipped when no epoch ran, since min()/max() raise on an empty history.
        if results["training_losses"]:
            model_metrics = {"training_loss" : min(results["training_losses"]),
                             "validation_loss" : min(results["validation_losses"]),
                             "training_acc" : max(results["training_accuracies"]),
                             "validation_acc" : max(results["validation_accuracies"])}
            mlflow.log_metrics(model_metrics)

        mlflow.pytorch.log_model(model, "pytorch_models")

    return model


### Create Baseline Model
The baseline model is an EfficientNet B0 with an adjusted classifier, trained on a 20% subset of the training dataset.

In [36]:
# Import EfficientNet B0 model pretrained weights & transformation
effnet_b0_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
effnet_b0_transforms = effnet_b0_weights.transforms()

# Apply EfficientNet B0 Transformation to datasets
# (replaces the ToTensor transform the datasets were created with)
train_data.transform = effnet_b0_transforms
test_data.transform = effnet_b0_transforms

# Fixed seed so the same images are selected on every Restart & Run All
SUBSET_SEED = 42

# Create a random subset holding 20% of the training images.
# The split is drawn over the whole dataset, not per class, so class counts
# are only approximately equal (presumably ~150 images per class - not verified here).
train_subset_length = round(0.2*len(train_data))
train_data_subset, _ = torch.utils.data.random_split(
    train_data,
    lengths=[train_subset_length, len(train_data)-train_subset_length],
    generator=torch.Generator().manual_seed(SUBSET_SEED))
print(f"Training data 20% subset contains {len(train_data_subset)} images")

# Create a random subset holding 20% of the testing images
# (again not stratified; presumably ~50 images per class - not verified here).
test_subset_length = round(0.2*len(test_data))
test_data_subset, _ = torch.utils.data.random_split(
    test_data,
    [test_subset_length, len(test_data) - test_subset_length],
    generator=torch.Generator().manual_seed(SUBSET_SEED))
print(f"Testing data 20% subset contains {len(test_data_subset)} images")

Training data 20% subset contains 15150 images
Testing data 20% subset contains 5050 images


In [37]:
# Settings shared by both subset dataloaders
subset_loader_kwargs = {"batch_size": 8, "num_workers": 0}

# Dataloader over the 20% training subset, with shuffling enabled
train_subset_dataloader = DataLoader(dataset=train_data_subset, shuffle=True,
                                     **subset_loader_kwargs)

# Dataloader over the 20% testing subset, with shuffling disabled
test_subset_dataloader = DataLoader(dataset=test_data_subset, shuffle=False,
                                    **subset_loader_kwargs)

In [38]:
# Turn off MLflow's PyTorch autologging; the run logs explicitly instead
mlflow.pytorch.autolog(disable=True)

# Training configuration for the baseline run
train_params = dict(optimizer=torch.optim.SGD,
                    lr=0.001,
                    num_epochs=3,
                    loss_fn=nn.CrossEntropyLoss)

# Train the baseline model on the 20% subsets inside an MLflow run
model = effnet_mlflow_run(
    name="effnetb0_base",
    num_classes=101,
    train_params=train_params,
    train_dataloader=train_subset_dataloader,
    val_dataloader=test_subset_dataloader,
)

'effnetb0_base' run started


 33%|███▎      | 1/3 [21:51<43:43, 1311.73s/it]

In [None]:
# Save model state dictionary
SAVED_MODEL_DIR = Path("saved_models/")
SAVED_MODEL_DIR.mkdir(parents=True, exist_ok=True)
baseline_model_path = SAVED_MODEL_DIR / "baseline_effnetb0"
torch.save(model.state_dict(), baseline_model_path)

# Check size of baseline model.
# The size is computed outside the f-string: reusing double quotes inside a
# replacement field is a SyntaxError on Python versions before 3.12.
baseline_model_size_mb = os.path.getsize(baseline_model_path)/(1024*1024)
print(f"The baseline EfficientNetB0 model's state dictionary is {baseline_model_size_mb} MB")