# Model Deployment
- Deployment allows you to test your model in the real world rather than on private training and test sets.
- Three questions for machine learning model deployment:
    - What’s the most ideal use case for the model (how well and how fast does it perform)?
    - Where’s the model going to go (is it on-device or on the cloud)?
    - How’s the model going to function (are predictions online or offline)?

Format of the input: **[batch_size, no_of_channels, height, width]**

---

In [None]:
# Check GPU information (IPython shell escape: runs the `nvidia-smi` command in the host shell)
!nvidia-smi

In [None]:
import torch
import torchvision
import matplotlib.pyplot as plt
import scripts.data_setup as data_setup
import scripts.engine as engine


# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Seed handed to the model-creation helpers further down
RANDOM_SEED = 42

In [None]:
# Print the installed PyTorch and torchvision versions
print(torch.__version__)
print(torchvision.__version__)

## 1. Get data ready (turn into tensor)
Our dataset is a subset of the Food101 dataset. Food101 starts with 101 different classes of food and 1000 images per class (750 training, 250 testing). Our dataset starts with 3 classes of food and only 10% of the images (~75 training, 25 testing).

Why do this?
- When starting out ML projects, it's important to try things on a small scale and then increase the scale when necessary.
- The whole point is to speed up how fast you can experiment.

### 1.1 Get the data

In [None]:
import scripts.get_data as get_data
import os


# Source archive and local target directory for the dataset; the project helper
# presumably downloads and extracts the archive into `image_path` (verify in scripts/get_data.py).
# NOTE(review): the archive is named "pizza_steak_sushi.zip" while the target directory (and the
# saved model names later on) say "20_percent" — confirm which dataset split is intended.
url = "https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip"
image_path = os.path.join(os.getcwd(), "data", "pizza_steak_sushi_20_percent")
get_data.download_data(url=url, save_path_str=image_path)

In [None]:
# Check details about data

import os
def walk_through_dir(dir_path):
    """Print, for each directory under dir_path, how many subdirectories and files it contains.

    Nothing is returned; one summary line is printed per directory visited
    by ``os.walk`` (dir_path itself included).
    """
    for entry in os.walk(dir_path):
        dirpath, dirnames, filenames = entry
        print(f"There are {len(dirnames)} directories and {len(filenames)} images in '{dirpath}'.")

# Print the directory/file counts for the dataset directory
walk_through_dir(image_path)

In [None]:
from pathlib import Path


# Setup train and testing paths
# (image_path was built as a string earlier; wrap it in Path so the "/" joins below work)
image_path = Path(image_path)
train_dir = image_path / "train"
test_dir = image_path / "test"

In [None]:
# Visualize the data (randomly take some images)

import random 
from PIL import Image

# Set seed
# random.seed(RANDOM_SEED)  # left disabled, so a different image may be picked on each run

# 1. Get all image paths (pattern matches <split>/<class>/<name>.jpg under image_path)
image_path_list = list(image_path.glob("*/*/*.jpg"))

# 2. Pick a random image path
random_image_path = random.choice(image_path_list)

# 3. Get image class from path name (the image class is the name of the directory where the image is stored)
image_class = random_image_path.parent.stem

# 4. Open image
img = Image.open(random_image_path)

# 5. Print metadata 
print(f"Random image path: {random_image_path}")
print(f"Image class: {image_class}")
print(f"Image height: {img.height}")
print(f"Image width: {img.width}")
# Bare expression: the notebook renders the image as the cell output
img

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Turn the image into an array
img_as_array = np.asarray(img)

# Plot the image with matplotlib, showing its class and array shape in the title
plt.figure(figsize=(10, 7))
plt.imshow(img_as_array)
plt.title(f"Image class: {image_class} | Image shape: {img_as_array.shape} -> [height, width, color_channels] (HWC)")
# Turn the axes off so only the image is shown
plt.axis(False)

## 2. FoodVision Mini model deployment experiment outline
Our goals are:
- Performance - A model that performs at 95%+ accuracy.
- Speed - A model that can classify an image at ~30FPS (0.03 seconds inference time per image, also known as latency).

To try and achieve these results, let's bring in our best performing models from the previous sections:
- EffNetB2 feature extractor (EffNetB2 for short)
- ViT-B/16 feature extractor (ViT for short)

## 3. Creating an EffNetB2 feature extractor

### 3.1 Creating a function to make an EffNetB2 feature extractor

In [None]:
import torch.nn as nn


def create_effnetb2_model(num_classes: int = 3, seed: int = 42):
    """Build a frozen, pretrained EfficientNet-B2 with a fresh classifier head.

    The network is loaded with torchvision's default pretrained weights and
    all of its parameters are frozen. Its classifier is then replaced by a
    new dropout + linear head, whose parameters are the only trainable ones.

    Args:
        num_classes (int, optional): Output size of the new linear layer.
            Defaults to 3.
        seed (int, optional): Value passed to ``torch.manual_seed`` right
            before the head is created. Defaults to 42.

    Returns:
        tuple: ``(model, transforms)`` — the modified EfficientNet-B2 and the
        preprocessing transforms bundled with the pretrained weights.
    """
    pretrained = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    backbone = torchvision.models.efficientnet_b2(weights=pretrained)

    # Freeze the pretrained parameters before the new head is attached
    for frozen_param in backbone.parameters():
        frozen_param.requires_grad = False

    # Seed first so the linear layer's initial weights are reproducible
    torch.manual_seed(seed)
    dropout = nn.Dropout(p=0.3, inplace=True)
    linear_head = nn.Linear(in_features=1408, out_features=num_classes)
    backbone.classifier = nn.Sequential(dropout, linear_head)

    return backbone, pretrained.transforms()

# Build the EffNetB2 feature extractor (3 output classes) together with its matching transforms
effnetb2, effnetb2_transforms = create_effnetb2_model(num_classes=3, seed=RANDOM_SEED)

In [None]:
from torchinfo import summary


# Print a per-layer summary of EffNetB2 for a single 3x224x224 input,
# with columns for input/output sizes, parameter counts and trainability
summary(effnetb2,
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

### 3.2 Creating DataLoaders for EffNetB2

In [None]:
# Setup DataLoaders
from scripts.data_setup import create_dataloaders


# Build the train/test DataLoaders, passing the EffNetB2 transforms and a batch size of 32.
# class_names is returned as well and is reused later when labelling predictions.
train_dataloader_effnetb2, test_dataloader_effnetb2, class_names = create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=effnetb2_transforms,
    batch_size=32
)

### 3.3 Training EffNetB2 feature extractor

In [None]:
from scripts.engine import train
from scripts.utils import set_seeds, plot_loss_curves

# Setup optimizer
# (all parameters are handed over; the pretrained layers were frozen in create_effnetb2_model)
optimizer = torch.optim.Adam(params=effnetb2.parameters(), lr=1e-3)
# Setup loss function
loss_fn = torch.nn.CrossEntropyLoss()

# Set seeds for reproducibility and train the model
set_seeds()
# The returned results are indexed later by "test_loss" and "test_acc" (one entry per epoch, last one used)
effnetb2_results = train(
    model=effnetb2,
    train_dataloader=train_dataloader_effnetb2,
    test_dataloader=test_dataloader_effnetb2,
    epochs=10,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=DEVICE
)

# Show the raw results and plot them
print(effnetb2_results)
plot_loss_curves(effnetb2_results)

### 3.4 Saving EffNetB2 feature extractor

In [None]:
from scripts.utils import save_model

# File name for the EffNetB2 checkpoint (read back below from "models/<model_name>")
model_name = "08_effnetb2_pizza_steak_sushi_20_percent.pth"

# Save the model
save_model(
    model=effnetb2,
    target_dir="models",
    model_name=model_name
)

### 3.5 Collecting EffNetB2 feature extractor stats

In [None]:
from pathlib import Path

# Get the model size in bytes then convert to megabytes
# (floor division, so the result is truncated to a whole number of MB)
pretrained_effnetb2_model_size = Path(f"models/{model_name}").stat().st_size // (1024*1024) # division converts bytes to megabytes (roughly) 
print(f"Pretrained EffNetB2 feature extractor model size: {pretrained_effnetb2_model_size} MB")

# Count number of parameters in EffNetB2 (every parameter, frozen or trainable)
effnetb2_total_params = sum(torch.numel(param) for param in effnetb2.parameters())
effnetb2_total_params

In [None]:
# Create a dictionary with EffNetB2 statistics
# (loss/accuracy are taken from the last entry of the training results)
effnetb2_stats = {"test_loss": effnetb2_results["test_loss"][-1],
                  "test_acc": effnetb2_results["test_acc"][-1],
                  "number_of_parameters": effnetb2_total_params,
                  "model_size (MB)": pretrained_effnetb2_model_size}
effnetb2_stats

## 4. Creating a ViT feature extractor

### 4.1 Creating a function to make a ViT feature extractor

In [None]:
def create_vit_model(num_classes: int = 3, seed: int = 42):
    """Build a frozen, pretrained ViT-B/16 with a fresh linear head.

    The network is loaded with torchvision's default pretrained weights and
    all of its parameters are frozen. ``heads`` is then replaced by a single
    new linear layer, whose parameters are the only trainable ones.

    Args:
        num_classes (int, optional): Output size of the new linear layer.
            Defaults to 3.
        seed (int, optional): Value passed to ``torch.manual_seed`` right
            before the head is created. Defaults to 42.

    Returns:
        tuple: ``(model, transforms)`` — the modified ViT-B/16 and the
        preprocessing transforms bundled with the pretrained weights.
    """
    pretrained = torchvision.models.ViT_B_16_Weights.DEFAULT
    vit_backbone = torchvision.models.vit_b_16(weights=pretrained)

    # Freeze the pretrained parameters before the new head is attached
    for frozen_param in vit_backbone.parameters():
        frozen_param.requires_grad = False

    # Seed first so the new layer's initial weights are reproducible
    torch.manual_seed(seed)
    new_head = nn.Linear(in_features=768, out_features=num_classes)
    vit_backbone.heads = nn.Sequential(new_head)

    return vit_backbone, pretrained.transforms()

# Create ViT model and transforms (3 output classes, head seeded with RANDOM_SEED)
vit, vit_transforms = create_vit_model(num_classes=3, seed=RANDOM_SEED)

In [None]:
from torchinfo import summary

# Print ViT feature extractor model summary for a single 3x224x224 input
summary(vit, 
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

### 4.2 Creating DataLoaders for ViT

In [None]:
# Setup ViT DataLoaders
from scripts.data_setup import create_dataloaders

# Build the train/test DataLoaders, passing the ViT transforms and a batch size of 32.
# Note: this rebinds class_names (same train/test directories as the EffNetB2 loaders).
train_dataloader_vit, test_dataloader_vit, class_names = create_dataloaders(
    train_dir=train_dir,
    test_dir=test_dir,
    transform=vit_transforms,
    batch_size=32
)

### 4.3 Training ViT feature extractor

In [None]:
from scripts.engine import train
from scripts.utils import set_seeds, plot_loss_curves

# Setup optimizer
# (rebinds `optimizer`; all ViT parameters are handed over, the pretrained layers were frozen in create_vit_model)
optimizer = torch.optim.Adam(params=vit.parameters(), lr=1e-3)
# Setup loss function
loss_fn = torch.nn.CrossEntropyLoss()

# Set seeds for reproducibility and train the model
set_seeds()
# The returned results are indexed later by "test_loss" and "test_acc" (last entry used)
vit_results = train(
    model=vit,
    train_dataloader=train_dataloader_vit,
    test_dataloader=test_dataloader_vit,
    epochs=10,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=DEVICE
)

# Show the raw results and plot them
print(vit_results)
plot_loss_curves(vit_results)

### 4.4 Saving ViT feature extractor

In [None]:
from scripts.utils import save_model

# File name for the ViT checkpoint (read back below from "models/<model_name>")
model_name = "08_vit_pizza_steak_sushi_20_percent.pth"

# Save the model
# (must be `vit`, not `effnetb2`: the file is named after ViT and its size on disk
# is what gets reported as the ViT model size)
save_model(
    model=vit,
    target_dir="models",
    model_name=model_name
)

### 4.5 Collecting ViT feature extractor stats

In [None]:
from pathlib import Path

# Get the size in bytes of the file at models/<model_name>, then convert to megabytes
# (floor division, so the result is truncated to a whole number of MB)
pretrained_vit_model_size = Path(f"models/{model_name}").stat().st_size // (1024*1024) # division converts bytes to megabytes (roughly) 
print(f"Pretrained ViT feature extractor model size: {pretrained_vit_model_size} MB")

# Count number of parameters in ViT (every parameter, frozen or trainable)
vit_total_params = sum(torch.numel(param) for param in vit.parameters())
vit_total_params

In [None]:
# Create a dictionary with ViT statistics
# (loss/accuracy are taken from the last entry of the training results)
vit_stats = {"test_loss": vit_results["test_loss"][-1],
            "test_acc": vit_results["test_acc"][-1],
            "number_of_parameters": vit_total_params,
            "model_size (MB)": pretrained_vit_model_size}
vit_stats

## 5. Making predictions with our trained models and timing them
One thing we'll have to remember is that for our model to make predictions on an image, the image has to be in the same format as the images our model was trained on.

> Note: We time the predictions one by one rather than by batch because when our model is deployed, it will likely only be making a prediction on one image at a time. As in, someone takes a photo and our model predicts on that single image.

In [None]:
from pathlib import Path

# Get all test data paths (pattern matches <class>/<name>.jpg under test_dir)
print(f"[INFO] Finding all filepaths ending with '.jpg' in directory: {test_dir}")
test_data_paths = list(Path(test_dir).glob("*/*.jpg"))
# Preview the first five paths as the cell output
test_data_paths[:5]

### 5.1 Making and timing predictions with EffNetB2

In [None]:
from scripts.utils import pred_and_store
import pandas as pd


# Make predictions across test dataset with EffNetB2
# NOTE(review): the model was trained on DEVICE; presumably pred_and_store moves it to
# `device` itself — confirm in scripts/utils.py.
effnetb2_test_pred_dicts = pred_and_store(
    paths=test_data_paths,
    model=effnetb2,
    transform=effnetb2_transforms,
    class_names=class_names,
    device="cpu") # make predictions on CPU

# Turn the test_pred_dicts into a DataFrame
# (the `correct` and `time_for_pred` columns are read from it in the following cells)
effnetb2_test_pred_df = pd.DataFrame(effnetb2_test_pred_dicts)
effnetb2_test_pred_df.head()

In [None]:
# Check number of correct predictions (tally of each distinct value in the `correct` column)
effnetb2_test_pred_df.correct.value_counts()

In [None]:
# Find the average time per prediction (mean of `time_for_pred`, rounded to 4 decimal places)
effnetb2_average_time_per_pred = round(effnetb2_test_pred_df.time_for_pred.mean(), 4)
print(f"EffNetB2 average time per prediction: {effnetb2_average_time_per_pred} seconds")

In [None]:
# Add EffNetB2 average prediction time to stats dictionary 
# (key name records that the predictions above were requested on the CPU)
effnetb2_stats["time_per_pred_cpu"] = effnetb2_average_time_per_pred
effnetb2_stats

### 5.2 Making and timing predictions with ViT

In [None]:
from scripts.utils import pred_and_store
import pandas as pd


# Make predictions across test dataset with ViT
# NOTE(review): the model was trained on DEVICE; presumably pred_and_store moves it to
# `device` itself — confirm in scripts/utils.py.
vit_test_pred_dicts = pred_and_store(
    paths=test_data_paths,
    model=vit,
    transform=vit_transforms,
    class_names=class_names,
    device="cpu") # make predictions on CPU

# Turn the test_pred_dicts into a DataFrame
# (the `correct` and `time_for_pred` columns are read from it in the following cells)
vit_test_pred_df = pd.DataFrame(vit_test_pred_dicts)
vit_test_pred_df.head()

In [None]:
# Check number of correct predictions (tally of each distinct value in the `correct` column)
vit_test_pred_df.correct.value_counts()

In [None]:
# Find the average time per prediction (mean of `time_for_pred`, rounded to 4 decimal places)
vit_average_time_per_pred = round(vit_test_pred_df.time_for_pred.mean(), 4)
print(f"ViT average time per prediction: {vit_average_time_per_pred} seconds")

In [None]:
# Add ViT average prediction time to stats dictionary 
# (key name records that the predictions above were requested on the CPU)
vit_stats["time_per_pred_cpu"] = vit_average_time_per_pred
vit_stats

## 6. Comparing model results, prediction times and size
> Note: Prediction times will be different across different hardware types (e.g. Intel i9 vs Google Colab CPU vs GPU) so it's important to think about and test where your model is going to end up. Asking questions like "where is the model going to be run?" or "what is the ideal scenario for running the model?" and then running experiments to try and provide answers on your way to deployment is very helpful.

In [None]:
# Turn stat dictionaries into DataFrame (one row per model, EffNetB2 first)
df = pd.DataFrame([effnetb2_stats, vit_stats])

# Add column for model names (order must match the row order above)
df["model"] = ["EffNetB2", "ViT"]

# Convert accuracy to percentages (multiplied by 100, rounded to 2 decimal places)
df["test_acc"] = round(df["test_acc"] * 100, 2)

df

### 6.1 Visualizing the speed vs. performance tradeoff
In our case, the differences between our models' performance levels (on the test loss and test accuracy) aren't too extreme. But since we'd like to put an emphasis on speed to begin with, we're going to stick with deploying EffNetB2 since it's faster and has a much smaller footprint.

In [None]:
from pathlib import Path

# 1. Create a plot from model comparison DataFrame
fig, ax = plt.subplots(figsize=(12, 8))
scatter = ax.scatter(data=df, 
                     x="time_per_pred_cpu", 
                     y="test_acc", 
                     c=["blue", "orange"], # what colours to use?
                     s="model_size (MB)") # size the dots by the model sizes

# 2. Add titles, labels and customize fontsize for aesthetics
ax.set_title("FoodVision Mini Inference Speed vs Performance", fontsize=18)
ax.set_xlabel("Prediction time per image (seconds)", fontsize=14)
ax.set_ylabel("Test accuracy (%)", fontsize=14)
ax.tick_params(axis='both', labelsize=12)
ax.grid(True)

# 3. Annotate with model names (small offsets keep the label from sitting on top of the dot)
for index, row in df.iterrows():
    ax.annotate(text=row["model"], # note: depending on your version of Matplotlib, you may need to use "s=..." or "text=...", see: https://github.com/faustomorales/keras-ocr/issues/183#issuecomment-977733270 
                xy=(row["time_per_pred_cpu"]+0.0006, row["test_acc"]+0.03),
                size=12)

# 4. Create a legend based on model sizes
handles, labels = scatter.legend_elements(prop="sizes", alpha=0.5)
model_size_legend = ax.legend(handles, 
                              labels, 
                              loc="lower right", 
                              title="Model size (MB)",
                              fontsize=12)

# Save the figure
# (savefig does not create missing directories, so make sure "images/" exists first)
Path("images").mkdir(parents=True, exist_ok=True)
plt.savefig("images/08-foodvision-mini-inference-speed-vs-performance.jpg")

# Show the figure
plt.show()

## 7. Bringing FoodVision Mini to life by creating a Gradio demo
Why create a demo of your models?

Because metrics on the test set look nice but you never really know how your model performs until you use it in the wild.

In [None]:
# Import/install Gradio 
try:
    import gradio as gr
except: 
    %conda install -c conda-forge gradio
    import gradio as gr
    
print(f"Gradio version: {gr.__version__}")

### 7.1 Creating a function to map our inputs and outputs
input: image -> transform -> predict with EffNetB2 -> output: pred, pred prob, time taken

In [None]:
# Put EffNetB2 on CPU since we are using Gradio on CPU
effnetb2.to("cpu") 

# Check the device (reports the device of the model's first parameter)
next(iter(effnetb2.parameters())).device

In [None]:
from typing import Tuple, Dict
from timeit import default_timer as timer


def predict(img) -> Tuple[Dict, float]:
    """Classify a single image with EffNetB2 and report how long it took.

    Args:
        img: Image accepted by ``effnetb2_transforms`` (the Gradio interface
            in this notebook is configured to pass a PIL image).

    Returns:
        A pair ``(probabilities, seconds)``: a dict mapping every entry of
        ``class_names`` to its softmax probability, and the elapsed time in
        seconds rounded to 5 decimal places. The timing covers the transform,
        the forward pass and building the dict.
    """
    began = timer()

    # Preprocess, then prepend a batch dimension of size 1
    batch = effnetb2_transforms(img).unsqueeze(0)

    # Evaluation mode + inference mode for the forward pass
    effnetb2.eval()
    with torch.inference_mode():
        logits = effnetb2(batch)
        probabilities = torch.softmax(logits, dim=1)

    # Gradio's Label output takes a {label: probability} mapping
    first_row = probabilities[0]
    pred_labels_and_probs = {
        label: float(first_row[idx]) for idx, label in enumerate(class_names)
    }

    elapsed = round(timer() - began, 5)
    return pred_labels_and_probs, elapsed

Now let's see our function in action by performing a prediction on a random image from the test dataset.

In [None]:
import random
from PIL import Image

# Get a list of all test image filepaths (pattern matches <class>/<name>.jpg under test_dir)
test_data_paths = list(Path(test_dir).glob("*/*.jpg"))

# Randomly select a test image path (unseeded, so the choice may differ between runs)
random_image_path = random.sample(test_data_paths, k=1)[0]

# Open the target image
image = Image.open(random_image_path)
print(f"[INFO] Predicting on image at path: {random_image_path}\n")

# Predict on the target image and print out the outputs
pred_dict, pred_time = predict(img=image)
print(f"Prediction label and probability dictionary: \n{pred_dict}")
print(f"Prediction time: {pred_time} seconds")

### 7.2 Creating a list of example images

In [None]:
# Create a list of example inputs to our Gradio demo
# (three randomly sampled test images, each wrapped in its own single-item list of a path string)
example_list = [[str(filepath)] for filepath in random.sample(test_data_paths, k=3)]
example_list

### 7.3 Building a Gradio interface

In [None]:
import gradio as gr


# Create title, description and article strings
title = "FoodVision Mini 🍕🥩🍣"
description = "An EfficientNetB2 feature extractor computer vision model to classify images of food as pizza, steak or sushi."
article = "Created at [09. PyTorch Model Deployment](https://www.learnpytorch.io/09_pytorch_model_deployment/)."

# Create the Gradio demo
# (the two output components line up with the (dict, float) pair returned by predict)
demo = gr.Interface(fn=predict, # mapping function from input to output
                    inputs=gr.Image(type="pil"), # what are the inputs?
                    outputs=[gr.Label(num_top_classes=3, label="Predictions"), # what are the outputs?
                             gr.Number(label="Prediction time (s)")], # our fn has two outputs, therefore we have two outputs
                    examples=example_list, 
                    title=title,
                    description=description,
                    article=article)

# Launch the demo!
demo.launch(debug=False, # print errors locally?
            share=True) # generate a publicly shareable URL?