## PyTorch model deployment

In [None]:

'''
# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+
try:
    import torch
    import torchvision
    assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
    assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")
except:
    print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
    !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
    import torch
    import torchvision
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")'''

In [None]:
import torch
import torchvision

In [None]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

# Try to get torchinfo, install it if it doesn't work
try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

# Try to import the going_modular directory, download it from GitHub if it doesn't work
try:
    from going_modular import data_setup, engine
    from helper_functions import download_data, set_seeds, plot_loss_curves
except:
    # Get the going_modular scripts
    print("[INFO] Couldn't find going_modular or helper_functions scripts... downloading them from GitHub.")
    !git clone https://github.com/mrdbourke/pytorch-deep-learning
    !mv pytorch-deep-learning/going_modular .
    !mv pytorch-deep-learning/helper_functions.py . # get the helper_functions.py script
    !rm -rf pytorch-deep-learning
    from going_modular.going_modular import data_setup, engine
    from helper_functions import download_data, set_seeds, plot_loss_curves

In [None]:
!ls going_modular

In [None]:
# Setup device agnostic code
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

### Getting data

Using Pizza, steak, sushi 20% dataset

In [None]:
!ls data

In [None]:
# Setup directory paths to train and test images
from pathlib import Path

train_dir = Path("data/pizza_steak_sushi_20/train")
test_dir = Path("data/pizza_steak_sushi_20/test")

In [None]:
train_dir, test_dir

**Questions**
1. What is my most ideal machine learning model deployment scenario?
2. Where is my model going to go?
3. How is my model going to function?

**The model should:**
1. Perform well: 95%+ accuracy or even more depending on the scenario
2. Fast: as close to real-time (or faster) as possible (30FPS+ or 30ms latency)

### Creating an EffNetB2 feature extractor

In [None]:
# Seed the CPU and CUDA random number generators for reproducibility
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# 1. Select the default pretrained EffNetB2 weights
effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT

# 2. Get the preprocessing transforms that belong to those weights
effnetb2_transforms = effnetb2_weights.transforms()

# 3. Build the model from the pretrained weights (weights="DEFAULT" would also work)
effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights)

# 4. Freeze every parameter of the model; the classifier head is replaced in a later cell
for frozen_param in effnetb2.parameters():
    frozen_param.requires_grad_(False)

In [None]:
# Check out EffNetB2 classifier head
effnetb2.classifier

In [None]:
# 5. Update the classifier head
effnetb2.classifier = nn.Sequential(
    nn.Dropout(p=0.3, inplace=True), # keep dropout layer same
    nn.Linear(in_features=1408, # keep in_features same 
              out_features=3)) # change out_features to suit our number of classes

In [None]:
from torchinfo import summary

# Print EffNetB2 model summary (uncomment for full output) 
summary(effnetb2, 
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
# Setup DataLoaders
from going_modular import data_setup
train_dataloader_effnetb2, test_dataloader_effnetb2, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                                                 test_dir=test_dir,
                                                                                                 train_transform=effnetb2_transforms,
                                                                                                 test_transform=effnetb2_transforms,
                                                                                                 batch_size=32)

In [None]:
from going_modular import engine

# Setup optimizer
optimizer = torch.optim.Adam(params=effnetb2.parameters(),
                             lr=1e-3)
# Setup loss function
loss_fn = torch.nn.CrossEntropyLoss()

# Set seeds for reproducibility and train the model
set_seeds()
effnetb2_results = engine.train(model=effnetb2,
                                train_dataloader=train_dataloader_effnetb2,
                                test_dataloader=test_dataloader_effnetb2,
                                epochs=20,
                                optimizer=optimizer,
                                loss_fn=loss_fn,
                                device=device)

In [None]:
from helper_functions import plot_loss_curves

plot_loss_curves(effnetb2_results)

In [None]:
from going_modular import utils

# Saving EffNetB2 feature extractor
utils.save_model_state_dict(model = effnetb2,
                           target_dir = "models",
                           model_name = "state_dict__effnetb2_trained_on_pizza_steak_sushi_20%.pth")


#### Inspecting the size of our EffNetB2

In [None]:
from pathlib import Path

# Get the model size in bytes and convert to megabytes
pretrained_effnetb2_model_size = Path("models/state_dict__effnetb2_trained_on_pizza_steak_sushi_20%.pth").stat().st_size/(1024*1024)
print(f"Pretrained effnetb2 feature size is {round(pretrained_effnetb2_model_size,2)} MB")

In [None]:
# Count the number of parameters in EffNetB2
effnetb2_total_params = sum(torch.numel(param) for param in effnetb2.parameters())
effnetb2_total_params

In [None]:
 # Create a dictionary with EffNetB2 statistics
effnetb2_stats = {"test loss": effnetb2_results["test_loss"][-1],
                "test acc": effnetb2_results["test_acc"][-1],
                 "number of parameters": effnetb2_total_params,
                 "model size (MB)": pretrained_effnetb2_model_size}
effnetb2_stats

### Creating a ViT feature extractor (base model)

In [None]:
# Seed the CPU and CUDA random number generators for reproducibility
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Pretrained weights for ViT-Base/16 ('DEFAULT' = best available)
vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT

# Get the automatic transforms that belong to the pretrained ViT weights
vit_transforms = vit_weights.transforms()

# Create a ViT instance from the pretrained weights, then place it on the target device
vit = torchvision.models.vit_b_16(weights=vit_weights)
vit = vit.to(device)

# Freeze the base parameters
for frozen_param in vit.parameters():
    frozen_param.requires_grad_(False)

In [None]:
# Setup DataLoaders
from going_modular import data_setup
train_dataloader_vit, test_dataloader_vit, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                                     test_dir=test_dir,
                                                                                     train_transform=vit_transforms,
                                                                                     test_transform=vit_transforms,
                                                                                     batch_size=32)

In [None]:
# Update the classifier head
vit.heads = nn.Linear(in_features = 768,
                                out_features = len(class_names)).to(device)
vit.heads

In [None]:
# Get a summary of the ViT model using torchinfo.summary
summary(model = vit,
       input_size = (1, 3, 224, 224), # (batch_size, color_channels, height, width)
       col_names = ["input_size", "output_size", "num_params", "trainable"],
       col_width = 20,
       row_settings = ["var_names"])

In [None]:
from going_modular import engine

# Setup optimizer
optimizer = torch.optim.Adam(params=vit.parameters(),
                             lr=1e-3)
# Setup loss function
loss_fn = torch.nn.CrossEntropyLoss()

# Set seeds for reproducibility and train the model
set_seeds()
vit_results = engine.train(model=vit,
                                train_dataloader=train_dataloader_vit,
                                test_dataloader=test_dataloader_vit,
                                epochs=20,
                                optimizer=optimizer,
                                loss_fn=loss_fn,
                                device=device)

In [None]:
from helper_functions import plot_loss_curves
 
plot_loss_curves(vit_results)

In [None]:
# Saving vit feature extractor

from going_modular import utils

utils.save_model_state_dict(model = vit,
                           target_dir = "models",
                           model_name = "state_dict__ViT_trained_on_pizza_steak_sushi_20%.pth")


In [None]:
from pathlib import Path

# Get the model size in bytes and convert to megabytes.
# The filename must match the one used when the state dict was saved (capitalised "ViT");
# a lowercase "vit" only resolves on case-insensitive filesystems.
pretrained_vit_model_size = Path("models/state_dict__ViT_trained_on_pizza_steak_sushi_20%.pth").stat().st_size/(1024*1024)
print(f"Pretrained vit feature size is {round(pretrained_vit_model_size,2)} MB")

In [None]:
# Count the number of parameters in ViT (frozen and trainable alike)
vit_total_params = sum(torch.numel(param) for param in vit.parameters())
vit_total_params

In [None]:
 # Create a dictionary with ViT statistics (last-epoch test metrics, parameter count, file size)
vit_stats = {"test loss": vit_results["test_loss"][-1],
            "test acc": vit_results["test_acc"][-1],
            "number of parameters": vit_total_params,
            "model size (MB)": pretrained_vit_model_size}
vit_stats

### Making predictions with our trained models and timing them

* Both models perform well (95%+)

**Testing models:**
1. Loop through test images
2. Time how long each model takes to make a prediction on the image

We need real-time speed ($30+$ FPS).


In [None]:
from pathlib import Path

# Get all test data paths
test_data_paths = list(Path(test_dir).glob("*/*.jpg"))
test_data_paths[:5]

### Creating a function to make predictions across the test dataset

Steps to create `pred_and_store()`:
    
   1. Create a function that takes a list of paths, a trained PyTorch model, a series of transforms, a list of target class names and a target device.
   2. Create an empty list (can return a full list of all predictions later).
   3. Loop through the target input paths (the rest of the steps will take place inside the loop).
   4. Create an empty dictionary for each sample (prediction statistics will go in here).
   5. Get the sample path and ground truth class from the filepath.
   6. Start the prediction timer.
   7. Open the image using `PIL.Image.open(path)`.
   8. Transform the image to be usable with a given model.
   9. Prepare the model for inference by sending to the target device and turning on `eval()` mode.
   10. Turn on `torch.inference_mode()` and pass the target transformed image to the model and perform forward pass + calculate pred prob + pred class.
   11. Add the pred prob + pred class to empty dictionary from step 4.
   12. End the prediction timer started in step 6 and add the time to the prediction dictionary.
   13. See if the predicted class matches the ground truth class.
   14. Append the updated prediction dictionary to the empty list of predictions we create in step 2.
   15. Return the list of prediction dictionaries.

In [None]:
import pathlib
import torch

from PIL import Image
from timeit import default_timer as timer
from tqdm.auto import tqdm
from typing import List, Dict

# 1. Create a function that takes a list of paths, a trained PyTorch model, a series of transforms, a list of target class names and a target device.
def pred_and_store(paths: List[pathlib.Path],
                  model: torch.nn.Module,
                  transform: torchvision.transforms,
                  class_names: List[str],
                  device: str = "cuda" if torch.cuda.is_available() else "cpu") -> List[Dict]:
    """Predicts on every image in `paths` and records the result and timing of each prediction.

    Args:
        paths: Image file paths. The name of each file's parent directory is used
            as the ground truth class name.
        model: Trained model used to make the predictions. It is moved to `device`
            and put into eval mode.
        transform: Callable applied to each opened PIL image before it is passed
            to the model.
        class_names: Class names, indexed by the model's predicted label.
        device: Device to run inference on. The default is evaluated once, when the
            function is defined: "cuda" if CUDA is available, otherwise "cpu".

    Returns:
        One dictionary per path with the keys "image_path", "class_name",
        "pred_prob", "pred_class", "time_for_pred" (seconds) and "correct".
    """
    # 2. Create an empty list (can return a full list of all predictions later)
    pred_list = []

    # 9. Prepare the model for inference by sending it to the target device and turning on `eval()` mode.
    #    Done once before the loop: the work is loop-invariant, and a device transfer
    #    inside the timed region would inflate the first prediction's time.
    model = model.to(device)
    model.eval()

    # 3. Loop through the target input paths (the remaining steps take place inside the loop)
    for path in tqdm(paths):

        # 4. Create an empty dictionary for each sample (prediction statistics will go in here)
        pred_dict = {}

        # 5. Get the sample path and ground truth class from the filepath
        pred_dict["image_path"] = path
        class_name = path.parent.stem
        pred_dict["class_name"] = class_name

        # 6. Start the prediction timer
        start_time = timer()

        # 7. Open the image using `PIL.Image.open(path)`; the context manager closes the file handle
        with Image.open(path) as img:
            # 8. Transform the image to be usable with a given model (adds a batch dimension)
            transformed_image = transform(img).unsqueeze(0).to(device)

        # 10. Turn on `torch.inference_mode()`, do the forward pass and calculate pred prob + pred class
        with torch.inference_mode():
            pred_logit = model(transformed_image)
            pred_prob = torch.softmax(pred_logit, dim = 1)
            pred_label = torch.argmax(pred_prob, dim = 1)
            # `.item()` turns the single-element label tensor into a Python int for list indexing
            pred_class = class_names[pred_label.item()]

            # 11. Add the pred prob + pred class to the dictionary from step 4
            pred_dict["pred_prob"] = round(pred_prob.max().item(), 4)
            pred_dict["pred_class"] = pred_class

            # 12. End the prediction timer started in step 6 and add the time to the prediction dictionary
            end_time = timer()
            pred_dict["time_for_pred"] = round(end_time - start_time, 4)

        # 13. See if the predicted class matches the ground truth class
        pred_dict["correct"] = class_name == pred_class

        # 14. Append the updated prediction dictionary to the list of predictions created in step 2
        pred_list.append(pred_dict)

    # 15. Return the list of prediction dictionaries
    return pred_list

#### Making and timing predictions with EffNetB2

Images should be passed through the appropriate transforms (e.g. ViT with `vit_transforms`)

Device should be set correctly

In [None]:
effnetb2_test_pred_dicts = pred_and_store(paths = test_data_paths,
                                         model = effnetb2,
                                         transform = effnetb2_transforms,
                                         class_names = class_names,
                                         device = 'cpu')
effnetb2_test_pred_dicts

In [None]:
# Turn the test_pred_dicts into a DataFrame
import pandas as pd
effnetb2_test_pred_df = pd.DataFrame(effnetb2_test_pred_dicts)
effnetb2_test_pred_df.head()

In [None]:
# Check number of correct predictions
effnetb2_test_pred_df.correct.value_counts()

In [None]:
# Find the average time per prediction
effnetb2_average_time_per_pred = round(effnetb2_test_pred_df.time_for_pred.mean(), 4)
print(f"EffNetB2 average time per prediction: {effnetb2_average_time_per_pred}")

In [None]:
effnetb2_stats["time_per_pred_cpu"] = effnetb2_average_time_per_pred
effnetb2_stats

### Vit

In [None]:
vit_test_pred_dicts = pred_and_store(paths = test_data_paths,
                                         model = vit,
                                         transform = vit_transforms,
                                         class_names = class_names,
                                         device = 'cpu')
vit_test_pred_dicts

In [None]:
# Turn the test_pred_dicts into a DataFrame
import pandas as pd
vit_test_pred_df = pd.DataFrame(vit_test_pred_dicts)
vit_test_pred_df.head()

In [None]:
# Check number of correct predictions
vit_test_pred_df.correct.value_counts()

In [None]:
# Find the average time per prediction
vit_average_time_per_pred = round(vit_test_pred_df.time_for_pred.mean(), 4)
print(f"ViT average time per prediction: {vit_average_time_per_pred}")

**Note:** Prediction times will vary(much like training time) depending on the hardware.

In [None]:
vit_stats["time_per_pred_cpu"] = vit_average_time_per_pred
vit_stats

## Comparing model results, prediction times and size

In [None]:
# Turn stat dictionaries into DataFrames
df = pd.DataFrame([effnetb2_stats, vit_stats])

# Add column for model names
df["model"] = ["EffNetB2", "ViT"]

# Convert accuracy to percentage
df["test acc"] = round(df["test acc"] * 100, 2)

df

#### Which model is better?

* `test_loss` (lower is better) - $ViT$
* `test_acc` (higher is better) - $ViT$
* `number_of_parameters` (generally lower is better) - $EffNetB2$ (if a model has more parameters, it generally takes longer to compute)
     * sometimes models with higher parameters can still perform fast
* `model_size (MB)` - $EffNetB2$ (for our use case of deploying to a mobile device, generally lower is better)
* `time_per_pred_cpu` - $EffNetB2$ (lower is better, will be highly dependent on the hardware we're running on)


In [None]:
# Compare ViT to EffNetB2 across different characteristics
pd.DataFrame(data = df.set_index("model").loc["ViT"] / df.set_index("model").loc["EffNetB2"],
             columns = ["ViT to EffNetB2 ratios"]).T

### Visualizing the speed vs. performance tradeoff

steps:
1. Create a scatter plot from the comparison DataFrame to compare EffNetB2 and ViT across test accuracy and prediction time.
2. Add titles and labels to make our plot look nice.
3. Annotate the samples on the scatter plot so we know what's going on
4. Create a legend based on the model sizes (`model size (MB)`).

In [None]:
# 1. Create a plot from model comparision DataFrame
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize = (12, 8))
scatter = ax.scatter(data = df,
                    x = "time_per_pred_cpu",
                    y = "test acc",
                    c = ["blue", "orange"],
                    s = "model size (MB)")

# 2. Add titles and labels
ax.set_title("FoodVision mini Inference Speed vs Performance", fontsize = 18)
ax.set_xlabel("Prediction time per image (seconds)", fontsize = 14)
ax.set_ylabel("Test accuracy (%)", fontsize = 14)
ax.tick_params(axis = "both", labelsize = 12)
ax.grid(True)

# 3. Annotate the samples on the scatter plot
for index, row in df.iterrows():
    ax.annotate(text = row["model"],
               xy = (row["time_per_pred_cpu"], row["test acc"]+0.1))

# 4. Create a legend based on model sizes
handles, labels = scatter.legend_elements(prop = "sizes", alpha = .4)
model_size_legend = ax.legend(handles,
                             labels,
                             loc = "lower right",
                             title = "Model size (MB)",
                             fontsize = 15)

# Save the figure
plt.savefig("foodclassifier-inference-speed-vs-performance.png")

#### The top-left corner is the ideal position for a model: highest test accuracy with the lowest prediction time

# Bringing the model to life by creating a Gradio demo


In [None]:
"""# Import Gradio
try:
    import gradio as gr
except:
    !pip install gradio
    import gradio as gr
    
print(f"Gradio version: {gr.__version__}")"""

In [None]:
"""# Put our model on the CPU
effnetb2 = effnetb2.to("cpu")

next(iter(effnetb2.parameters())).device"""

#### Creating a function called `predict()` 

`input (images of food) -> Ml model (effnetb2) -> outputs (food class label, prediction time)`

In [None]:
"""from typing import Tuple, Dict

def predict(img) -> Tuple[Dict, float]:
    
    # Start a timer
    start_time = timer()
    
    # Transform the input image for use with EffNetB2
    img = effnetb2_transforms(img).unsqueeze(0)
    
    # Put the model into eval mode, make prediction
    effnetb2.eval()
    with torch.inference_mode():
        # Pass transformed image through the model and turn the prediction logits into probabilities
        pred_probs = torch.softmax(effnetb2(img), dim = 1)
    
    # Create a prediction label and prediction probability dictionary
    pred_labels_and_probs = {class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))}
    
    # Calculate pre time
    end_time = timer()
    pred_time = round(end_time - start_time, 4)
    
    # Return pred dict and pred time
    return pred_labels_and_probs, pred_time"""

In [None]:
"""import random
from PIL import Image
from pathlib import Path
from timeit import default_timer as timer


# Get a list of all test image filepaths
test_data_paths = list(Path(test_dir).glob("*/*.jpg"))
print(f"Example test data path: { test_data_paths[0]}")

# Randomly select a test image path
random_image_path = random.sample(test_data_paths, k = 1)[0]
random_image_path

# Open the target image
image = Image.open(random_image_path)
print(f"[INFO] Predicting on image on path: {random_image_path}\n")

# Predict on the target image and print out the outputs
pred_dict, pred_time = predict(img = image)
print(pred_dict)
print(pred_time)"""

#### Creating a list of example images

In [None]:
# Create list of examples inputs to Gradio demo
# `random` is imported here because the only other cell that imports it is disabled
# (wrapped in a string), so a fresh kernel would otherwise hit a NameError
import random

example_list = [[str(filepath)] for filepath in random.sample(test_data_paths, k = 3)]
example_list

#### Building a Gradio Interface

Using `gr.Interface()`



`input: image -> transform -> predict with EffNetB2 -> output: pred, pred prob, time`

In [None]:
"""import gradio as gr

# Create title, description and article
title = 'FoodIdentifier 🍣🍕🥩'
description = "An EfficientNetB2 feature extractor computer vision model to classify images as pizza, sushi or steak"
article = " anything I want for the description of the description above 🤪"

# Create the Gradio demo
demo = gr.Interface(fn = predict, # maps input to output
                    inputs = gr.Image(type = 'pil'),
                    outputs = [gr.Label(num_top_classes = 3, label = "Predictions"),
                              gr.Number(label = "Prediction time (s)")],
                    examples = example_list,
                    title = title,
                    description = description,
                    article = article
                   )

# Launch the demo
demo.launch(debug = False, # print errors locally?
           share = True) # generate a publically shareable URL
                    """

## Turning the food identifier into a deployable app

Our Gradio demos are fantastic, but they expire within 72 hours

##### Hugging Face Spaces



In [None]:
# Create a demo folder to store food identifier app files
import shutil
from pathlib import Path

# Directory that will hold the FoodVision Mini demo files
foodvision_demo_path = Path("demos/foodvision_mini")

# Start from a clean slate: delete any previous demo directory, then (re)create it
if foodvision_demo_path.exists():
    shutil.rmtree(foodvision_demo_path)
foodvision_demo_path.mkdir(parents=True, exist_ok=True)
    
!ls demos/foodvision_mini/ # we have no files currently

#### Creating a folder of example images to use with the demo app
* Using 3 images from the test set


In [None]:
# Create an examples directory inside the demo directory
foodvision_mini_examples_path = foodvision_demo_path/"examples"
foodvision_mini_examples_path.mkdir(parents = True,
                                   exist_ok = True)

# Paths for example images.
# Forward slashes are used because backslash separators only resolve on Windows,
# and "\s" / "\p" are invalid escape sequences inside a normal string literal.
food_mini_examples = [Path("data/sushi_served.png"),
                     Path("data/pizza_on_fire.jpg"),
                     Path("data/steak.jpg")]

# Copy the three images to the examples directory (copy2 also copies file metadata)
for example in food_mini_examples:
    destination = foodvision_mini_examples_path / example.name
    print(f"Copying {example} to {destination}")
    shutil.copy2(src = example,
                dst = destination)


 Let's verify that we can get a list of lists from our `examples/` directory

In [None]:
import os

# Get example filepaths in a list of lists
example_list = [["examples/" + example] for example in os.listdir(foodvision_mini_examples_path)]
example_list

In [None]:
# Moving trained EffNet model to our Foodvision demos directory

# Create a source path for our target model
effnet_foodvision_mini_model_path = "models/state_dict__effnetb2_trained_on_pizza_steak_sushi_20%.pth"

# Create a destination path for target model (same filename, inside the demo directory)
effnetb2_model_demo_destination = foodvision_demo_path / Path(effnet_foodvision_mini_model_path).name

# Moving the model file
try:
    print(f"Attempting to move effnetb2 model path to {effnetb2_model_demo_destination}\n")

    # Move the model
    shutil.move(src = effnet_foodvision_mini_model_path,
               dst = effnetb2_model_demo_destination)
    print("Model moved complete\n")

# A missing source file usually means the model has already been moved, so check the destination.
# Any other failure (e.g. permissions) is left to surface instead of being reported as "already moved".
except FileNotFoundError:
    print(f"No model found at {effnet_foodvision_mini_model_path}, perhaps it already moved\n")
    print(f"Model exists at {effnetb2_model_demo_destination} : {effnetb2_model_demo_destination.exists()}")

### Turning our EffNetB2 model into a Python script `model.py`

We have a saved `.pth` model `state_dict` and want to load it into a model instance.

Let's move our `create_effnetb2_model()` function to a script so we can reuse it

In [None]:
%%writefile demos/foodvision_mini/model.py
import torch
import torchvision

from torch import nn


def create_effnetb2_model(num_classes:int=3, 
                          seed:int=42):
    """Builds a frozen, pretrained EfficientNetB2 with a new classifier head.

    Args:
        num_classes (int, optional): number of output units of the new
            classifier head. Defaults to 3.
        seed (int, optional): value passed to `torch.manual_seed` right before
            the new head is created. Defaults to 42.

    Returns:
        model (torch.nn.Module): EffNetB2 whose pretrained parameters are frozen
            and whose classifier has been replaced.
        transforms (torchvision.transforms): image transforms obtained from the
            pretrained weights.
    """
    # Default pretrained weights, the transforms that go with them, and the model itself
    pretrained_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    transforms = pretrained_weights.transforms()
    model = torchvision.models.efficientnet_b2(weights=pretrained_weights)

    # Freeze everything loaded so far; the head created below is new and stays trainable
    for base_param in model.parameters():
        base_param.requires_grad_(False)

    # Seed first so the new linear layer is initialised reproducibly
    torch.manual_seed(seed)
    dropout_layer = nn.Dropout(p=0.3, inplace=True)
    output_layer = nn.Linear(in_features=1408, out_features=num_classes)
    model.classifier = nn.Sequential(dropout_layer, output_layer)

    return model, transforms

In [None]:
from demos.foodvision_mini import model

effnet_model, effnetb2_transforms_import = model.create_effnetb2_model()
effnetb2_transforms_import

### Turning Foodvision Gradio app into a Python script

The `app.py` file will have four major parts:
1. imports and class names setup
2. model and transforms preparation
3. predict function `predict()`
4. Gradio app

In [None]:
%%writefile demos/foodvision_mini/app.py

# 1.
import gradio as gr
import os
import torch

from model import create_effnetb2_model
from timeit import default_timer as timer
from typing import Tuple, Dict

# Setup class names
class_names = ['pizza', 'steak', 'sushi']

# 2.
effnetb2, effnetb2_transforms = create_effnetb2_model(num_classes = len(class_names))

# Load saved weights
effnetb2.load_state_dict(torch.load(f = "state_dict__effnetb2_trained_on_pizza_steak_sushi_20%.pth",
                                   map_location = torch.device("cpu")))

# 3.

def predict(img) -> Tuple[Dict, float]:
    """Classifies one image with EffNetB2 and reports how long the prediction took.

    Args:
        img: image to classify; it is passed directly to `effnetb2_transforms`
            (the Gradio input below is configured with type='pil').

    Returns:
        A tuple of (dictionary mapping each class name to its predicted
        probability, prediction time in seconds rounded to 4 decimals).
    """
    tic = timer()

    # Preprocess the image and add a batch dimension
    batch = effnetb2_transforms(img).unsqueeze(0)

    # Inference only: eval mode and no gradient tracking
    effnetb2.eval()
    with torch.inference_mode():
        # Logits -> probabilities across the class dimension
        probs = torch.softmax(effnetb2(batch), dim=1)

    # Pair every class name with its probability
    pred_labels_and_probs = {}
    for idx, label in enumerate(class_names):
        pred_labels_and_probs[label] = float(probs[0][idx])

    # The elapsed time covers preprocessing, the forward pass and building the dictionary
    pred_time = round(timer() - tic, 4)

    return pred_labels_and_probs, pred_time

# 4.
title = 'FoodIdentifier 🍣🍕🥩'
description = "An EfficientNetB2 feature extractor computer vision model to classify images as pizza, sushi or steak"
article = " anything I want for the description of the description above 🤪"

# Create example list
# Get example filepaths in a list of lists
example_list = [["examples/" + example] for example in os.listdir("examples")]

# Create the Gradio demo
demo = gr.Interface(fn = predict, # maps input to output
                    inputs = gr.Image(type = 'pil'),
                    outputs = [gr.Label(num_top_classes = 3, label = "Predictions"),
                              gr.Number(label = "Prediction time (s)")],
                    examples = example_list,
                    title = title,
                    description = description,
                    article = article
                   )

# Launch the demo
demo.launch(debug = False, # print errors locally?
           share = True) # generate a publically shareable URL

#### Creating `requirements.txt`

The requirement file will tell hugging face space what software dependencies are required for the app

The three main ones are:
* torch
* torchvision
* gradio

In [None]:
%%writefile demos/foodvision_mini/requirements.txt

torch>=1.12.0
torchvision>=0.13.0
gradio>=3.1.4

### Running Gradio app locally

### Deploying foodvision mini app into HuggingFace
Upload files in proper format like done in this notebook with the help of git.

### Hugging face to notebook
Below code helps.

this format must be the same in all except the username and space name
https://hf.space/embed/ayusk/food_identifier-pizza-steak-sushi/+


In [None]:
# Ipython is a library to help make Python interactive
from IPython.display import IFrame

# Embed FoodVision Mini Gradio demo
IFrame(src = "https://hf.space/embed/ayusk/food_identifier-pizza-steak-sushi/+", width = 900, height = 750)

# Creating FoodVision Big + Transforms

In [None]:
# Create Food101 model and transforms
# `create_effnetb2_model` is defined in demos/foodvision_mini/model.py (written earlier with
# %%writefile); only the `model` module was imported before, so import the function explicitly
from demos.foodvision_mini.model import create_effnetb2_model

effnetb2_food101, effnetb2_transforms = create_effnetb2_model(num_classes = 101)

In [None]:
from torchinfo import summary

# Print EffNetB2 model summary (uncomment for full output) 
summary(effnetb2_food101, 
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Since we're working with a larger dataset, we may want to introduce some data augmentation techniques:
* This is because with larger datasets and larger models, overfitting becomes more of a problem.
* Because we're working with a large number of classes, let's use TrivialAugment as our data augmentation technique.

In [None]:
# Creating training transforms
food101_train_transform = torchvision.transforms.Compose([torchvision.transforms.TrivialAugmentWide(),
                                                      effnetb2_transforms])

food101_train_transform

In [None]:
# Testing data transforms
effnetb2_transforms

## Getting food vision big dataset

In [None]:
from torchvision import datasets

# Setup data directory
from pathlib import Path
data_dir = Path("data")

# Get the training data (~750 images x 101 classes)
# Uses the augmented training transform built above (TrivialAugmentWide + EffNetB2 transforms)
train_data = datasets.Food101(root = data_dir,
                             split = "train",
                             transform = food101_train_transform, # transform training data
                             download = True)

# Get the testing data (~250 images x 101 classes)
# No augmentation at test time: only the plain EffNetB2 transforms
test_data = datasets.Food101(root = data_dir,
                             split = "test",
                             transform = effnetb2_transforms, # transform test data
                             download = True)

In [None]:
# Get the Food101 class names from the training dataset's `classes` attribute
food101_class_names = train_data.classes

# View the first 10 class names
food101_class_names[:10]

### Creating a subset of the food101 dataset for faster experimenting

We make a 20% subset of the dataset (both training and test) so experiments run faster.

Our short-term goal: to beat the original Food101 paper's result of 56.40% accuracy on the test dataset.

Paper source: https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/static/bossard_eccv14_food-101.pdf

That result was obtained with Random Forests. We want to beat it using modern deep learning networks.

In [None]:
from torch.utils.data import random_split

def split_dataset(dataset: torch.utils.data.Dataset,
                 split_size:float = 0.2,
                 seed: int = 42):
    """Randomly splits a dataset into two non-overlapping subsets.

    Args:
        dataset: any sized, indexable dataset accepted by
            torch.utils.data.random_split.
        split_size (float, optional): fraction of the dataset that goes into the
            first split, between 0 and 1. Defaults to 0.2.
        seed (int, optional): random seed used for the split. Defaults to 42.

    Returns:
        (random_split_1, random_split_2): two torch.utils.data.Subset objects of
        length int(len(dataset) * split_size) and the remaining length.

    Raises:
        ValueError: if split_size is outside the range [0, 1].
    """
    # Fail early with a clear message instead of passing a negative length on
    if not 0 <= split_size <= 1:
        raise ValueError(f"split_size must be between 0 and 1, got {split_size}")

    # Create split lengths based on original dataset length
    length_1 = int(len(dataset) * split_size)
    length_2 = len(dataset) - length_1 # remaining length

    # Print out info
    print(f"[INFO] Splitting dataset of length {len(dataset)} into splits of size: {length_1} and {length_2}")

    # Create splits with given random seed
    # NOTE: torch.manual_seed seeds the global RNG and returns the default generator
    random_split_1, random_split_2 = torch.utils.data.random_split(dataset,
                                                                  lengths = [length_1, length_2],
                                                                  generator = torch.manual_seed(seed))

    return random_split_1, random_split_2

In [None]:
# Keep the first (20%) split of the Food101 training data; the other split is discarded
train_data_food101_20_percent, _ = split_dataset(dataset=train_data, split_size=0.2)

# Do the same for the Food101 testing data
test_data_food101_20_percent, _ = split_dataset(dataset=test_data, split_size=0.2)

### Turning food101 datasets into `DataLoaders`

In [None]:
import torch

NUM_WORKERS = 2
BATCH_SIZE = 32

# Training DataLoader over the 20% training split (shuffled)
train_dataloader_food101_20_percent = torch.utils.data.DataLoader(
    dataset=train_data_food101_20_percent,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

# Testing DataLoader over the 20% testing split (not shuffled)
test_dataloader_food101_20_percent = torch.utils.data.DataLoader(
    dataset=test_data_food101_20_percent,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [None]:
# Number of batches in the 20% training and testing DataLoaders
len(train_dataloader_food101_20_percent), len(test_dataloader_food101_20_percent)

### Training food vision big

Things for training
* 5 epochs
* Optimizer: torch.optim.Adam(lr = 1e-3)
* Loss function: torch.nn.CrossEntropyLoss(label_smoothing = 0.1)

Label smoothing helps to prevent overfitting (it's a regularization technique).

Without label smoothing and 5 classes.
`[0.00, 0.00, 0.99, 0.01, 0.00]`

With label smoothing and 5 classes.
`[0.01, 0.01, 0.96, 0.01, 0.01]` 

**Label smoothing (a regularization technique)** assigns at least some value to the other classes, which prevents the model from becoming overconfident (overfitting).


In [None]:
from going_modular import engine

# Optimizer and loss function (label smoothing of 0.1 acts as regularization)
optimizer = torch.optim.Adam(params=effnetb2_food101.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss(label_smoothing=0.1)

# Goal: beat the original Food101 paper's 56.4% test accuracy using 20% of the data
# Seed the CPU and CUDA random number generators before training
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Train for 5 epochs on the 20% DataLoaders and keep the returned results
effnet_food101_results = engine.train(
    model=effnetb2_food101,
    train_dataloader=train_dataloader_food101_20_percent,
    test_dataloader=test_dataloader_food101_20_percent,
    epochs=5,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
)

### Inspecting loss curves of foodvision Big model


In [None]:
from helper_functions import plot_loss_curves

# Plot curves from the results returned by engine.train for the 20% model
plot_loss_curves(effnet_food101_results)

### Save and load FoodVision Big model

In [None]:
from going_modular import utils

# File name for the saved FoodVision Big (20% data) state dict
effnetb2_food101_model_path = "state_dict__effnetb2_food101_20_percent.pth"

# Save the model's state dict under models/ (the next cell loads it from there)
utils.save_model_state_dict(
    model=effnetb2_food101,
    target_dir='models/',
    model_name=effnetb2_food101_model_path,
)

In [None]:
# Create a fresh Food101-compatible EffNetB2 instance (101 output classes)
# NOTE: this also rebinds `effnetb2_transforms`
loaded_effnetb2_food101, effnetb2_transforms = create_effnetb2_model(num_classes = 101)

# Load the saved state dict from disk into the fresh instance
loaded_effnetb2_food101.load_state_dict(torch.load("models/state_dict__effnetb2_food101_20_percent.pth"))

#### Checking foodvision big model size

In [None]:
from pathlib import Path

# File size in bytes, floor-divided by 1024 * 1024 to get whole megabytes
pretrained_effnetb2_food101_size = (
    (Path("models") / effnetb2_food101_model_path).stat().st_size // (1024 * 1024)
)
print(f"effnetb2 foodvision big 20 percent model size : {pretrained_effnetb2_food101_size} MB")

## Turning foodvision big model into a deployable app

In [None]:
from pathlib import Path

# Create path to Food101 class names.
# Built from the literal demo directory: the previously used `foodvision_big_path`
# is only defined in a much later cell, so this cell failed on a fresh kernel.
foodvision_big_class_names_path = Path("demos/foodvision_big") / "class_names.txt"
foodvision_big_class_names_path

In [None]:
from pathlib import Path

# Root folder for the FoodVision Big demo files
foodvision_big_demo_path = Path("demos/foodvision_big")

# Create the demo folder, then its "examples" subfolder
# (missing parents are created; existing folders are not an error)
foodvision_big_demo_path.mkdir(parents=True, exist_ok=True)
(foodvision_big_demo_path / "examples").mkdir(parents=True, exist_ok=True)


In [None]:
# Check the first 10 Food101 class names before writing them to file
food101_class_names[:10]

In [None]:
# Create the path of the class names file inside the FoodVision Big demo directory
foodvision_big_class_names_path = foodvision_big_demo_path / "class_names.txt"
foodvision_big_class_names_path

#### Write food101 class names to text file


In [None]:
# Write the Food101 class names to the demo's text file, one class name per line
with open(foodvision_big_class_names_path, "w") as f:
    print(f"[INFO] saving food101 class names to {foodvision_big_class_names_path}")
    f.write("\n".join(food101_class_names)) # names joined by newlines, no trailing newline

In [None]:
# Read the class names file back in: one list entry per line, newline characters stripped
with open(foodvision_big_class_names_path, "r") as f:
    food101_class_names_loaded = [line.strip('\n') for line in f]

# Show the first five loaded names
food101_class_names_loaded[:5]

#### model.py file

In [None]:
%%writefile demos/foodvision_big/model.py
import torch
import torchvision

from torch import nn


def create_effnetb2_model(num_classes:int=3, 
                          seed:int=42):
    """Builds an EfficientNetB2 feature extractor together with its image transforms.

    The pretrained base is frozen and the classifier is replaced by a new
    Dropout + Linear head with `num_classes` outputs.

    Args:
        num_classes (int, optional): number of outputs of the new classifier head.
            Defaults to 3.
        seed (int, optional): random seed set right before the new head is created.
            Defaults to 42.

    Returns:
        model (torch.nn.Module): EffNetB2 feature extractor model.
        transforms (torchvision.transforms): EffNetB2 image transforms.
    """
    # Pretrained weights, the transforms bundled with them, and the model itself
    pretrained_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    transforms = pretrained_weights.transforms()
    model = torchvision.models.efficientnet_b2(weights=pretrained_weights)

    # Freeze every parameter that exists at this point (the whole pretrained model)
    for base_param in model.parameters():
        base_param.requires_grad = False

    # Seed first so the new head's initial weights are reproducible
    torch.manual_seed(seed)
    new_head = [
        nn.Dropout(p=0.3, inplace=True),
        nn.Linear(in_features=1408, out_features=num_classes),
    ]
    model.classifier = nn.Sequential(*new_head)

    return model, transforms

#### app.py

In [None]:
%%writefile demos/foodvision_big/app.py

# 1.
import gradio as gr
import os
import torch

from model import create_effnetb2_model
from timeit import default_timer as timer
from typing import Tuple, Dict

# Setup class names: one class name per line of class_names.txt
with open("class_names.txt", "r") as f:
    class_names = [line.strip('\n') for line in f]

# 2.
# Rebuild the EffNetB2 architecture with a 101-class head
effnetb2_food101, effnetb2_transforms = create_effnetb2_model(num_classes=101)

# Load saved weights, mapping all tensors to the CPU
effnetb2_food101.load_state_dict(
    torch.load(
        "models/state_dict__effnetb2_food101_20_percent.pth",
        map_location=torch.device('cpu'),
    )
)


# 3.
def predict(img) -> Tuple[Dict, float]:
    """Classifies one image with the EffNetB2 model and times the prediction.

    Args:
        img: input image; it is passed directly to `effnetb2_transforms`.

    Returns:
        A tuple of (dictionary mapping every class name to its predicted
        probability, elapsed time in seconds rounded to 4 decimal places).
    """
    started_at = timer()

    # Preprocess the image and add a batch dimension at position 0
    batch = effnetb2_transforms(img).unsqueeze(0)

    # Eval mode + inference mode: forward pass only, logits turned into probabilities
    effnetb2_food101.eval()
    with torch.inference_mode():
        probabilities = torch.softmax(effnetb2_food101(batch), dim=1)

    # One entry per class name, in class_names order
    pred_labels_and_probs = {
        label: float(probabilities[0][idx]) for idx, label in enumerate(class_names)
    }

    # Elapsed time covers preprocessing, the forward pass and building the dictionary
    pred_time = round(timer() - started_at, 4)

    return pred_labels_and_probs, pred_time

# 4.
title = 'FoodIdentifier Big (a little) 🍣🍕🥩'
# The model loaded above has a 101-class head, so describe it as a Food101 classifier
# (the previous text still described the three-class pizza/steak/sushi demo)
description = "An EfficientNetB2 feature extractor computer vision model to classify images of food into 101 different classes"
article = " anything I want for the description of the description above 🤪"

# Create example list
# Get example filepaths in a list of lists (one single-item list per file in examples/)
example_list = [["examples/" + example] for example in os.listdir("examples")]

# Create the Gradio demo
demo = gr.Interface(fn = predict, # maps input to output
                    inputs = gr.Image(type = 'pil'),
                    outputs = [gr.Label(num_top_classes = 5, label = "Predictions"), # predict's dictionary
                              gr.Number(label = "Prediction time (s)")], # predict's elapsed time
                    examples = example_list,
                    title = title,
                    description = description,
                    article = article
                   )

# Launch the demo
demo.launch(debug = False, # print errors locally?
           share = True) # generate a publically shareable URL

In [None]:
%%writefile demos/foodvision_big/requirements.txt
# Lower bounds are inclusive: this notebook targets torch 1.12+ and torchvision 0.13+.

torch>=1.12.0
torchvision>=0.13.0
gradio>3.1.4

# Food Vision Big (100% data)

In [None]:
import torch
import torchvision

from torch import nn


def create_effnetb2_model(num_classes:int=3, 
                          seed:int=42):
    """Returns a frozen pretrained EfficientNetB2 with a new classifier head, plus its transforms.

    Args:
        num_classes (int, optional): size of the output layer of the new head.
            Defaults to 3.
        seed (int, optional): seed passed to torch.manual_seed before the head
            is built. Defaults to 42.

    Returns:
        model (torch.nn.Module): EffNetB2 feature extractor model.
        transforms (torchvision.transforms): EffNetB2 image transforms.
    """
    # Default pretrained weights and the preprocessing that belongs to them
    effnet_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    transforms = effnet_weights.transforms()

    # Pretrained model with every existing parameter frozen
    model = torchvision.models.efficientnet_b2(weights=effnet_weights)
    for frozen_param in model.parameters():
        frozen_param.requires_grad = False

    # Seed, then attach the replacement head (created after the freeze, so it stays trainable)
    torch.manual_seed(seed)
    dropout_layer = nn.Dropout(p=0.3, inplace=True)
    output_layer = nn.Linear(in_features=1408, out_features=num_classes)
    model.classifier = nn.Sequential(dropout_layer, output_layer)

    return model, transforms

In [None]:
# EffNetB2 for the full Food101 run: 101-class head, plus the transforms used for test data
effnet_b2_foodvision100, effnet_b2_foodvision100_test_transforms = create_effnetb2_model(
    num_classes=101,
    seed=42,
)

In [None]:
# Training pipeline: TrivialAugmentWide augmentation first, then the test-time transforms
effnet_b2_foodvision100_train_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.TrivialAugmentWide(),
        effnet_b2_foodvision100_test_transforms,
    ]
)

effnet_b2_foodvision100_train_transforms

In [None]:
from torchinfo import summary

# Summarise the full-data EffNetB2: per-layer input/output sizes, parameter counts
# and whether each layer is trainable, for a single 3x224x224 input
summary(
    effnet_b2_foodvision100,
    input_size=(1, 3, 224, 224),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

## Dataset

In [None]:
from torchvision import datasets

# Setup data directory
from pathlib import Path
data_dir = Path("data")

# Full Food101 training split (~750 images x 101 classes), with augmentation
train_data = datasets.Food101(
    root=data_dir,
    split="train",
    transform=effnet_b2_foodvision100_train_transforms,
    download=True,
)

# Full Food101 testing split (~250 images x 101 classes), without augmentation
test_data = datasets.Food101(
    root=data_dir,
    split="test",
    transform=effnet_b2_foodvision100_test_transforms,
    download=True,
)

## Dataloaders

In [None]:
import torch

NUM_WORKERS = 2
BATCH_SIZE = 32

# Training DataLoader over the full Food101 training data (shuffled)
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

# Testing DataLoader over the full Food101 testing data (not shuffled)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [None]:
# Number of samples in the full training and testing datasets
len(train_data), len(test_data)

In [None]:
# Number of batches in the full training and testing DataLoaders
len(train_dataloader), len(test_dataloader)

### Training food vision big

Things for training
* 5 epochs
* Optimizer: torch.optim.Adam(lr = 1e-3)
* Loss function: torch.nn.CrossEntropyLoss(label_smoothing = 0.1)

Label smoothing helps to prevent overfitting (it's a regularization technique).

Without label smoothing and 5 classes.
`[0.00, 0.00, 0.99, 0.01, 0.00]`

With label smoothing and 5 classes.
`[0.01, 0.01, 0.96, 0.01, 0.01]` 

**Label smoothing (a regularization technique)** assigns at least some value to the other classes, which prevents the model from becoming overconfident (overfitting).

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
from going_modular import engine

# Optimizer and loss function (label smoothing of 0.1 acts as regularization)
optimizer = torch.optim.Adam(params=effnet_b2_foodvision100.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss(label_smoothing=0.1)

# Goal: beat the original Food101 paper's 56.4% test accuracy using 100% of the data
# Seed the CPU and CUDA random number generators before training
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Train for 20 epochs on the full DataLoaders and keep the returned results
effnet_food101_full_results = engine.train(
    model=effnet_b2_foodvision100,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    epochs=20,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
)

In [None]:
from helper_functions import plot_loss_curves

# Plot curves from the results returned by engine.train for the full-data EffNetB2 model
plot_loss_curves(effnet_food101_full_results)

In [None]:
from pathlib import Path

# Root folder for the 100%-data FoodVision Big demo files
foodvision_big_path = Path("demos/foodvision_100percent")

# Create the demo folder, then its "examples" subfolder
# (missing parents are created; existing folders are not an error)
foodvision_big_path.mkdir(parents=True, exist_ok=True)
(foodvision_big_path / "examples").mkdir(parents=True, exist_ok=True)

### Save and load FoodVision Big model

In [None]:
from going_modular import utils

# File name for the saved full-data EffNetB2 state dict
effnetb2_food101_full_model_path = "state_dict__effnetb2_food101_100_percent.pth"

# Save the state dict inside the demo's models/ folder (the next cell loads it from there)
utils.save_model_state_dict(
    model=effnet_b2_foodvision100,
    target_dir='demos/foodvision_100percent/models/',
    model_name=effnetb2_food101_full_model_path,
)

In [None]:
# Create a fresh Food101-compatible EffNetB2 instance (101 output classes)
# NOTE: this rebinds `loaded_effnetb2_food101` and `effnetb2_transforms` from the 20% section
loaded_effnetb2_food101, effnetb2_transforms = create_effnetb2_model(num_classes = 101)

# Load the saved full-data state dict from disk into the fresh instance
loaded_effnetb2_food101.load_state_dict(torch.load("demos/foodvision_100percent/models/state_dict__effnetb2_food101_100_percent.pth"))

### Checking foodvision big model size

In [None]:
from pathlib import Path

# File size in bytes, floor-divided by 1024 * 1024 to get whole megabytes
pretrained_effnetb2_food101_big_size = (
    (Path("demos/foodvision_100percent/models") / effnetb2_food101_full_model_path).stat().st_size
    // (1024 * 1024)
)
print(f"effnetb2 foodvision big full model size : {pretrained_effnetb2_food101_big_size} MB")

# VIT full model

In [None]:
from torch import nn

# Seed the CPU and CUDA random number generators
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Pretrained ViT-Base/16 weights ('DEFAULT' = best available) and their bundled transforms
vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT
vit_transforms = vit_weights.transforms()

# ViT model instance with the pretrained weights, moved to the target device
vit = torchvision.models.vit_b_16(weights=vit_weights).to(device)

# Freeze every parameter of the pretrained model
for parameter in vit.parameters():
    parameter.requires_grad = False

In [None]:
# Training pipeline: TrivialAugmentWide augmentation first, then the ViT transforms
vit_train_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.TrivialAugmentWide(),
        vit_transforms,
    ]
)

vit_train_transforms

In [None]:
# Replace the classifier head with a new linear layer: 768 input features -> 101 classes
vit.heads = nn.Linear(in_features=768, out_features=101).to(device)
vit.heads

In [None]:
from torchvision import datasets

# Setup data directory
from pathlib import Path
data_dir = Path("data")

# Full Food101 training split (~750 images x 101 classes), with augmentation
train_data_vit = datasets.Food101(
    root=data_dir,
    split="train",
    transform=vit_train_transforms,
    download=True,
)

# Full Food101 testing split (~250 images x 101 classes), without augmentation
test_data_vit = datasets.Food101(
    root=data_dir,
    split="test",
    transform=vit_transforms,
    download=True,
)

In [None]:
import torch

NUM_WORKERS = 2
BATCH_SIZE = 32

# Training DataLoader over the full ViT training dataset (shuffled)
train_dataloader_vit = torch.utils.data.DataLoader(
    dataset=train_data_vit,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

# Testing DataLoader over the full ViT testing dataset (not shuffled)
test_dataloader_vit = torch.utils.data.DataLoader(
    dataset=test_data_vit,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [None]:
# Summarise the ViT model with torchinfo: per-layer input/output sizes,
# parameter counts and whether each layer is trainable
from torchinfo import summary

summary(
    model=vit,
    input_size=(1, 3, 224, 224),  # a single image with 3 channels, 224 x 224
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

In [None]:
# Shape of the first parameter tensor yielded by vit.parameters()
next(iter(vit.parameters())).shape

In [None]:
from going_modular import engine

# Optimizer and loss function (label smoothing of 0.1 acts as regularization)
optimizer = torch.optim.Adam(params=vit.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss(label_smoothing=0.1)

# Seed the CPU and CUDA random number generators for reproducibility
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Train for 7 epochs on the full ViT DataLoaders and keep the returned results
vit_results = engine.train(
    model=vit,
    train_dataloader=train_dataloader_vit,
    test_dataloader=test_dataloader_vit,
    epochs=7,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
)

In [None]:
from helper_functions import plot_loss_curves

# Plot curves from the results returned by engine.train for the ViT model
plot_loss_curves(vit_results)

In [None]:
from helper_functions import plot_loss_curves

# NOTE(review): this cell repeats the plotting call of the cell directly before it
plot_loss_curves(vit_results)

In [None]:
from going_modular import utils

# File name for the saved full-data ViT state dict
vit_food101_full_model_path = "state_dict__vit_food101_100_percent.pth"

# Save the state dict inside the demo's models/ folder (the next cell loads it from there)
utils.save_model_state_dict(
    model=vit,
    target_dir='demos/foodvision_100percent/models/',
    model_name=vit_food101_full_model_path,
)

In [None]:
# Rebuild the same ViT architecture, then load the saved state dict into it

torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Pretrained ViT-Base/16 weights ('DEFAULT' = best available) and their bundled transforms
vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT
vit_transforms = vit_weights.transforms()

# Fresh ViT instance with the pretrained weights, moved to the target device
loaded_vit_food101 = torchvision.models.vit_b_16(weights=vit_weights).to(device)

# Freeze every parameter of the pretrained model
for parameter in loaded_vit_food101.parameters():
    parameter.requires_grad = False

# Same head layout as the trained model: 768 input features -> 101 classes
loaded_vit_food101.heads = nn.Linear(in_features=768, out_features=101).to(device)

# Load the saved state dict from disk into the fresh instance
loaded_vit_food101.load_state_dict(
    torch.load("demos/foodvision_100percent/models/state_dict__vit_food101_100_percent.pth")
)

In [None]:
from pathlib import Path

vit_food101_full_model_path = "state_dict__vit_food101_100_percent.pth"

# File size in bytes, floor-divided by 1024 * 1024 to get whole megabytes
vit_food101_big_size = (
    (Path("demos/foodvision_100percent/models") / vit_food101_full_model_path).stat().st_size
    // (1024 * 1024)
)
print(f"ViT foodvision big full model size : {vit_food101_big_size} MB")

In [None]:
# Total number of elements across all parameter tensors of the loaded ViT
vit_total_params = sum(weight.numel() for weight in loaded_vit_food101.parameters())
vit_total_params

In [None]:
 # Create a dictionary with ViT statistics
# Final-epoch test metrics plus parameter count and saved file size
vit_stats = {
    "test loss": vit_results["test_loss"][-1],
    "test acc": vit_results["test_acc"][-1],
    "number of parameters": vit_total_params,
    "model size (MB)": vit_food101_big_size,
}
vit_stats

### Making predictions with our trained models and timing them

* Both models perform well (95%+)

**Testing models:**
1. Loop through test images
2. Time how long each model takes to make a prediction on the image

We need at least 30 FPS (30+ predictions per second).


In [None]:
# dataloader_.sampler.data_source.dataset.imgs

# Turning foodvision big model into a deployable app

#### Write food101 class names to text file

In [None]:
# Take the Food101 class names from the ViT training dataset's `classes` attribute
food101_class_names = train_data_vit.classes

In [None]:
# Create the demo's examples directory (missing parents are created; no error if it exists)
foodvision_101_examples_path = Path("demos/foodvision_100percent/examples")
foodvision_101_examples_path.mkdir(parents=True, exist_ok=True)


In [None]:
# Path of the class names file inside the 100%-data demo directory
foodvision_big_class_names_path = foodvision_big_path / "class_names.txt"

# Write the Food101 class names to that file, one class name per line
with open(foodvision_big_class_names_path, "w") as f:
    print(f"[INFO] saving food101 class names to {foodvision_big_class_names_path}")
    f.write("\n".join(food101_class_names)) # names joined by newlines, no trailing newline

In [None]:
# Read the class names file back in: one list entry per line, newline characters stripped
with open(foodvision_big_class_names_path, "r") as f:
    food101_class_names_loaded = [line.strip('\n') for line in f]

# Show the first five loaded names
food101_class_names_loaded[:5]

In [None]:
%%writefile demos/foodvision_100percent/requirements.txt
# Lower bounds are inclusive: this notebook targets torch 1.12+ and torchvision 0.13+.

torch>=1.12.0
torchvision>=0.13.0
gradio>3.1.4

#### model.py

In [None]:
%%writefile demos/foodvision_100percent/model.py
import torch
import torchvision

from torch import nn


def create_vit_model(num_classes:int=101, 
                          seed:int=42):
    """Creates a ViT-B/16 feature extractor model and its image transforms.

    The pretrained model is frozen and its `heads` attribute is replaced by a
    new linear layer with `num_classes` outputs.

    Args:
        num_classes (int, optional): number of classes in the classifier head. 
            Defaults to 101.
        seed (int, optional): random seed set right before the new head is
            created. Defaults to 42.

    Returns:
        model (torch.nn.Module): ViT feature extractor model. 
        transforms (torchvision.transforms): ViT image transforms.
    """
    # Create ViT pretrained weights, transforms and model
    vit_weights = torchvision.models.ViT_B_16_Weights.DEFAULT # 'DEFAULT' = best available
    transforms = vit_weights.transforms()
    model = torchvision.models.vit_b_16(weights = vit_weights)

    # Freeze all layers in base model
    for param in model.parameters():
        param.requires_grad = False

    # Change classifier head with random seed for reproducibility.
    # The head size follows `num_classes` (it was previously hard-coded to 101,
    # which silently ignored the argument).
    torch.manual_seed(seed)
    model.heads = nn.Linear(in_features = 768,
                        out_features = num_classes)
    
    return model, transforms

#### app.py

In [None]:
%%writefile demos/foodvision_100percent/app.py

# 1.
import gradio as gr
import os
import torch

# model.py defines `create_vit_model`; importing the non-existent `create_vit`
# made the app fail at start-up with an ImportError
from model import create_vit_model
from timeit import default_timer as timer
from typing import Tuple, Dict

# Setup class names: one class name per line of class_names.txt
with open("class_names.txt", "r") as f:
    class_names = [food.strip('\n') for food in f.readlines()]

# 2.
# Rebuild the ViT architecture with a 101-class head
vit_food101, vit_transforms = create_vit_model(num_classes = 101)

# Load saved weights, mapping all tensors to the CPU
vit_food101.load_state_dict(torch.load("models/state_dict__vit_food101_100_percent.pth",
                                           map_location = torch.device('cpu')))


# 3.
def predict(img) -> Tuple[Dict, float]:
    """Classifies one image with the ViT model and times the prediction.

    Args:
        img: input image; it is passed directly to `vit_transforms`.

    Returns:
        A tuple of (dictionary mapping every class name to its predicted
        probability, elapsed time in seconds rounded to 4 decimal places).
    """
    started_at = timer()

    # Preprocess the image and add a batch dimension at position 0
    batch = vit_transforms(img).unsqueeze(0)

    # Eval mode + inference mode: forward pass only, logits turned into probabilities
    vit_food101.eval()
    with torch.inference_mode():
        probabilities = torch.softmax(vit_food101(batch), dim=1)

    # One entry per class name, in class_names order
    pred_labels_and_probs = {
        label: float(probabilities[0][idx]) for idx, label in enumerate(class_names)
    }

    # Elapsed time covers preprocessing, the forward pass and building the dictionary
    pred_time = round(timer() - started_at, 4)

    return pred_labels_and_probs, pred_time

# 4.
title = 'FoodIdentifier Big 💪🍕'
# The model loaded above has a 101-class head, so describe it as a Food101 classifier
# (the previous text still described the three-class pizza/steak/sushi demo)
description = "A Vision Transformer feature extractor computer vision model to classify images of food into 101 different classes"
article = " anything I want for the description of the description above 🤪"

# Create example list
# Get example filepaths in a list of lists (one single-item list per file in examples/)
example_list = [["examples/" + example] for example in os.listdir("examples")]

# Create the Gradio demo
demo = gr.Interface(fn = predict, # maps input to output
                    inputs = gr.Image(type = 'pil'),
                    outputs = [gr.Label(num_top_classes = 5, label = "Predictions"), # predict's dictionary
                              gr.Number(label = "Prediction time (s)")], # predict's elapsed time
                    examples = example_list,
                    title = title,
                    description = description,
                    article = article
                   )

# Launch the demo
demo.launch(debug = False, # print errors locally?
           share = True) # generate a publically shareable URL