### Import Necessary Libraries

In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt
from torchinfo import summary
from torch import nn
from torchvision import transforms
import data_setup, engine
from helper_functions import set_seeds, plot_loss_curves
from pathlib import Path
from PIL import Image
from timeit import default_timer as timer
from tqdm.auto import tqdm
from typing import List, Dict, Tuple
import pandas as pd
import gradio as gr
import random

In [2]:
device = 'cpu' # we are working on deploying our model on a server we do not have GPU access

### GEtting dataset

In [3]:
data_20_percent_path = Path("data/pizza_steak_sushi_20_percent")

In [4]:
train_dir = data_20_percent_path / "train"
test_dir = data_20_percent_path / "test"

### Getting our Pretrained Model

In [5]:
effnetb0_weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
eeffnetb0_transforms = effnetb0_weights.transforms()
effnetb0 = torchvision.models.efficientnet_b0(weights=effnetb0_weights)

In [6]:
summary(model=effnetb0,
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 1000]            --                   True
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1280, 7, 7]      --                   True
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   True
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    864                  True
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    64                   True
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 16, 112,

In [6]:
for param in effnetb0.features.parameters():
    param.requires_grad = False

changing the classifier head to match our dataset

In [8]:
effnetb0.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)

In [7]:
torch.manual_seed(42)

effnetb0.classifier = nn.Sequential(
    nn.Dropout(p=0.3, inplace=True),
    nn.Linear(in_features=1280, out_features=3)
)

In [10]:
effnetb0

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

checking our model summary is important, because it lets us to be aware if there is any potential error in the layers by running the input shape through them and giving the output dimension

In [8]:
summary(model=effnetb0,
        input_size=(1, 3, 224, 224),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 3]               --                   Partial
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1280, 7, 7]      --                   False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    (864)                False
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    (64)                 False
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 1

In [8]:
train_dataloader_effnetb0, test_dataloader_effnetb0, class_names = data_setup.create_dataloaders(train_dir=train_dir,
                                                                                                 test_dir=test_dir,
                                                                                                 transform=eeffnetb0_transforms,
                                                                                                 batch_size=32)

We have a trained version which we saved while we ran experiments. we can simply load this version

In [9]:
trained_effnetb0_path = Path("models/07_effnetb0_data_20_percent_10_epochs.pth")

In [10]:
trained_effnetb0_path.is_file()

True

In [17]:
f"{trained_effnetb0_path.stat().st_size // (1024*1024)} MB"

'15 MB'

In [11]:
effnetb0.load_state_dict(torch.load(trained_effnetb0_path))

  effnetb0.load_state_dict(torch.load(trained_effnetb0_path))


<All keys matched successfully>

### Prediction   
Checking how long it takes to make prediction using our model, we will be predicting for all test dataset we have

In [18]:
print(f"[INFO] Finding all files ending with '.jpg' in the directory {test_dir}")
test_data_paths = list(Path(test_dir).glob("*/*.jpg"))
test_data_paths[:5]

[INFO] Finding all files ending with '.jpg' in the directory data\pizza_steak_sushi_20_percent\test


[WindowsPath('data/pizza_steak_sushi_20_percent/test/pizza/1001116.jpg'),
 WindowsPath('data/pizza_steak_sushi_20_percent/test/pizza/1032754.jpg'),
 WindowsPath('data/pizza_steak_sushi_20_percent/test/pizza/1067986.jpg'),
 WindowsPath('data/pizza_steak_sushi_20_percent/test/pizza/129666.jpg'),
 WindowsPath('data/pizza_steak_sushi_20_percent/test/pizza/1315645.jpg')]

In [56]:
def pred_and_store(paths: List[Path], model: torch.nn.Module, transform: torchvision.transforms, class_names: List[str], device: str = "cuda" if torch.cuda.is_available() else "cpu") -> List[Dict]:
    pred_list = []

    for path in tqdm(paths):
        pred_dict = {}
        pred_dict["image_path"] = path
        class_name = path.parent.stem
        pred_dict["class_names"] = class_name

        start_time = timer()
        img = Image.open(path)

        transformed_image = transform(img).unsqueeze(0).to(device)

        model.to(device)
        model.eval()

        with torch.inference_mode():
            pred_logit = model(transformed_image)
            pred_prob = torch.softmax(pred_logit, dim=1)
            pred_label = torch.argmax(pred_prob, dim=1)
            pred_class = class_names[pred_label.cpu()]

            pred_dict["pred_prob"] = round(pred_prob.unsqueeze(0).max().cpu().item(), 4)
            pred_dict["pred_class"] = pred_class

        end_time = timer()
        pred_dict["time_for_pred"] = round(end_time - start_time, 4)
        
        pred_dict["correct"] = class_name == pred_class

        pred_list.append(pred_dict)

    return pred_list

        

In [57]:
effnetb0_test_pred_dicts = pred_and_store(paths=test_data_paths, model=effnetb0, transform=eeffnetb0_transforms, class_names=class_names,device='cpu')

  0%|          | 0/150 [00:00<?, ?it/s]

In [58]:
effnetb0_test_pred_dicts[:2]

[{'image_path': WindowsPath('data/pizza_steak_sushi_20_percent/test/pizza/1001116.jpg'),
  'class_names': 'pizza',
  'pred_prob': 0.991,
  'pred_class': 'pizza',
  'time_for_pred': 0.1105,
  'correct': True},
 {'image_path': WindowsPath('data/pizza_steak_sushi_20_percent/test/pizza/1032754.jpg'),
  'class_names': 'pizza',
  'pred_prob': 0.5806,
  'pred_class': 'pizza',
  'time_for_pred': 0.0768,
  'correct': True}]

In [60]:
effnetb0_test_pred_df = pd.DataFrame(effnetb0_test_pred_dicts)
effnetb0_test_pred_df.head(6)

Unnamed: 0,image_path,class_names,pred_prob,pred_class,time_for_pred,correct
0,data\pizza_steak_sushi_20_percent\test\pizza\1...,pizza,0.991,pizza,0.1105,True
1,data\pizza_steak_sushi_20_percent\test\pizza\1...,pizza,0.5806,pizza,0.0768,True
2,data\pizza_steak_sushi_20_percent\test\pizza\1...,pizza,0.9972,pizza,0.081,True
3,data\pizza_steak_sushi_20_percent\test\pizza\1...,pizza,0.9106,pizza,0.0873,True
4,data\pizza_steak_sushi_20_percent\test\pizza\1...,pizza,0.9857,pizza,0.0726,True
5,data\pizza_steak_sushi_20_percent\test\pizza\1...,pizza,0.9947,pizza,0.0801,True


In [61]:
effnetb0_test_pred_df.correct.value_counts()

correct
True     136
False     14
Name: count, dtype: int64

In [63]:
# Find the average time per prediction
effnetb2_average_time_per_pred = round(
    effnetb0_test_pred_df.time_for_pred.mean(), 4)
print(
    f"EffNetB2 average time per prediction: {effnetb2_average_time_per_pred} seconds")

EffNetB2 average time per prediction: 0.0844 seconds


### Getting predict function for Gradio

In [16]:
def predict(img) -> Tuple[Dict, float]:
    pred_list = []

    pred_dict = {}

    start_time = timer()

    img = eeffnetb0_transforms(img).unsqueeze(0)

    effnetb0.to(device)
    effnetb0.eval()

    with torch.inference_mode():
        pred_probs = torch.softmax(effnetb0(img), dim=1)

        # Create a prediction label and prediction probability dictionary for each prediction class (this is the required format for Gradio's output parameter)
        pred_labels_and_probs = {class_names[i]: float(pred_probs[0][i]) for i in range(len(class_names))}

    pred_time = round(timer() - start_time, 4)
    

    pred_list.append(pred_dict)

    return pred_labels_and_probs, pred_time

Trying out our function on a random Image

In [17]:
test_data_paths = list(Path(test_dir).glob("*/*.jpg"))
random_image_path = random.sample(test_data_paths, k=1)[0]
image = Image.open(random_image_path)
print(f"[INFO] Predicting on image at path: {random_image_path}\n")

pred_dict, pred_time = predict(img=image)
print(f"Prediction label and probability dictionary: \n{pred_dict}")
print(f"Prediction time: {pred_time} seconds")

[INFO] Predicting on image at path: data\pizza_steak_sushi_20_percent\test\sushi\2394442.jpg

Prediction label and probability dictionary: 
{'pizza': 0.006527615711092949, 'steak': 0.008591057732701302, 'sushi': 0.9848814010620117}
Prediction time: 0.2576 seconds


### List of lists containing example Images for gradio

In [18]:
example_list = [[str(filepath)] for filepath in random.sample(test_data_paths, k=3)]
example_list

[['data\\pizza_steak_sushi_20_percent\\test\\sushi\\46797.jpg'],
 ['data\\pizza_steak_sushi_20_percent\\test\\steak\\730464.jpg'],
 ['data\\pizza_steak_sushi_20_percent\\test\\steak\\3553838.jpg']]

### Gradio Interface

In [19]:
title = "FoodVision Mini 🍕🥩🍣"
description = "An EfficientNetB0 feature extractor computer vision model to classify images of food as pizza, steak or sushi."
article = "Full Source code from scratch [deployment.ipynb](https://github.com/Victoran0/food-vision.git)."

In [21]:
demo = gr.Interface(fn=predict, inputs=gr.Image(type='pil'), outputs=[gr.Label(num_top_classes=3, label='Predictions'), gr.Number(label="Prediction time (s)")], examples=example_list, title=title, description=description, article=article)

### Launching the Demo

In [27]:
demo.launch(debug=False, share=True)

Running on local URL:  http://127.0.0.1:7860

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




In [26]:
demo.close()

Closing server running on port: 7860
