# 09. PyTorch Model Deployment

In [1]:
## 0. Getting Setup

In [8]:
# Continue with regular imports
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

from going_modular import data_setup, engine
from helper_functions import download_data, set_seeds, plot_loss_curves

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Make our code device-agnostic: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"[INFO] Using {device} device.")

[INFO] Using cpu device.


## 1. Getting Data

The dataset we're going to use for deployment is the Food-101 dataset. It is a dataset containing 101,000 images of food items, organized in 101 categories. The dataset is divided into two main folders: one for the training set and one for the test set. Each of these folders contains 101 subfolders, each one corresponding to a category. Each subfolder contains the images of the corresponding category. The images are in jpg format and have a resolution of 512x512 pixels. The dataset is available for download at the following link: https://www.kaggle.com/dansbecker/food-101

But now we will use a smaller version of the dataset, which contains only 3 categories: pizza, steak and sushi. The dataset is available for download at the following link: https://github.com/mrdbourke/pytorch-deep-learning/blob/main/data/pizza_steak_sushi_20_percent.zip

In [10]:
# Fetch the 20% pizza/steak/sushi image subset from GitHub and keep the returned path
data_20_percent_path = download_data(
    source="https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip",
    destination="pizza_steak_sushi_20_percent",
)

data_20_percent_path

[INFO] Did not find data/pizza_steak_sushi_20_percent directory, creating one...
[INFO] Downloading pizza_steak_sushi_20_percent.zip from https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip...
[INFO] Unzipping pizza_steak_sushi_20_percent.zip data...


PosixPath('data/pizza_steak_sushi_20_percent')

In [12]:
# Build the train/test directory paths underneath the downloaded dataset folder
train_dir = data_20_percent_path.joinpath("train")
test_dir = data_20_percent_path.joinpath("test")

(train_dir, test_dir)

(PosixPath('data/pizza_steak_sushi_20_percent/train'),
 PosixPath('data/pizza_steak_sushi_20_percent/test'))

## 2. FoodVision Mini: Model Deployment

### 3 questions to ask before building a model:
1. What is my most ideal machine learning model deployment scenario?
2. Where is my model going to be used?
3. How is my model going to function in the real world?

#### 1. What is my most ideal machine learning model deployment scenario?
- **Scenario 1**: Model deployed on a smart phone
- **Scenario 2**: Model deployed on a web application
- **Scenario 3**: Model deployed on a server in the cloud

#### 2. Where is my model going to be used?
- **Location 1**: On a smart phone
- **Location 2**: On a web application
- **Location 3**: On a server in the cloud

#### 3. How is my model going to function in the real world?
- **Function 1**: Model is going to be used for real-time inference
- **Function 2**: Model is going to be used for batch inference
- **Function 3**: Model is going to be used for training other models


**FoodVision Mini ideal use case**: A model deployed on a web application for real-time inference.

1. Performs well: 95%+ accuracy on test set
2. Fast: Makes predictions in under 100ms (including pre and post processing) on a CPU or GPU (mobile phone or computer)

## 3. Creating an EfficientNetB3 feature extraction model

Feature extraction involves taking the pre-trained patterns a model has learned from another dataset and applying them to our own problem.

We can use the `torchvision` library to access a range of pre-trained models. In this case, we'll use the EfficientNet family of models.

EfficientNet is a family of convolutional neural networks that have been trained on ImageNet (a large dataset of images). The EfficientNet models are known for their efficiency and accuracy. We can use the pre-trained EfficientNet models to extract features from our own images.

The pretrained EfficientNetB3 model is available at: https://pytorch.org/vision/stable/models/generated/torchvision.models.efficientnet_b3.html#torchvision.models.efficientnet_b3

In [14]:
import torchvision

# Step 1: pick the default pretrained weights for EfficientNetB3
efficientb3_weights = torchvision.models.EfficientNet_B3_Weights.DEFAULT

# Step 2: grab the preprocessing transforms bundled with those weights
efficientb3_transforms = efficientb3_weights.transforms()

# Step 3: instantiate EfficientNetB3 initialised with the pretrained weights
efficientb3_model = torchvision.models.efficientnet_b3(weights=efficientb3_weights)

# Step 4: freeze every parameter so the pretrained base is not updated by training
for param in efficientb3_model.parameters():
    param.requires_grad_(False)

In [15]:
# Display the model's layer structure (bare last expression renders the module repr)
efficientb3_model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
            (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

In [18]:
# Get the summary of the model
from torchinfo import summary

# NOTE(review): the summary call below is disabled. It uses a 224x224 input, while the
# transforms printed for this model report crop_size=[300]; presumably
# input_size=(1, 3, 300, 300) is more representative - confirm before re-enabling.
# summary(efficientb3_model, input_size=(1, 3, 224, 224))


In [19]:
# Inspect the classifier head to see its layers and their input/output sizes
efficientb3_model.classifier

Sequential(
  (0): Dropout(p=0.3, inplace=True)
  (1): Linear(in_features=1536, out_features=1000, bias=True)
)

In [22]:
# Swap the classifier head for a new one that outputs 3 classes
# (1536 matches the in_features of the Linear layer being replaced)
efficientb3_model.classifier = nn.Sequential(
    nn.Dropout(p=0.3),
    nn.Linear(1536, 3, bias=True),
)

efficientb3_model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 40, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(40, 40, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=40, bias=False)
            (1): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(40, 10, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(10, 40, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

### 3.1 Creating a function to make an EfficientNetB3 model

In [24]:
def create_efficientnetb3_model(num_classes: int = 3):
    """Creates an EfficientNetB3 feature extractor with a custom classifier head.

    All pretrained parameters are frozen; only the freshly created classifier
    head has trainable parameters.

    Args:
        num_classes (int): Number of output units in the new classifier head.
            Defaults to 3.

    Returns:
        tuple: ``(model, transforms)`` where ``model`` is the EfficientNetB3
        ``nn.Module`` with its classifier replaced, and ``transforms`` is the
        image preprocessing pipeline returned by the pretrained weights.
    """

    # 1. Setup pretrained EfficientNetB3 weights
    weights = torchvision.models.EfficientNet_B3_Weights.DEFAULT

    # 2. Get the transforms bundled with the weights
    #    (named so it does not shadow the imported `transforms` module)
    model_transforms = weights.transforms()

    # 3. Setup pretrained EfficientNetB3 model
    model = torchvision.models.efficientnet_b3(weights=weights)

    # 4. Freeze all layers in the base model
    for param in model.parameters():
        param.requires_grad = False

    # 5. Replace the head. Its input width is read from the pretrained
    #    classifier's final Linear layer instead of being hard-coded.
    in_features = model.classifier[-1].in_features
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.3),
        nn.Linear(in_features=in_features, out_features=num_classes, bias=True),
    )

    return model, model_transforms

In [26]:
# Rebuild the model and its transforms through the helper function,
# replacing the objects created step by step in the earlier cells
efficientb3_model, efficientb3_transforms = create_efficientnetb3_model(num_classes=3)

# Show the preprocessing pipeline that comes with the pretrained weights
efficientb3_transforms

ImageClassification(
    crop_size=[300]
    resize_size=[320]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)