In [1]:
# Transfer learning: take the patterns another model has learned from another problem
# and use them for our own problem.

# Examples:
# Computer vision models pre-trained on large image datasets such as ImageNet
# Large language models trained on large amounts of text to learn representations of language

# Across a wide range of datasets, even if the downstream data of interest appears to only be weakly
# related to the data used for pre-training, transfer learning remains the best available option.

In [2]:
# Places to find pretrained models:
# 
# PyTorch libraries
# e.g., torchvision.models, torchtext.models, torchaudio.models, torchrec.models
#
# HuggingFace Hub
# https://huggingface.co/models
# https://huggingface.co/datasets

In [3]:
# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+
try:
    import torch
    import torchvision
    assert int(torch.__version__.split(".")[1]) >= 12, "torch version should be 1.12+"
    assert int(torchvision.__version__.split(".")[1]) >= 13, "torchvision version should be 0.13+"
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")
except:
    print(f"[INFO] torch/torchvision versions not as required, installing nightly versions.")
    !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113
    import torch
    import torchvision
    print(f"torch version: {torch.__version__}")
    print(f"torchvision version: {torchvision.__version__}")

[INFO] torch/torchvision versions not as required, installing nightly versions.
Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu113
torch version: 2.0.0+cu117
torchvision version: 0.15.1+cu117


In [4]:
import matplotlib.pyplot as plt
import torch
import torchvision

from torch import nn
from torchvision import transforms

from torchinfo import summary

In [5]:
from src_05_modular import data_setup, engine, utils

In [6]:
# Pick the compute device through the project's utils helper
# (the recorded output shows it resolved to 'cuda' on the original run).
device = utils.get_device()
device

'cuda'

In [7]:
# Setup data

In [8]:
import pathlib

In [9]:
# Root data folder, and the pizza/steak/sushi image folder nested inside it
data_path = pathlib.Path("data/")
image_path = data_path.joinpath("pizza_steak_sushi")

In [10]:
# The train and test splits are sibling sub-folders of the image folder
train_dir = image_path.joinpath("train")
test_dir = image_path.joinpath("test")

## Datasets and DataLoaders

### 1. Create Transforms for torchvision.models: manual creation

In [11]:
# When using a pre-trained model, your custom data must be prepared the same way 
# as the original training data that went into the model.

In [12]:
# Specifically, computer vision models in torchvision expect input images normalized in the following way:
# * Mini-batches of three-channel RGB images of shape 3*Height*Width, with H, W at least 224.
# * Images have to be loaded into a range of [0, 1] and then normalized using
# mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225]

# The above steps in PyTorch code are:
# 1. Mini-batches of size [batch_size, 3, height, width], using
# torchvision.transforms.Resize() and then torch.utils.data.DataLoader() to create batches
# 2. Values between 0 and 1, using
# torchvision.transforms.ToTensor()
# 3. Normalize using
# torchvision.transforms.Normalize(mean=, std=)

In [13]:
# Hand-written preprocessing pipeline: resize to 224x224, convert to a tensor,
# then normalize each channel with the mean/std the pretrained torchvision models expect.
manual_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

manual_transforms

Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [14]:
# Mini-batch size passed to data_setup.create_dataloaders for every dataloader built in this notebook
batch_size = 32

In [15]:
# Build the train/test dataloaders from the two image folders via the project
# helper, applying the manually created transform pipeline.
(
    train_dataloader,
    test_dataloader,
    class_names,
) = data_setup.create_dataloaders(
    train_dir=train_dir, test_dir=test_dir,
    transform=manual_transforms, batch_size=batch_size,
)

# Show both loaders together with the class names returned alongside them
train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x256cea46910>,
 <torch.utils.data.dataloader.DataLoader at 0x256cea461f0>,
 ['pizza', 'steak', 'sushi'])

### 2. Create Transforms for torchvision.models: auto creation

In [16]:
# As of torchvision v0.13+, an automatic transform creation feature has been added.
# Say you'd like to use the model ModelABC; you can obtain its pretrained weights with
# weights = torchvision.models.ModelABC_Weights.DEFAULT
# (DEFAULT indicates the best available weights for the chosen model architecture)

In [17]:
# DEFAULT is an alias for the best available pretrained weights of this architecture;
# the recorded output shows it resolving to IMAGENET1K_V1 on the original run.
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT
weights

EfficientNet_B0_Weights.IMAGENET1K_V1

In [18]:
# Now, access the transforms associated with the above model's weights.
# The recorded repr shows: resize to 256, crop to 224, bicubic interpolation,
# and normalization with mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225].
auto_transforms = weights.transforms()
auto_transforms

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

In [19]:
# Create the train/test dataloaders the same way as before, now using the auto-created transforms

In [20]:
# Rebuild the train/test dataloaders via the project helper, this time with the
# transform pipeline obtained from the pretrained weights.
(
    train_dataloader,
    test_dataloader,
    class_names,
) = data_setup.create_dataloaders(
    train_dir=train_dir, test_dir=test_dir,
    transform=auto_transforms, batch_size=batch_size,
)

# Show both loaders together with the class names returned alongside them
train_dataloader, test_dataloader, class_names

(<torch.utils.data.dataloader.DataLoader at 0x256cea46040>,
 <torch.utils.data.dataloader.DataLoader at 0x256cea46580>,
 ['pizza', 'steak', 'sushi'])