### Import necessary Libraries

In [1]:
import torch
from torch import nn
import torchvision
from torchinfo import summary
from torchvision import datasets
from pathlib import Path
import os
from typing import Dict, List, Tuple
from tqdm.auto import tqdm

In [2]:
# Number of DataLoader worker processes: 2 on small machines (<= 4 cores), otherwise 4.
# os.cpu_count() returns None when the core count cannot be determined; treat that as
# a single core instead of crashing on `None <= 4`.
NUM_WORKERS = 2 if (os.cpu_count() or 1) <= 4 else 4

In [3]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [4]:
# Display the selected device and the DataLoader worker count.
(device, NUM_WORKERS)

('cuda', 4)

### Load our EffnetB2 Model

In [5]:
def create_effnetb2(num_classes: int=101, seed: int=42):
    """Create a frozen, pretrained EfficientNet-B2 with a fresh classifier head.

    All pretrained parameters are frozen (``requires_grad = False``); only the
    newly created classifier head is trainable.

    Args:
        num_classes: Number of output units of the new classifier head.
        seed: Seed passed to ``torch.manual_seed`` right before the head is
            created, so its initial weights are reproducible.

    Returns:
        A tuple ``(model, transforms)``: the model moved to the notebook-level
        ``device``, and the preprocessing transforms bundled with the weights.
    """
    effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
    # Pass the weights by keyword: torchvision deprecates passing them positionally.
    effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights)
    effnetb2_transforms = effnetb2_weights.transforms()

    # Freeze every pretrained parameter; the head created below is trainable by default.
    for param in effnetb2.parameters():
        param.requires_grad = False

    # Honour the caller's seed (it was previously hard-coded to 42 and `seed` was ignored).
    torch.manual_seed(seed)
    effnetb2.classifier = nn.Sequential(
        nn.Dropout(p=0.3, inplace=True),
        # 1408 matches the channel count of the feature extractor's output.
        nn.Linear(in_features=1408, out_features=num_classes)
    )

    return effnetb2.to(device), effnetb2_transforms

In [6]:
# Build the Food101 model (101 classes) together with its matching preprocessing transforms.
effnetb2_food101, effnetb2_transforms = create_effnetb2(num_classes=101, seed=42)



In [6]:
# Print a per-layer overview: shapes, parameter counts and which layers are trainable.
# NOTE(review): the dataset transforms printed later in this notebook report
# crop_size=[288]; the 224x224 input here only affects the shapes shown in this summary.
summary(
    model=effnetb2_food101,
    input_size=(1, 3, 224, 224),
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [1, 3, 224, 224]     [1, 101]             --                   Partial
├─Sequential (features)                                      [1, 3, 224, 224]     [1, 1408, 7, 7]      --                   False
│    └─Conv2dNormActivation (0)                              [1, 3, 224, 224]     [1, 32, 112, 112]    --                   False
│    │    └─Conv2d (0)                                       [1, 3, 224, 224]     [1, 32, 112, 112]    (864)                False
│    │    └─BatchNorm2d (1)                                  [1, 32, 112, 112]    [1, 32, 112, 112]    (64)                 False
│    │    └─SiLU (2)                                         [1, 32, 112, 112]    [1, 32, 112, 112]    --                   --
│    └─Sequential (1)                                        [1, 32, 112, 112]    [1, 1

### Downloading Data & Preprocessing

`torchvision.transforms.TrivialAugmentWide()` is the same data augmentation that the PyTorch team uses in their computer-vision training recipe.

In [7]:
# Training pipeline: random augmentation first, then the model's own preprocessing.
food101_train_transforms = torchvision.transforms.Compose(
    [torchvision.transforms.TrivialAugmentWide(), effnetb2_transforms]
)

In [8]:
data_dir = Path("data")

# Derive the check from data_dir (the original hard-coded "data/food-101/...") so that
# changing data_dir keeps the check consistent with the dataset root.
food101_dir = data_dir / "food-101"
food101_on_disk = food101_dir.exists() and (food101_dir / "license_agreement.txt").is_file()

# Download only when the extracted dataset is not already present.
# Training images get augmentation + preprocessing; test images get preprocessing only.
train_data = datasets.Food101(
    root=data_dir, split="train", transform=food101_train_transforms, download=not food101_on_disk)
test_data = datasets.Food101(
    root=data_dir, split="test", transform=effnetb2_transforms, download=not food101_on_disk)

In [9]:
# Class labels as exposed by the dataset object; show the first ten as a sanity check.
food101_class_names = train_data.classes
food101_class_names[:10]

['apple_pie',
 'baby_back_ribs',
 'baklava',
 'beef_carpaccio',
 'beef_tartare',
 'beet_salad',
 'beignets',
 'bibimbap',
 'bread_pudding',
 'breakfast_burrito']

### Creating a 20% split of the dataset (for quick experiments)

In [9]:
def split_dataset(dataset: torch.utils.data.Dataset, split_size: float=0.2, seed:int=42):
    """Randomly split ``dataset`` into two subsets.

    Args:
        dataset: A sized dataset accepted by ``torch.utils.data.random_split``.
        split_size: Fraction (between 0 and 1) of the samples that go into the
            first split; the remainder goes into the second split.
        seed: Seed used for the random permutation, making the split reproducible.

    Returns:
        A tuple ``(random_split_1, random_split_2)`` containing
        ``int(len(dataset) * split_size)`` samples and all remaining samples.

    Raises:
        ValueError: If ``split_size`` is outside ``[0, 1]``.
    """
    # An out-of-range fraction would otherwise produce a negative split length.
    if not 0 <= split_size <= 1:
        raise ValueError(f"split_size must be between 0 and 1, got {split_size}")

    length_1 = int(len(dataset) * split_size)
    length_2 = len(dataset) - length_1

    # round() instead of int(): int() truncates float noise (e.g. 0.29 * 100 -> 28).
    percent_1 = round(split_size * 100)
    percent_2 = round((1 - split_size) * 100)
    print(f"[INFO] Splitting dataset of length {len(dataset)} into splits of size {length_1} ({percent_1}%), {length_2} ({percent_2}%)")

    # torch.manual_seed() seeds the global default generator and returns it, so this
    # call also reseeds PyTorch's global RNG (kept as in the original notebook).
    random_split_1, random_split_2 = torch.utils.data.random_split(dataset, lengths=[length_1, length_2], generator=torch.manual_seed(seed))

    return random_split_1, random_split_2

In [10]:
# Keep only the first (20%) subset of each split; the 80% remainder is discarded.
train_data_food101_20_percent, _ = split_dataset(train_data, split_size=0.2)
test_data_food101_20_percent, _ = split_dataset(test_data, split_size=0.2)

# Show the resulting subset sizes.
len(train_data_food101_20_percent), len(test_data_food101_20_percent)

[INFO] Splitting dataset of length 75750 into splits of size 15150 (20%), 60600 (80%)
[INFO] Splitting dataset of length 25250 into splits of size 5050 (20%), 20200 (80%)


(15150, 5050)

The 20% splits above are handy for quick experiments, but the training run below uses the full Food101 dataset.

### Creating DataLoaders

In [11]:
BATCH_SIZE = 32

# DataLoaders over the 20% subsets: shuffle the training data, keep test order fixed.
train_dataloader_food101_20 = torch.utils.data.DataLoader(
    train_data_food101_20_percent,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
test_dataloader_food101_20 = torch.utils.data.DataLoader(
    test_data_food101_20_percent,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [14]:
BATCH_SIZE = 32

# DataLoaders over the complete Food101 train/test splits (these feed the training loop).
train_dataloader_food101 = torch.utils.data.DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
test_dataloader_food101 = torch.utils.data.DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

In [12]:
# Inspect the datasets (and their transforms) behind the full-size DataLoaders.
(
    test_dataloader_food101.dataset,
    train_dataloader_food101.dataset,
)

(Dataset Food101
     Number of datapoints: 25250
     Root location: data
     split=test
     StandardTransform
 Transform: ImageClassification(
                crop_size=[288]
                resize_size=[288]
                mean=[0.485, 0.456, 0.406]
                std=[0.229, 0.224, 0.225]
                interpolation=InterpolationMode.BICUBIC
            ),
 Dataset Food101
     Number of datapoints: 75750
     Root location: data
     split=train
     StandardTransform
 Transform: Compose(
                TrivialAugmentWide(num_magnitude_bins=31, interpolation=InterpolationMode.NEAREST, fill=None)
                ImageClassification(
                crop_size=[288]
                resize_size=[288]
                mean=[0.485, 0.456, 0.406]
                std=[0.229, 0.224, 0.225]
                interpolation=InterpolationMode.BICUBIC
            )
            ))

### Training

In [12]:
# Adam over the model's parameters (frozen ones receive no gradients and stay unchanged).
optimizer = torch.optim.Adam(effnetb2_food101.parameters(), lr=0.001)
# Cross-entropy with label smoothing of 0.1.
loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)

In [15]:
# Per-epoch history of the metrics printed below.
results = {"train_loss": [], "train_acc": [], "test_loss": [], "test_acc": []}
epochs = 5

for epoch in tqdm(range(epochs)):
    # ---- Training pass ----
    effnetb2_food101.train()
    train_loss_sum, train_correct, train_seen = 0.0, 0, 0

    for X, y in train_dataloader_food101:
        X, y = X.to(device), y.to(device)
        y_pred = effnetb2_food101(X)
        loss = loss_fn(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight every batch by its size so a smaller final batch does not skew
        # the epoch averages (the original averaged per-batch values).
        n_samples = y.size(0)
        train_loss_sum += loss.item() * n_samples
        # argmax over logits equals argmax over softmax(logits); no softmax needed.
        train_correct += (y_pred.argmax(dim=1) == y).sum().item()
        train_seen += n_samples

    # ---- Evaluation pass ----
    effnetb2_food101.eval()
    test_loss_sum, test_correct, test_seen = 0.0, 0, 0
    with torch.inference_mode():
        for X, y in test_dataloader_food101:
            X, y = X.to(device), y.to(device)
            test_pred_logits = effnetb2_food101(X)
            loss = loss_fn(test_pred_logits, y)

            n_samples = y.size(0)
            test_loss_sum += loss.item() * n_samples
            test_correct += (test_pred_logits.argmax(dim=1) == y).sum().item()
            test_seen += n_samples

    # Per-sample averages over the whole epoch.
    train_loss = train_loss_sum / train_seen
    train_acc = train_correct / train_seen

    test_loss = test_loss_sum / test_seen
    test_acc = test_correct / test_seen

    print(
        f"Epoch: {epoch+1} | "
        f"train_loss: {train_loss:.4f} | "
        f"train_acc: {train_acc:.4f} | "
        f"test_loss: {test_loss:.4f} | "
        f"test_acc: {test_acc:.4f}"
    )

    results["train_loss"].append(train_loss)
    results["train_acc"].append(train_acc)
    results["test_loss"].append(test_loss)
    results["test_acc"].append(test_acc)

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 2.6909 | train_acc: 0.4709 | test_loss: 2.1004 | test_acc: 0.6297
Epoch: 2 | train_loss: 2.6219 | train_acc: 0.4886 | test_loss: 2.0609 | test_acc: 0.6410
Epoch: 3 | train_loss: 2.6050 | train_acc: 0.4967 | test_loss: 2.0575 | test_acc: 0.6417
Epoch: 4 | train_loss: 2.5983 | train_acc: 0.4949 | test_loss: 2.0590 | test_acc: 0.6424
Epoch: 5 | train_loss: 2.5928 | train_acc: 0.4984 | test_loss: 2.0391 | test_acc: 0.6470


Let's save our model, which reached about 64.7% test accuracy after 5 epochs and beat the Food101 paper result.

In [16]:
# Where the trained weights are written.
target_dir = Path("models/")
target_name = Path("effnetb2_food101_complete_dataset.pth")

# Create the output directory if needed, then store only the state_dict (not the whole module).
target_dir.mkdir(exist_ok=True, parents=True)
torch.save(effnetb2_food101.state_dict(), target_dir / target_name)

In [17]:
# Full path of the saved weights file.
model_path = target_dir.joinpath(target_name)

In [20]:
# Size of the saved weights file in whole mebibytes (floor division).
f"{model_path.stat().st_size // 1024**2} MB"

'30 MB'