## Thai Food Classification Using a Pretrained CNN from Torchvision

- This notebook uses `torchvision` to load a pretrained model (ResNet34) and fine-tune it for Thai food classification (transfer learning)
- Reference: https://github.com/udacity/deep-learning-v2-pytorch/blob/master/transfer-learning/Transfer_Learning_Solution.ipynb

In [None]:
import os
import os.path as op
import numpy as np
from glob import glob
import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
from torchvision import datasets, models, transforms

In [None]:
%%capture
!git clone https://github.com/GemmyTheGeek/FoodyDudy.git # download data from FoodyDudy git

In [None]:
# Class names, ordered so that a name's position in the list is the integer
# class id that `id2food` maps back to the name.
# NOTE(review): presumably the dataset's class folders are indexed in this same
# order -- verify against the `classes` attribute of the loaded dataset.
food_list = [
    "green_curry", "tepo_curry", "liang_curry", "taohoo_moosup",
    "mara_yadsai", "masaman", "orange_curry", "cashew_chicken",
    "omelette", "sunny_side_up", "palo_egg", "sil_egg",
    "nun_banana", "kua_gai", "cabbage_fish_sauce", "river_prawn",
    "shrimp_ob_woonsen", "kanom_krok", "mango_sticky_rice", "kao_kamoo",
    "kao_klook_kapi", "kaosoi", "kao_pad", "kao_pad_shrimp",
    "chicken_rice", "kao_mok_gai", "tom_ka_gai", "tom_yum_kung",
    "tod_mun", "poh_pia", "pak_boong_fai_daeng", "padthai",
    "pad_krapao", "pad_si_ew", "pad_fakthong", "eggplant_stirfry",
    "pad_hoi_lai", "foithong", "panaeng", "yum_tua_ploo",
    "yum_woonsen", "larb_moo", "pumpkin_custard", "sakoo_sai_moo",
    "somtam", "moopoing", "satay", "hor_mok",
]

# Number of output classes for the classifier head.
n_classes = len(food_list)

# Lookup from integer class id to its food name.
id2food = dict(enumerate(food_list))

# True when a CUDA device is available; later cells use it to decide whether
# to move the model and batches to the GPU.
train_on_gpu = torch.cuda.is_available()

In [None]:
# Locations of the three dataset splits inside the cloned repository.
root_dir = "FoodyDudy/images/"
train_dir = op.join(root_dir, "train")
val_dir = op.join(root_dir, "valid")
test_dir = op.join(root_dir, "test")

# NOTE: ImageNet mean/std normalization is left disabled in both pipelines.
# Enabling it would also require un-normalizing the tensors before the
# `imshow` calls in the visualization cells:
#   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training pipeline: random crop + resize to 224x224 as light augmentation.
# optional: you can use `albumentations` library for augmentations
train_transform = transforms.Compose([
    # `scale` bounds the crop area as a fraction of the source image area, so
    # the upper bound is capped at 1.0 (a crop cannot be larger than the image).
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor(),
])

# Evaluation pipeline: deterministic, always yields a 224x224 tensor.
val_transform = transforms.Compose([
    # Resize(224) only fixes the shorter side; the center crop makes every
    # image exactly 224x224 so that batches can be stacked regardless of the
    # original aspect ratio. Square inputs are unaffected by the crop.
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

# One dataset per split; validation and test share the deterministic pipeline.
train_data = datasets.ImageFolder(train_dir, transform=train_transform)
val_data = datasets.ImageFolder(val_dir, transform=val_transform)
test_data = datasets.ImageFolder(test_dir, transform=val_transform)

In [None]:
# Mini-batch size shared by all three loaders.
batch_size = 16

# Options common to every loader.
_loader_kwargs = dict(batch_size=batch_size, num_workers=0)

# Training batches are reshuffled; validation batches keep dataset order.
train_loader = torch.utils.data.DataLoader(train_data, shuffle=True, **_loader_kwargs)
val_loader = torch.utils.data.DataLoader(val_data, shuffle=False, **_loader_kwargs)
# set shuffle True just to see random photos
test_loader = torch.utils.data.DataLoader(test_data, shuffle=True, **_loader_kwargs)

In [None]:
import matplotlib.pyplot as plt

# Preview one training batch, titling each image with its class name.
images, labels = next(iter(train_loader))

# 2 rows x 8 columns; show fewer images when the batch is smaller than the
# grid instead of indexing past the end of the batch.
n_rows, n_cols = 2, 8
n_show = min(len(images), n_rows * n_cols)

fig = plt.figure(figsize=(25, 4))
for idx in range(n_show):
    ax = fig.add_subplot(n_rows, n_cols, idx + 1, xticks=[], yticks=[])
    # reorder channels-first (C, H, W) to channels-last (H, W, C) for imshow
    ax.imshow(images[idx].permute(1, 2, 0).numpy())
    ax.set_title(id2food[int(labels[idx])])

In [None]:
# Load torchvision's ResNet34 with `pretrained=True` and print its layer structure.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm against the installed version.
resnet34 = models.resnet34(pretrained=True)
print(resnet34)

In [None]:
# Optional: uncomment to freeze the existing weights so that only the new
# classifier head created below is trained.
# for param in resnet34.parameters():
#     param.requires_grad = False

# Swap the final fully connected layer for one with `n_classes` outputs.
# The input width is read from the existing layer rather than hard-coded.
n_inputs = resnet34.fc.in_features
resnet34.fc = nn.Linear(n_inputs, n_classes)

# Move the whole model (including the new head) to the GPU when available.
if train_on_gpu:
    resnet34.cuda()

In [None]:
# Cross-entropy objective applied to the raw class scores from the network.
loss_fn = nn.CrossEntropyLoss()

# Plain SGD over every parameter of the model.
optimizer = optim.SGD(params=resnet34.parameters(), lr=0.01)

In [None]:
n_epochs = 4  # number of epochs to train the model
print_every_iter = 100  # print every 100 iterations (100 * batch size)

for epoch in range(1, n_epochs + 1):

    # running sum of the training loss since the last report
    train_loss = 0.0

    # put the model in training mode for this epoch
    resnet34.train()
    for batch_i, (data, target) in enumerate(train_loader):
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()

        # standard step: clear old gradients, forward, backward, update
        optimizer.zero_grad()
        output = resnet34(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

        if batch_i % print_every_iter == (print_every_iter - 1):
            # The sum covers exactly `print_every_iter` batches, so divide by
            # that count to report the mean loss per batch.
            print('Epoch %d, Batch %d loss: %.16f' %
                  (epoch, batch_i + 1, train_loss / print_every_iter))
            train_loss = 0.0

Now we can test predicting on a sample batch from `test_loader`

In [None]:
# Predict the classes of one sample batch from the test loader.
resnet34.eval()
images, labels = next(iter(test_loader))
if train_on_gpu:
    images = images.cuda()

# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    logits = resnet34(images)

# The predicted class is the index of the largest score per image.
# `.cpu()` is a no-op when the tensor is already on the CPU, and the result is
# always a 1-D array (one entry per image), even for a batch of one.
preds = logits.argmax(dim=1).cpu().numpy()

In [None]:
# comparing labels to predictions
print("Labels:      ", labels.tolist())
print("Predictions: ", preds.tolist())

In [None]:
from sklearn.metrics import classification_report

# Collect predictions and ground-truth labels over the whole test set, then
# print per-class precision / recall / F1.
y_pred, y_true = [], []

# Make this cell independent of earlier cells: force evaluation mode here and
# disable gradient tracking for the inference-only forward passes.
resnet34.eval()
with torch.no_grad():
    for images, labels in test_loader:
        if train_on_gpu:
            images = images.cuda()
        pred = resnet34(images).argmax(dim=1)
        y_pred.extend(pred.cpu().tolist())
        # labels are only compared on the CPU, so they never need to move
        y_true.extend(labels.tolist())

print(classification_report(y_true, y_pred))

## Visualize test sample

In [None]:
# Show one test batch with the predicted class as title, coloured green when
# the prediction matches the label and red otherwise.
resnet34.eval()
images, labels = next(iter(test_loader))
if train_on_gpu:
    images = images.cuda()

# Inference only: no gradients needed.
with torch.no_grad():
    preds = resnet34(images).argmax(dim=1).cpu().numpy()

# Convert the batch to NumPy once, outside the plotting loop.
images_np = images.cpu().numpy()

# 2 rows x 8 columns; show fewer images when the batch is smaller than the grid.
n_rows, n_cols = 2, 8
n_show = min(len(images_np), n_rows * n_cols)

fig = plt.figure(figsize=(25, 4))
for idx in range(n_show):
    ax = fig.add_subplot(n_rows, n_cols, idx + 1, xticks=[], yticks=[])
    # reorder channels-first (C, H, W) to channels-last (H, W, C) for imshow
    ax.imshow(np.transpose(images_np[idx], (1, 2, 0)))
    is_correct = preds[idx] == labels[idx].item()
    ax.set_title(
        id2food[int(preds[idx])],
        color=("green" if is_correct else "red"),
    )

Some limitations of pure PyTorch code:
- We need to explicitly move the data and the model to the GPU
- The training loop is lengthy, as is typical of plain PyTorch code

Alternative libraries: PyTorch Lightning, Ignite