# Assignment 3: Image Classification

In [1]:
# check GPU type.
!nvidia-smi

Tue Nov 28 06:01:56 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

### Import Packages

In [2]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm
from torchvision.datasets import DatasetFolder, VisionDataset
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset

In [3]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

### Transforms

In [4]:
test_tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

train_tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(90, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
])


### Datasets

In [5]:
class FoodDataset(Dataset):
    def __init__(self, tfm, path="../input/food-11", isTrain=True):
        super(FoodDataset).__init__()
        if isTrain:
            self.train_path = "/kaggle/input/ml2023spring-hw3/train"
            self.valid_path = "/kaggle/input/ml2023spring-hw3/valid"
            self.files = [self.train_path + "/" + x for x in os.listdir(self.train_path) if x.endswith(".jpg")]
            self.files += [self.valid_path + "/" + x for x in os.listdir(self.valid_path) if x.endswith(".jpg")]
            np.random.shuffle(self.files)
        else:
            self.path = path + "/test"
            self.files = sorted([self.path + "/" + x for x in os.listdir(self.path) if x.endswith(".jpg")])

        self.transform = tfm

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)

        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1  # test has no label

        return im, label


### Models

In [6]:
from torchvision import models


class Resnet(nn.Module):
    def __init__(self, n_class):
        super(Resnet, self).__init__()
        self.cnn = models.resnet18(weights=None)
        self.cnn.fc = nn.Linear(512, n_class)

    def forward(self, x):
        return self.cnn(x)


### Configurations

In [7]:
device =  "cuda" if torch.cuda.is_available() else "cpu"

# hyperparameters
batch_size = 128
n_epochs = 50

patience = 8  # If no improvement in 'patience' epochs, early stop.


### Construct Dataset

In [8]:
dataset = FoodDataset(train_tfm)

### Start Training

In [9]:
from sklearn.model_selection import KFold
from torch.utils.data import SubsetRandomSampler

In [10]:
_exp_name = "resnet18"
fold_idx = 0

In [11]:
def adjust_learning_rate(optimizer):
    isPrint = False
    for param_group in optimizer.param_groups:
        if isPrint == False:
            lr = param_group["lr"]
            print(f"--- Learning rate decreases from {lr:.6f} to {lr * 0.8:.6f}. ---")
            isPrint = True
        param_group["lr"] = param_group["lr"] * 0.8


In [12]:
kf = KFold(n_splits=4)

for fold, (train_idx, valid_idx) in enumerate(kf.split(dataset)):
    if fold != fold_idx:
        continue
    
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, num_workers=0, pin_memory=True )
    valid_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_sampler, num_workers=0, pin_memory=True )

    model = Resnet(11).to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    
    # Initialize trackers, these are not parameters and should not be changed
    stale = 0
    best_acc = 0

    for epoch in range(n_epochs):
        if stale > 5:
            adjust_learning_rate(optimizer)

        # ---------- Training ----------
        model.train()
        train_loss = []
        train_accs = []

        with tqdm(total=len(train_loader), unit="batch") as tqdm_bar:
            tqdm_bar.set_description(f"Epoch {epoch + 1:03d}/{n_epochs:03d}")
            for batch in train_loader:
                imgs, labels = batch

                # Forward the data.
                logits = model(imgs.to(device))

                # Calculate the cross-entropy loss.
                loss = criterion(logits, labels.to(device))

                # Gradients stored in the parameters in the previous step should be cleared out first.
                optimizer.zero_grad()

                # Compute the gradients for parameters.
                loss.backward()

                # Clip the gradient norms for stable training.
                grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

                # Update the parameters with computed gradients.
                optimizer.step()

                # Compute the accuracy for current batch.
                acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

                # Record the loss and accuracy.
                train_loss.append(loss.item())
                train_accs.append(acc)

                tqdm_bar.update(1)
                tqdm_bar.set_postfix(loss=f"{sum(train_loss)/len(train_loss):.5f}", acc=f"{sum(train_accs) / len(train_accs):.5f}", val_loss=f"{0:.5f}", val_acc=f"{0:.5f}")

            train_loss = sum(train_loss) / len(train_loss)
            train_acc = sum(train_accs) / len(train_accs)
            tqdm_bar.set_postfix(loss=f"{train_loss:.5f}", acc=f"{train_acc:.5f}", val_loss=f"{0:.5f}", val_acc=f"{0:.5f}")

            # ---------- Validation ----------
            model.eval()
            valid_loss = []
            valid_accs = []

            for batch in valid_loader:
                imgs, labels = batch

                # Using torch.no_grad() accelerates the forward process.
                with torch.no_grad():
                    logits = model(imgs.to(device))

                # We can still compute the loss (but not the gradient).
                loss = criterion(logits, labels.to(device))

                # Compute the accuracy for current batch.
                acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

                # Record the loss and accuracy.
                valid_loss.append(loss.item())
                valid_accs.append(acc)

                tqdm_bar.set_postfix(
                    loss=f"{train_loss:.5f}", acc=f"{train_acc:.5f}", val_loss=f"{sum(valid_loss) / len(valid_loss):.5f}", val_acc=f"{sum(valid_accs) / len(valid_accs):.5f}"
                )

            # The average loss and accuracy for entire validation set is the average of the recorded values.
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)

            tqdm_bar.set_postfix(loss=f"{train_loss:.5f}", acc=f"{train_acc:.5f}", val_loss=f"{valid_loss:.5f}", val_acc=f"{valid_acc:.5f}")
            tqdm_bar.close()

        # update logs
        if valid_acc > best_acc:
            with open(f"./{_exp_name}_log.txt", "a") as f:
                f.write(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best\n")
        else:
            with open(f"./{_exp_name}_log.txt", "a") as f:
                f.write(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}\n")

        # save models
        if valid_acc > best_acc:
            print(f"Best model found at epoch {epoch+1}, saving model")
            torch.save(model.state_dict(), f"{_exp_name}_best.ckpt")  # only save best to prevent output memory exceed error
            best_acc = valid_acc
            stale = 0
        else:
            stale += 1
            if stale > patience:
                print(f"No improvment {patience} consecutive epochs, early stopping")
                break


Epoch 001/050: 100%|██████████| 80/80 [05:37<00:00,  4.22s/batch, acc=0.29753, loss=2.01445, val_acc=0.24225, val_loss=2.27763]


Best model found at epoch 1, saving model


Epoch 002/050: 100%|██████████| 80/80 [03:24<00:00,  2.56s/batch, acc=0.36571, loss=1.80373, val_acc=0.22767, val_loss=2.41918]
Epoch 003/050: 100%|██████████| 80/80 [03:25<00:00,  2.57s/batch, acc=0.41415, loss=1.68677, val_acc=0.26193, val_loss=2.30472]


Best model found at epoch 3, saving model


Epoch 004/050: 100%|██████████| 80/80 [03:24<00:00,  2.56s/batch, acc=0.43445, loss=1.63407, val_acc=0.39667, val_loss=1.73052]


Best model found at epoch 4, saving model


Epoch 005/050: 100%|██████████| 80/80 [03:25<00:00,  2.57s/batch, acc=0.46583, loss=1.53982, val_acc=0.40645, val_loss=1.72991]


Best model found at epoch 5, saving model


Epoch 006/050: 100%|██████████| 80/80 [03:24<00:00,  2.56s/batch, acc=0.47104, loss=1.52054, val_acc=0.36414, val_loss=1.91391]
Epoch 007/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.49834, loss=1.45519, val_acc=0.41750, val_loss=1.77297]


Best model found at epoch 7, saving model


Epoch 008/050: 100%|██████████| 80/80 [03:25<00:00,  2.56s/batch, acc=0.50965, loss=1.40926, val_acc=0.35438, val_loss=1.93259]
Epoch 009/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.52417, loss=1.35855, val_acc=0.44807, val_loss=1.66940]


Best model found at epoch 9, saving model


Epoch 010/050: 100%|██████████| 80/80 [03:25<00:00,  2.57s/batch, acc=0.53525, loss=1.32315, val_acc=0.44123, val_loss=1.63145]
Epoch 011/050: 100%|██████████| 80/80 [03:26<00:00,  2.58s/batch, acc=0.54416, loss=1.29998, val_acc=0.33038, val_loss=2.38753]
Epoch 012/050: 100%|██████████| 80/80 [03:26<00:00,  2.58s/batch, acc=0.55856, loss=1.25709, val_acc=0.53497, val_loss=1.40553]


Best model found at epoch 12, saving model


Epoch 013/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.57524, loss=1.21509, val_acc=0.48110, val_loss=1.56049]
Epoch 014/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.59233, loss=1.17946, val_acc=0.50930, val_loss=1.41401]
Epoch 015/050: 100%|██████████| 80/80 [03:24<00:00,  2.55s/batch, acc=0.60194, loss=1.13987, val_acc=0.40406, val_loss=2.08068]
Epoch 016/050: 100%|██████████| 80/80 [03:26<00:00,  2.58s/batch, acc=0.61602, loss=1.09808, val_acc=0.46224, val_loss=1.71906]
Epoch 017/050: 100%|██████████| 80/80 [03:25<00:00,  2.57s/batch, acc=0.61736, loss=1.09475, val_acc=0.53005, val_loss=1.36577]
Epoch 018/050: 100%|██████████| 80/80 [03:25<00:00,  2.57s/batch, acc=0.63096, loss=1.05951, val_acc=0.52060, val_loss=1.42413]


--- Learning rate decreases from 0.001000 to 0.000800. ---


Epoch 019/050: 100%|██████████| 80/80 [03:32<00:00,  2.66s/batch, acc=0.65870, loss=0.98360, val_acc=0.55641, val_loss=1.29653]


Best model found at epoch 19, saving model


Epoch 020/050: 100%|██████████| 80/80 [03:31<00:00,  2.64s/batch, acc=0.66544, loss=0.95413, val_acc=0.50274, val_loss=1.61676]
Epoch 021/050: 100%|██████████| 80/80 [03:30<00:00,  2.63s/batch, acc=0.67228, loss=0.94201, val_acc=0.56901, val_loss=1.32419]


Best model found at epoch 21, saving model


Epoch 022/050: 100%|██████████| 80/80 [03:26<00:00,  2.59s/batch, acc=0.67895, loss=0.92022, val_acc=0.63899, val_loss=1.06802]


Best model found at epoch 22, saving model


Epoch 023/050: 100%|██████████| 80/80 [03:26<00:00,  2.58s/batch, acc=0.69854, loss=0.87376, val_acc=0.61314, val_loss=1.17020]
Epoch 024/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.70053, loss=0.86961, val_acc=0.56393, val_loss=1.38698]
Epoch 025/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.70340, loss=0.85014, val_acc=0.62682, val_loss=1.14368]
Epoch 026/050: 100%|██████████| 80/80 [03:27<00:00,  2.60s/batch, acc=0.71305, loss=0.82748, val_acc=0.58145, val_loss=1.30940]
Epoch 027/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.72732, loss=0.79520, val_acc=0.58040, val_loss=1.33400]
Epoch 028/050: 100%|██████████| 80/80 [03:27<00:00,  2.60s/batch, acc=0.73072, loss=0.78864, val_acc=0.62679, val_loss=1.10117]


--- Learning rate decreases from 0.000800 to 0.000640. ---


Epoch 029/050: 100%|██████████| 80/80 [03:28<00:00,  2.60s/batch, acc=0.75079, loss=0.72040, val_acc=0.62456, val_loss=1.16805]


--- Learning rate decreases from 0.000640 to 0.000512. ---


Epoch 030/050: 100%|██████████| 80/80 [03:29<00:00,  2.62s/batch, acc=0.76036, loss=0.68538, val_acc=0.66470, val_loss=0.98742]


Best model found at epoch 30, saving model


Epoch 031/050: 100%|██████████| 80/80 [03:30<00:00,  2.63s/batch, acc=0.76742, loss=0.66741, val_acc=0.65567, val_loss=1.08488]
Epoch 032/050: 100%|██████████| 80/80 [03:31<00:00,  2.65s/batch, acc=0.76854, loss=0.65485, val_acc=0.65873, val_loss=1.07985]
Epoch 033/050: 100%|██████████| 80/80 [03:32<00:00,  2.65s/batch, acc=0.77066, loss=0.64841, val_acc=0.69555, val_loss=0.93576]


Best model found at epoch 33, saving model


Epoch 034/050: 100%|██████████| 80/80 [03:29<00:00,  2.62s/batch, acc=0.78167, loss=0.63510, val_acc=0.64907, val_loss=1.11521]
Epoch 035/050: 100%|██████████| 80/80 [03:28<00:00,  2.61s/batch, acc=0.78685, loss=0.61380, val_acc=0.70675, val_loss=0.92117]


Best model found at epoch 35, saving model


Epoch 036/050: 100%|██████████| 80/80 [03:27<00:00,  2.60s/batch, acc=0.79450, loss=0.59214, val_acc=0.64066, val_loss=1.20234]
Epoch 037/050: 100%|██████████| 80/80 [03:28<00:00,  2.61s/batch, acc=0.79536, loss=0.58443, val_acc=0.65509, val_loss=1.10106]
Epoch 038/050: 100%|██████████| 80/80 [03:28<00:00,  2.61s/batch, acc=0.79465, loss=0.58527, val_acc=0.69623, val_loss=0.97191]
Epoch 039/050: 100%|██████████| 80/80 [03:30<00:00,  2.64s/batch, acc=0.79917, loss=0.56991, val_acc=0.68449, val_loss=0.99700]
Epoch 040/050: 100%|██████████| 80/80 [03:26<00:00,  2.58s/batch, acc=0.80989, loss=0.55401, val_acc=0.71286, val_loss=0.87849]


Best model found at epoch 40, saving model


Epoch 041/050: 100%|██████████| 80/80 [03:30<00:00,  2.63s/batch, acc=0.81100, loss=0.54732, val_acc=0.70015, val_loss=0.97467]
Epoch 042/050: 100%|██████████| 80/80 [03:30<00:00,  2.63s/batch, acc=0.80637, loss=0.55251, val_acc=0.62267, val_loss=1.38125]
Epoch 043/050: 100%|██████████| 80/80 [03:31<00:00,  2.65s/batch, acc=0.81316, loss=0.52144, val_acc=0.67311, val_loss=1.09730]
Epoch 044/050: 100%|██████████| 80/80 [03:32<00:00,  2.65s/batch, acc=0.82208, loss=0.51336, val_acc=0.72529, val_loss=0.86585]


Best model found at epoch 44, saving model


Epoch 045/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.82207, loss=0.50662, val_acc=0.73384, val_loss=0.89309]


Best model found at epoch 45, saving model


Epoch 046/050: 100%|██████████| 80/80 [03:29<00:00,  2.61s/batch, acc=0.82585, loss=0.49743, val_acc=0.68813, val_loss=1.06234]
Epoch 047/050: 100%|██████████| 80/80 [03:27<00:00,  2.59s/batch, acc=0.82332, loss=0.49448, val_acc=0.72301, val_loss=0.91868]
Epoch 048/050: 100%|██████████| 80/80 [03:28<00:00,  2.61s/batch, acc=0.83820, loss=0.46138, val_acc=0.67537, val_loss=1.08762]
Epoch 049/050: 100%|██████████| 80/80 [03:28<00:00,  2.61s/batch, acc=0.84217, loss=0.46163, val_acc=0.72169, val_loss=0.91041]
Epoch 050/050: 100%|██████████| 80/80 [03:29<00:00,  2.61s/batch, acc=0.83909, loss=0.44932, val_acc=0.73021, val_loss=0.89763]


### Dataloader for test

In [13]:
# Construct test datasets.
test_set = FoodDataset(test_tfm, isTrain=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

FileNotFoundError: [Errno 2] No such file or directory: '../input/food-11/test'

### Test Time Augmentation

In [None]:
tta_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(90, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.RandomGrayscale(p=0.2),
    transforms.ToTensor(),
])

tta_num = 5

### Testing and generate prediction CSV

In [None]:
model_best = Resnet(11).to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()

prediction = []
with torch.no_grad():
    for data, _ in tqdm(test_loader):
        for img in data:
            test_input = img.view(1, 3, 224, 224)
            test_pred = model_best(test_input.to(device))
            test_pred = test_pred.cpu().data.numpy()

            # test time augmentation
            tta_pred = np.zeros((1, 11))
            for _ in range(tta_num):
                test_augmented = tta_transform(img)
                test_augmented = test_augmented.view(1, 3, 224, 224)
                pred = model_best(test_augmented.to(device))
                tta_pred = tta_pred + pred.cpu().data.numpy()
            tta_pred = tta_pred / tta_num
            
            # final prediction
            test_label = np.argmax(test_pred * 0.7 + tta_pred * 0.3)

            prediction.append(test_label)


In [None]:
# create test csv
def pad4(i):
    return "0" * (4 - len(str(i))) + str(i)


df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv(f"{_exp_name}_submission.csv", index=False)
