Reference: https://docs.pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

Personal Notes
- https://docs.pytorch.org/docs/stable/tensorboard.html
- https://docs.pytorch.org/vision/main/models/convnext.html
- https://github.com/pytorch/vision/blob/main/torchvision/models/convnext.py

In [None]:
import os
import sys

import numpy as np
import torch
import torchvision
from tqdm.notebook import trange, tqdm
import time

# Seed every random number generator so that reruns of the notebook are reproducible.
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
np.random.seed(42)
# Dedicated generator object, seeded separately from the global torch RNG.
generator = torch.Generator()
generator.manual_seed(42)

##### Hyperparameters

In [None]:
# Training configuration shared by the cells below.
LEARNING_RATE = 1e-3  # initial learning rate handed to the optimizer
NUM_EPOCHS = 10       # number of passes over the training split
BATCH_SIZE = 32       # samples per mini-batch in every DataLoader

##### Load Data
Using this dataset: https://www.kaggle.com/datasets/echometerhhwl/pokemon-gen-1-38914

Other datasets we found:
- https://www.kaggle.com/datasets/thedagger/pokemon-generation-one
- https://www.kaggle.com/datasets/lantian773030/pokemonclassification
- https://www.kaggle.com/datasets/kvpratama/pokemon-images-dataset
- https://www.kaggle.com/datasets/adamstanley537/pokemon-gen-1-image-dataset-27000-images

In [None]:
from torchvision import transforms

# Standard ImageNet channel statistics used for input normalization.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation, then tensor conversion + normalization.
data_transforms = transforms.Compose(
    [
        transforms.Resize(512),
        transforms.RandomRotation(15),
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(224),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ]
)

# Evaluation pipeline: deterministic resize + center crop, no augmentation, so
# validation/test metrics do not change from one pass to the next.
eval_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ]
)

data = torchvision.datasets.ImageFolder(root='data/pokemon', transform=data_transforms)
train_set, val_set, test_set = torch.utils.data.random_split(data, [0.7, 0.15, 0.15], generator)

# random_split returns Subset objects that all share `data` (and therefore its
# augmenting transform). Re-wrap the val/test indices around a second view of
# the same folder that uses the evaluation transform instead.
eval_data = torchvision.datasets.ImageFolder(root='data/pokemon', transform=eval_transforms)
val_set = torch.utils.data.Subset(eval_data, val_set.indices)
test_set = torch.utils.data.Subset(eval_data, test_set.indices)

image_datasets = {
    'train': train_set,
    'val': val_set,
    'test': test_set,
}
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=(split == 'train'),  # only the training data needs reshuffling
        num_workers=4,
    )
    for split, dataset in image_datasets.items()
}
# Include every split (also 'test') so any split can be looked up by name.
dataset_sizes = {split: len(dataset) for split, dataset in image_datasets.items()}
# A Subset has no `.classes` attribute; the class list lives on the ImageFolder.
class_names = data.classes

##### Training Code

In [None]:
def train(model, optimizer, criterion, scheduler):
    """Run one training epoch over ``dataloaders['train']``.

    Args:
        model: network to optimize; it is switched to training mode.
        optimizer: optimizer whose gradients are reset and which is stepped
            once per mini-batch.
        criterion: loss function, called as ``criterion(outputs, labels)``.
        scheduler: learning-rate scheduler, stepped once after the epoch.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the accumulated batch loss (each
        batch weighted by its size) and the number of correct predictions,
        both divided by ``dataset_sizes['train']``.
    """
    model.train()
    loss_sum = 0.0
    num_correct = 0

    for batch_images, batch_labels in dataloaders['train']:
        batch_images = batch_images.to('cuda')
        batch_labels = batch_labels.to('cuda')

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass; the predicted class is the index of the largest output.
        logits = model(batch_images)
        predictions = torch.max(logits, 1)[1]
        batch_loss = criterion(logits, batch_labels)

        # Backward pass and parameter update.
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so a smaller last batch
        # contributes proportionally.
        loss_sum += batch_loss.item() * batch_images.size(0)
        num_correct += (predictions == batch_labels.data).sum()

    scheduler.step()

    n_samples = dataset_sizes['train']
    return loss_sum / n_samples, num_correct.double() / n_samples

def validate(model, criterion, option):
    """Evaluate ``model`` on one data split without updating its weights.

    Args:
        model: network to evaluate; it is switched to eval mode.
        criterion: loss function, called as ``criterion(outputs, labels)``.
        option: key into ``dataloaders`` selecting the split, e.g. ``'val'``
            or ``'test'``.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the accumulated batch loss (each
        batch weighted by its size) and the number of correct predictions,
        both divided by the number of samples in the selected split.

    Raises:
        KeyError: if ``option`` is not a key of ``dataloaders``.
    """
    model.eval()
    loader = dataloaders[option]
    running_loss = 0.0
    running_corrects = 0

    with torch.no_grad():  # no gradient tracking
        for images, labels in loader:
            images, labels = images.to('cuda'), labels.to('cuda')

            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            running_corrects += torch.sum(preds == labels.data)

    # Count samples from the loader's own dataset so that every split present
    # in `dataloaders` works, even one without an entry in `dataset_sizes`.
    num_samples = len(loader.dataset)
    epoch_loss = running_loss / num_samples
    epoch_acc = running_corrects.double() / num_samples
    return epoch_loss, epoch_acc

In [None]:
path = 'results/'
best_model_path = os.path.join(path, 'best_model.pt')


# reference: https://docs.pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
def train_model(model, optimizer, criterion, scheduler, num_epochs=NUM_EPOCHS):
    """Train ``model`` for ``num_epochs`` epochs and checkpoint the best weights.

    After every epoch the model is evaluated on the ``'val'`` split; whenever
    the validation accuracy exceeds the best value seen so far, the model's
    ``state_dict`` is written to ``best_model_path``.

    Args:
        model: network to train.
        optimizer: optimizer passed through to ``train``.
        criterion: loss function passed through to ``train`` and ``validate``.
        scheduler: learning-rate scheduler passed through to ``train``.
        num_epochs: number of epochs to run.

    Returns:
        None. Progress is printed and the best weights are saved to disk.
    """
    time_start = time.time()
    best_acc = 0.0
    # torch.save does not create missing directories, so make sure the
    # checkpoint folder exists before the first save.
    os.makedirs(os.path.dirname(best_model_path) or '.', exist_ok=True)
    print('Training started...\n')

    for epoch in tqdm(range(num_epochs)):
        print(f'Epoch {epoch}/{num_epochs - 1}')

        train_loss, train_acc = train(model, optimizer, criterion, scheduler)
        val_loss, val_acc = validate(model, criterion, 'val')

        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        print('-' * 10)
        print()

        # save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), best_model_path)

    time_elapsed = time.time() - time_start
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # `.4f` = four decimals; the previous `4f` only set a minimum field width.
    print(f'Best val Acc: {best_acc:.4f}')

##### Create Model

In [None]:
from torchvision.models import convnext_large, ConvNeXt_Large_Weights

# Start from the default pretrained ConvNeXt-Large weights.
model = convnext_large(weights=ConvNeXt_Large_Weights.DEFAULT)

# model.classifier[2] is the linear layer; swap it for a new one with one
# output per class in `class_names`.
# TODO: try dropout later
num_feats = model.classifier[2].in_features
model.classifier[2] = torch.nn.Linear(
    in_features=num_feats,
    out_features=len(class_names),
)
model = model.to('cuda')

##### Start Training

In [None]:
# Loss, optimizer and learning-rate schedule for the training run.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer=optimizer,
    T_max=NUM_EPOCHS,
)

# Train the ConvNeXt model.
train_model(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=scheduler,
    num_epochs=NUM_EPOCHS,
)

##### Load Model

In [None]:
# Restore the best checkpoint written during training and switch to eval mode.
try:
    model.load_state_dict(torch.load(best_model_path, weights_only=True))
except (NameError, FileNotFoundError):
    # NameError: the cells defining `model` / `best_model_path` have not run.
    # FileNotFoundError: no checkpoint has been saved yet.
    # Any other failure (e.g. mismatched state-dict keys) is a real error and
    # is left to propagate instead of being hidden behind this message.
    print('Create the model first.')
else:
    model.eval()

##### Accuracy on Test Set

In [None]:
# Evaluate on the held-out test split. The call stays the final expression of
# the cell so the notebook displays the returned (loss, accuracy) tuple.
criterion = torch.nn.CrossEntropyLoss()
validate(model=model, criterion=criterion, option='test')