# Experiment 01

- Loss function: ArcFace
- SwimB
- closed set

In [1]:
# !pip install ipywidgets

In [2]:
import os
import sys
import math
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
from tqdm.notebook import tqdm

In [3]:
from src.dataset import SeaTurtleDataset, download_dataset
from src.arcface import ArcFace
from src.utils import get_device

In [4]:
# --- Configuration ---
IMG_SIZE = 224
DATA_DIR = './data/seaturtleid2022-subset'

paths = download_dataset()

img_dir=paths['images_path']

model_dir = './models'
Path(model_dir).mkdir(parents=True, exist_ok=True)
model_save_path = f'{model_dir}/filtered_closed_arcface_swin_b.pth'

train_csv_path = os.path.join(DATA_DIR, "metadata_closed_set_splits_train.csv")
eval_csv_path = os.path.join(DATA_DIR, "metadata_closed_set_splits_valid.csv")
test_csv_path = os.path.join(DATA_DIR, "metadata_closed_set_splits_test.csv")

Dataset downloaded and extracted to: /Users/nhut/.cache/kagglehub/datasets/wildlifedatasets/seaturtleid2022/versions/4


In [None]:
# swimb_normalize = transforms.Normalize(
#     mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# train_transform = transforms.Compose([
#     transforms.Resize((IMG_SIZE, IMG_SIZE)),
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     swimb_normalize,
# ])

# test_transform = transforms.Compose([
#     transforms.Resize((IMG_SIZE, IMG_SIZE)),
#     transforms.ToTensor(),
#     swimb_normalize,
# ])

swimb_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    swimb_normalize,
])

test_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    swimb_normalize,
])

In [8]:
BATCH_SIZE = 10
annotations_path= f"{DATA_DIR}/annotations.json"

train_dataset = SeaTurtleDataset(
    metadata_path=train_csv_path, img_dir=img_dir, annotations_path=annotations_path, transform=train_transform)
eval_dataset = SeaTurtleDataset(
    metadata_path=eval_csv_path, img_dir=img_dir, annotations_path=annotations_path, transform=test_transform)
test_dataset = SeaTurtleDataset(
    metadata_path=test_csv_path, img_dir=img_dir, annotations_path=annotations_path, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

loading annotations into memory...
Done (t=0.28s)
creating index...
index created!
loading annotations into memory...
Done (t=0.16s)
creating index...
index created!
loading annotations into memory...
Done (t=0.16s)
creating index...
index created!


In [10]:
EMBEDDING_SIZE = 512
NUM_CLASSES = train_dataset.metadata['identity'].nunique()
EPOCHS = 10
LEARNING_RATE = 1e-4

device = get_device()

print(f'Using device: {device}')

# --- Swin B Backbone Model ---
model = models.swin_b(weights=models.Swin_B_Weights.IMAGENET1K_V1)
# Replace the final classification head with a layer that produces the embeddings
model.head = nn.Linear(model.head.in_features, EMBEDDING_SIZE)
print("Original head:", model.head)
print(model.head.in_features)
model.to(device)

# --- ArcFace Head & Loss Func ---
metric = ArcFace(num_classes=NUM_CLASSES, embedding_size=EMBEDDING_SIZE, scale=30.0, margin=0.50).to(device)
criterion = nn.CrossEntropyLoss()

# --- Optimizer ---
optimizer = optim.AdamW(
    list(model.parameters()) + list(metric.parameters()),
    lr=LEARNING_RATE
)

# --- Scheduler ---
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

Using device: mps
Original head: Linear(in_features=1024, out_features=512, bias=True)
1024


In [19]:
# --- Training Loop ---
best_acc = 0.0
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")

    for i, item in enumerate(progress_bar):
        images, labels = item['image'].to(device), item['label'].to(device)

        # Forward pass
        features = model(images)
        output = metric(features, labels)
        loss = criterion(output, labels)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        progress_bar.set_postfix({'loss': running_loss / (i + 1)})

    scheduler.step()

    epoch_loss = running_loss / len(train_loader)
    
    # Evaluation
    model.eval()
    correct = 0
    total = 0

    # Build gallery from training set
    gallery_embeddings = []
    gallery_labels = []
    with torch.no_grad():
        for item in train_loader:
            images = item['image'].to(device)
            labels = item['label']
            embeddings = model(images)
            embeddings = nn.functional.normalize(embeddings, p=2, dim=1)
            gallery_embeddings.append(embeddings.cpu())
            gallery_labels.append(labels)

    gallery_embeddings = torch.cat(gallery_embeddings, dim=0)
    gallery_labels = torch.cat(gallery_labels, dim=0)

    # Evaluate on eval set
    with torch.no_grad():
        for item in eval_loader:
            images = item['image'].to(device)
            labels = item['label'].to(device)
            embeddings = model(images)
            embeddings = nn.functional.normalize(embeddings, p=2, dim=1)
            
            # Compute cosine similarity with gallery
            similarities = torch.mm(embeddings,
                                    gallery_embeddings.to(device).t())
            
            # Get kNN (k=1 - top-1) prediction
            _, predicted_indices = torch.max(similarities, 1)
            predicted = gallery_labels[predicted_indices.cpu()].to(device)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
    epoch_acc = 100 * correct / total
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {running_loss/len(train_loader):.4f}, Eval Accuracy: {epoch_acc:.2f}%")
    
    if epoch_acc > best_acc:
        best_acc = epoch_acc
        torch.save(model.state_dict(), model_save_path)
        print("Saved best model.")


print(f"Finished Training. Best Test Accuracy: {best_acc:.2f}%")

Epoch 1/10:   0%|          | 0/21 [00:00<?, ?it/s]

TypeError: Unexpected type <class 'numpy.ndarray'>