# AML - 2025 : Feather in Focus - The Baseline

In [1]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
from PIL import Image
from google.colab import drive

In [2]:
# 1. SETUP: Project paths + Google Drive mount
# ---------------------------------------------------------
# Project root on the mounted Drive, and the dataset folder inside it.
BASE_PATH = "/content/drive/MyDrive/AML2025"
DATA_PATH = os.path.join(BASE_PATH, "Dataset")

# Mount Google Drive so the paths above become reachable from the notebook.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# 2. DEFINE THE DATASET CLASS
# ---------------------------------------------------------
class BirdDataset(Dataset):
    """Dataset of (image, label) pairs indexed by a CSV file.

    The CSV is read with pandas and must contain a ``label`` column.
    Items are looked up positionally: column 0 is used as the image path
    (relative to ``root_dir``) and column 1 as the label.

    Args:
        csv_file: Path of the CSV index file.
        root_dir: Directory the image paths in the CSV are joined onto.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform

        frame = pd.read_csv(csv_file)
        # Shift labels down by one so they start at 0 (the original author
        # notes the CSV uses 1-200 while the loss expects 0-199).
        frame['label'] = frame['label'] - 1
        self.data = frame

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return self.data.shape[0]

    @staticmethod
    def _open_rgb(full_path):
        """Open ``full_path`` as an RGB image, or a black 224x224 placeholder.

        A missing file is reported on stdout and replaced by the placeholder
        so that iteration over the dataset does not stop.
        """
        try:
            return Image.open(full_path).convert("RGB")
        except FileNotFoundError:
            print(f"MISSING IMAGE: {full_path}")
            return Image.new('RGB', (224, 224))

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``; label is a long tensor."""
        # Path as stored in the CSV, e.g. "/train_images/1.jpg".
        relative = self.data.iloc[idx, 0]
        # A leading "/" would make os.path.join discard root_dir, so drop it.
        relative = relative[1:] if relative.startswith("/") else relative

        image = self._open_rgb(os.path.join(self.root_dir, relative))
        raw_label = self.data.iloc[idx, 1]

        if self.transform:
            image = self.transform(image)

        return image, torch.tensor(raw_label, dtype=torch.long)

# 3. CREATE DATA LOADERS
# ---------------------------------------------------------
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalize each channel with the mean/std values that
# torchvision documents for its ImageNet-pretrained models.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Initialize Dataset
# Note: 'train_images.csv' lives directly inside DATA_PATH, and the image
# paths it lists are resolved relative to DATA_PATH as well.
dataset = BirdDataset(
    csv_file=os.path.join(DATA_PATH, 'train_images.csv'),
    root_dir=DATA_PATH,
    transform=data_transforms,
)

# Split: 80% Train, 20% Validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
# A fixed-seed generator keeps the train/validation membership identical
# across notebook runs, so validation scores stay comparable between runs.
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Create the Loaders (The final delivery)
# Only the training loader shuffles; validation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

print(" Data Loaded Successfully!")
print(f"Training Images: {len(train_dataset)}")
print(f"Validation Images: {len(val_dataset)}")

 Data Loaded Successfully!
Training Images: 3140
Validation Images: 786


In [None]:
import torch
import torch.nn as nn
from torchvision import models

# defining the baseline model
def build_baseline_model(num_classes=200):
    """Build a frozen, ImageNet-pretrained ResNet50 with a new linear head.

    Every parameter of the pretrained network has ``requires_grad`` set to
    False. The final ``fc`` layer is then replaced by a fresh ``nn.Linear``,
    whose parameters are trainable by default, so only the head learns.

    Args:
        num_classes: Number of outputs of the new classification head.

    Returns:
        The modified ResNet50 module.
    """
    # 1. Load the pre-trained ResNet50.
    # IMAGENET1K_V1 is the original ImageNet checkpoint. torchvision also
    # ships a newer IMAGENET1K_V2 (its DEFAULT); V1 is kept here on purpose
    # so the baseline stays what it was.
    weights = models.ResNet50_Weights.IMAGENET1K_V1
    model = models.resnet50(weights=weights)

    # 2. FREEZE the weights: no gradients for any existing layer.
    for param in model.parameters():
        param.requires_grad = False

    # 3. REPLACE the head. The input width is read from the layer being
    # replaced (2048 for ResNet50) instead of being hard-coded.
    model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)

    return model

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU,
# then build the baseline network and move it onto that device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = build_baseline_model()
model = model.to(device)

print(f"Model Loaded Successfully on: {device}")
print("Structure: ResNet50 (Frozen) -> Linear Head (200 classes)")

In [None]:
import torch.optim as optim
import time
import copy

# 1. SETUP: Optimizer & Loss Function
# ---------------------------------------------------------
# Adam receives only the parameters of the 'fc' head; nothing else in the
# model is handed to the optimizer.
optimizer = optim.Adam(params=model.fc.parameters(), lr=1e-3)

# Cross-entropy loss for multi-class classification (one class per image).
criterion = nn.CrossEntropyLoss()

# 2. THE TRAINING ENGINE
# ---------------------------------------------------------
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=3):
    """Train ``model`` and return it loaded with its best-validation weights.

    Each epoch runs one pass over ``train_loader`` (with optimizer steps)
    and one gradient-free pass over ``val_loader``. The weights from the
    epoch with the highest validation accuracy are restored before
    returning. Progress is printed to stdout.

    Args:
        model: Network to train; batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        train_loader: Loader yielding ``(inputs, labels)`` training batches;
            must expose ``.dataset`` with a length.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches;
            must expose ``.dataset`` with a length.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer updating the trainable parameters of ``model``.
        num_epochs: Number of train/validate rounds.

    Returns:
        ``(model, history)`` where ``history`` maps ``'train_loss'``,
        ``'val_loss'`` and ``'val_acc'`` to lists with one float per epoch.
    """
    since = time.time()

    # Follow the model's own device rather than a module-level `device`
    # global, so the function also works when imported or reused elsewhere.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Store history to plot later
    history = {'train_loss': [], 'val_loss': [], 'val_acc': []}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # --- TRAINING PHASE ---
        model.train()  # Set model to training mode
        running_loss = 0.0

        for inputs, labels in train_loader:
            inputs = inputs.to(run_device)
            labels = labels.to(run_device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward -> Backward -> Optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Weight by batch size so a smaller final batch is not
            # over-counted in the per-sample average below.
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        history['train_loss'].append(epoch_loss)
        print(f'Train Loss: {epoch_loss:.4f}')

        # --- VALIDATION PHASE ---
        model.eval()   # Set model to evaluate mode
        val_loss = 0.0
        corrects = 0

        with torch.no_grad():  # Don't track gradients during validation
            for inputs, labels in val_loader:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                _, preds = torch.max(outputs, 1)

                val_loss += loss.item() * inputs.size(0)
                # Keep the counter a plain int instead of a device tensor.
                corrects += (preds == labels).sum().item()

        val_loss = val_loss / len(val_loader.dataset)
        val_acc = corrects / len(val_loader.dataset)

        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f'Val Loss:   {val_loss:.4f} | Val Acc: {val_acc:.4f}')

        # Deep copy the weights if this is the best epoch so far
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())

        print()  # Empty line

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best Val Acc: {best_acc:.4f}')

    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model, history

# 3. EXECUTE TRAINING
# ---------------------------------------------------------
# Three epochs only: enough for a quick baseline score.
trained_model, history = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=3,
)

# 4. SAVE THE MODEL
# ---------------------------------------------------------
# Only the state dict is written, next to the dataset folder under BASE_PATH.
save_path = f"{BASE_PATH}/baseline_model.pth"
torch.save(obj=trained_model.state_dict(), f=save_path)
print(f"Model saved to: {save_path}")