In [1]:
# Install required packages
!pip install -q torch torchvision timm albumentations opencv-python-headless scikit-learn matplotlib seaborn pandas numpy tqdm pillow
!pip install -q ultralytics  # For YOLOv5


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import sys
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from tqdm.auto import tqdm
import cv2
from collections import defaultdict, Counter
import json
import copy
import warnings
import time
from pathlib import Path
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from torch.optim import SGD, Adam, AdamW
from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR

import timm  # PyTorch Image Models
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support, accuracy_score

from ultralytics import YOLO  # YOLOv5/v8

# Set style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

print(f"PyTorch version: {torch.__version__}")
print(f"Timm version: {timm.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
PyTorch version: 2.9.0+cu126
Timm version: 1.0.24
CUDA available: True
CUDA device: Tesla T4


In [4]:
# Set random seeds
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)
print("✓ Random seed set to 42")

✓ Random seed set to 42


In [5]:
# ⚠️ UPDATE THESE PATHS
DATA_DIR = '/content/drive/MyDrive/data'

# Metadata
TRAIN_METADATA = f'{DATA_DIR}/train_metadata.csv'
VAL_METADATA = f'{DATA_DIR}/val_metadata.csv'

# Image directories
TRAIN_IMAGES_DIR = f'{DATA_DIR}/train_images'
VAL_IMAGES_DIR = f'{DATA_DIR}/val_images'
TEST_IMAGES_DIR = f'{DATA_DIR}/test_images/mistery_cat'

# Output directories
CHECKPOINT_DIR = '/content/drive/MyDrive/bird_efficientnet_checkpoints'
RESULTS_DIR = '/content/drive/MyDrive/bird_efficientnet_results'
YOLO_PREPROCESSED_DIR = '/content/drive/MyDrive/bird_yolo_preprocessed'  # Store YOLO-processed images

os.makedirs(CHECKPOINT_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(YOLO_PREPROCESSED_DIR, exist_ok=True)

# Verify paths
print("=" * 70)
print("CHECKING DATA DIRECTORIES")
print("=" * 70)
for path, name in [(TRAIN_METADATA, 'Train Metadata'),
                    (VAL_METADATA, 'Val Metadata'),
                    (TRAIN_IMAGES_DIR, 'Train Images'),
                    (VAL_IMAGES_DIR, 'Val Images'),
                    (TEST_IMAGES_DIR, 'Test Images')]:
    if os.path.exists(path):
        print(f"✓ {name}: {path}")
    else:
        print(f"✗ {name}: {path} (NOT FOUND)")
print("=" * 70)

CHECKING DATA DIRECTORIES
✓ Train Metadata: /content/drive/MyDrive/data/train_metadata.csv
✓ Val Metadata: /content/drive/MyDrive/data/val_metadata.csv
✓ Train Images: /content/drive/MyDrive/data/train_images
✓ Val Images: /content/drive/MyDrive/data/val_images
✓ Test Images: /content/drive/MyDrive/data/test_images/mistery_cat


In [15]:
# Model configurations
MODEL_CONFIGS = {
    'efficientnet_b0': {
        'model_name': 'efficientnet_b0',
        'img_size': 224,
        'batch_size': 32,
        'description': 'EfficientNet-B0 (5.3M params) - Best for small datasets'
    }
    # ,
    # 'efficientnetv2_s': {
    #     'model_name': 'tf_efficientnetv2_s',
    #     'img_size': 384,
    #     'batch_size': 16,
    #     'description': 'EfficientNetV2-Small (21M params) - Modern balanced choice'
    # }
}

# Training configuration
class Config:
    # Training
    num_epochs = 100
    learning_rate = 0.001
    weight_decay = 0.01  # 100x higher for regularization
    patience = 7  # Earlier stopping to prevent overfitting

    # Regularization for small datasets
    dropout = 0.5  # Higher dropout
    label_smoothing = 0.1  # Prevent overconfident predictions

    # Feature extraction mode (CRITICAL for small datasets!)
    freeze_backbone = True  # Only train classifier head
    unfreeze_after_epochs = 0  # Set >0 to gradually unfreeze (not recommended for tiny datasets)

    # System
    num_workers = 0  # Set to 0 for Colab compatibility
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # YOLO settings
    yolo_model = 'yolov8n.pt'  # Nano model for speed
    yolo_conf_threshold = 0.25  # Detection confidence
    yolo_expand_bbox = 0.1  # Expand bbox by 10%

config = Config()

print("\n" + "=" * 70)
print("EXPERIMENT CONFIGURATION")
print("=" * 70)
print(f"Device: {config.device}")
print(f"Epochs: {config.num_epochs}")
print(f"Learning Rate: {config.learning_rate}")
print(f"Weight Decay: {config.weight_decay}")
print(f"Dropout: {config.dropout}")
print(f"Label Smoothing: {config.label_smoothing}")
print(f"Early Stopping Patience: {config.patience}")
print(f"\n🔒 FEATURE EXTRACTION MODE: {'ENABLED' if config.freeze_backbone else 'DISABLED'}")
if config.freeze_backbone:
    print(f"   → Backbone weights are FROZEN (pretrained features only)")
    print(f"   → Only classifier head will be trained (~40K params)")
    print(f"   → This prevents overfitting on small datasets!")
print("\nModels to train:")
for key, cfg in MODEL_CONFIGS.items():
    print(f"  • {cfg['description']}")
    print(f"    Input: {cfg['img_size']}×{cfg['img_size']}, Batch: {cfg['batch_size']}")
print("=" * 70)


EXPERIMENT CONFIGURATION
Device: cuda
Epochs: 100
Learning Rate: 0.001
Weight Decay: 0.01
Dropout: 0.5
Label Smoothing: 0.1
Early Stopping Patience: 7

🔒 FEATURE EXTRACTION MODE: ENABLED
   → Backbone weights are FROZEN (pretrained features only)
   → Only classifier head will be trained (~40K params)
   → This prevents overfitting on small datasets!

Models to train:
  • EfficientNet-B0 (5.3M params) - Best for small datasets
    Input: 224×224, Batch: 32


In [7]:
# Load metadata
train_df = pd.read_csv(TRAIN_METADATA)
val_df = pd.read_csv(VAL_METADATA)

print("Dataset Summary:")
print(f"  Train samples: {len(train_df)}")
print(f"  Val samples: {len(val_df)}")
print(f"  Number of classes: {train_df['class'].nunique()}")

# Class mapping - USE ORIGINAL INDICES FROM METADATA (not alphabetically sorted!)
# This ensures the model learns Kaggle's expected class indices directly
class_to_idx = dict(zip(train_df['class'], train_df['class_idx']))
class_to_idx = {k: int(v) for k, v in class_to_idx.items()}  # Ensure int type
idx_to_class = {idx: cls for cls, idx in class_to_idx.items()}
class_names = sorted(class_to_idx.keys())  # For display only
num_classes = len(class_to_idx)

print(f"\nClass mapping (using original Kaggle indices):")
print(f"  Example: {list(class_to_idx.items())[:3]}...")
print(f"\nFirst few rows:")
print(train_df.head())


Dataset Summary:
  Train samples: 1082
  Val samples: 103
  Number of classes: 20

Class mapping (using original Kaggle indices):
  Example: [('Brandt_Cormorant', 4), ('Brown_Creeper', 14), ('Bronzed_Cowbird', 19)]...

First few rows:
                                               path             class  \
0  Brandt_Cormorant/Brandt_Cormorant_0071_23007.jpg  Brandt_Cormorant   
1  Brandt_Cormorant/Brandt_Cormorant_0028_22892.jpg  Brandt_Cormorant   
2  Brandt_Cormorant/Brandt_Cormorant_0076_23021.jpg  Brandt_Cormorant   
3  Brandt_Cormorant/Brandt_Cormorant_0080_23002.jpg  Brandt_Cormorant   
4  Brandt_Cormorant/Brandt_Cormorant_0045_22916.jpg  Brandt_Cormorant   

   class_idx  
0          4  
1          4  
2          4  
3          4  
4          4  


In [8]:
# YOLO bird detector
class YOLOBirdDetector:
    def __init__(self, model_name='yolov8n.pt', conf_threshold=0.25, expand_bbox=0.1):
        """
        YOLO-based bird detector
        Args:
            model_name: YOLOv8 model (n=nano, s=small, m=medium)
            conf_threshold: Detection confidence threshold
            expand_bbox: Expand bounding box by this fraction
        """
        self.model = YOLO(model_name)
        self.conf_threshold = conf_threshold
        self.expand_bbox = expand_bbox
        # COCO class 14 = bird
        self.bird_class_id = 14

    def detect_and_crop(self, image_path):
        """
        Detect bird in image and return cropped region
        Returns: cropped_image (PIL Image) or None if no bird detected
        """
        # Read image
        img = cv2.imread(str(image_path))
        if img is None:
            return None

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img_rgb.shape[:2]

        # Run detection
        results = self.model(img_rgb, conf=self.conf_threshold, verbose=False)

        # Find bird detections
        bird_boxes = []
        for result in results:
            boxes = result.boxes
            for box in boxes:
                cls = int(box.cls[0])
                if cls == self.bird_class_id:
                    bird_boxes.append(box.xyxy[0].cpu().numpy())

        if len(bird_boxes) == 0:
            # No bird detected, return original image
            return Image.fromarray(img_rgb)

        # Use the largest box (most confident detection)
        areas = [(box[2]-box[0])*(box[3]-box[1]) for box in bird_boxes]
        largest_box = bird_boxes[np.argmax(areas)]

        # Expand bounding box
        x1, y1, x2, y2 = largest_box
        box_w, box_h = x2 - x1, y2 - y1

        x1 = max(0, int(x1 - box_w * self.expand_bbox))
        y1 = max(0, int(y1 - box_h * self.expand_bbox))
        x2 = min(w, int(x2 + box_w * self.expand_bbox))
        y2 = min(h, int(y2 + box_h * self.expand_bbox))

        # Make square (as in paper)
        crop_w, crop_h = x2 - x1, y2 - y1
        if crop_w > crop_h:
            diff = crop_w - crop_h
            y1 = max(0, y1 - diff // 2)
            y2 = min(h, y2 + diff // 2)
        else:
            diff = crop_h - crop_w
            x1 = max(0, x1 - diff // 2)
            x2 = min(w, x2 + diff // 2)

        # Crop
        cropped = img_rgb[y1:y2, x1:x2]

        return Image.fromarray(cropped)

print("✓ YOLO bird detector defined")

✓ YOLO bird detector defined


In [9]:
# Preprocess dataset with YOLO (optional - run this if you want YOLO preprocessing)
def preprocess_with_yolo(df, img_dir, output_dir, split_name='train'):
    """
    Preprocess all images in dataset with YOLO bird detection
    Saves cropped images to output_dir
    """
    print(f"\nPreprocessing {split_name} images with YOLO...")

    # Create output directory structure
    output_split_dir = os.path.join(output_dir, split_name)
    os.makedirs(output_split_dir, exist_ok=True)

    # Initialize detector
    detector = YOLOBirdDetector(
        model_name=config.yolo_model,
        conf_threshold=config.yolo_conf_threshold,
        expand_bbox=config.yolo_expand_bbox
    )

    # Process each image
    processed_paths = []
    no_detection_count = 0

    for idx, row in tqdm(df.iterrows(), total=len(df), desc=f'YOLO {split_name}'):
        img_path = os.path.join(img_dir, row['path'])

        # Detect and crop
        cropped_img = detector.detect_and_crop(img_path)

        if cropped_img is None:
            no_detection_count += 1
            processed_paths.append(None)
            continue

        # Save cropped image
        output_path = os.path.join(output_split_dir, row['path'])
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        cropped_img.save(output_path)
        processed_paths.append(output_path)

    print(f"✓ Processed {len(df)} images")
    print(f"  - Successfully cropped: {len(df) - no_detection_count}")
    print(f"  - No bird detected: {no_detection_count}")

    return output_split_dir

# Option to run YOLO preprocessing
USE_YOLO = True  # Set to True to enable YOLO preprocessing

if USE_YOLO:
    print("\n" + "="*70)
    print("YOLO PREPROCESSING")
    print("="*70)

    # Check if already preprocessed
    yolo_train_dir = os.path.join(YOLO_PREPROCESSED_DIR, 'train')
    yolo_val_dir = os.path.join(YOLO_PREPROCESSED_DIR, 'val')

    if os.path.exists(yolo_train_dir) and len(os.listdir(yolo_train_dir)) > 0:
        print("✓ YOLO-preprocessed images found, skipping preprocessing")
        TRAIN_IMAGES_YOLO = yolo_train_dir
        VAL_IMAGES_YOLO = yolo_val_dir
    else:
        print("Preprocessing images with YOLO (this may take 10-15 minutes)...")
        TRAIN_IMAGES_YOLO = preprocess_with_yolo(train_df, TRAIN_IMAGES_DIR, YOLO_PREPROCESSED_DIR, 'train')
        VAL_IMAGES_YOLO = preprocess_with_yolo(val_df, VAL_IMAGES_DIR, YOLO_PREPROCESSED_DIR, 'val')
        print("\n✓ YOLO preprocessing complete!")
else:
    print("\nSkipping YOLO preprocessing (USE_YOLO=False)")
    TRAIN_IMAGES_YOLO = None
    VAL_IMAGES_YOLO = None


YOLO PREPROCESSING
Preprocessing images with YOLO (this may take 10-15 minutes)...

Preprocessing train images with YOLO...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.4.0/yolov8n.pt to 'yolov8n.pt': 100% ━━━━━━━━━━━━ 6.2MB 105.1MB/s 0.1s


YOLO train:   0%|          | 0/1082 [00:00<?, ?it/s]

✓ Processed 1082 images
  - Successfully cropped: 1082
  - No bird detected: 0

Preprocessing val images with YOLO...


YOLO val:   0%|          | 0/103 [00:00<?, ?it/s]

✓ Processed 103 images
  - Successfully cropped: 103
  - No bird detected: 0

✓ YOLO preprocessing complete!


In [10]:
# Aggressive data augmentation for small datasets
def get_train_transforms(img_size=224):
    """
    Enhanced augmentation pipeline with multiple techniques:
    - Geometric transformations (flips, rotations, shifts, scaling)
    - Color augmentations (brightness, contrast, saturation, hue)
    - Advanced augmentations (blur, noise, compression artifacts)
    - Random erasing to improve robustness
    """
    return A.Compose([
        A.Resize(img_size, img_size),

        # Geometric augmentations - more aggressive for small datasets
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.3),
        A.ShiftScaleRotate(
            shift_limit=0.15,
            scale_limit=0.2,
            rotate_limit=30,
            border_mode=cv2.BORDER_REFLECT_101,
            p=0.7
        ),

        # Perspective and distortion
        A.OneOf([
            A.OpticalDistortion(distort_limit=0.1, shift_limit=0.1, p=0.3),
            A.GridDistortion(num_steps=5, distort_limit=0.1, p=0.3),
            A.ElasticTransform(alpha=1, sigma=50, p=0.3),
        ], p=0.3),

        # Color augmentations
        A.ColorJitter(
            brightness=0.3,
            contrast=0.3,
            saturation=0.3,
            hue=0.15,
            p=0.7
        ),

        # Additional color variations
        A.OneOf([
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.HueSaturationValue(hue_shift_limit=15, sat_shift_limit=25, val_shift_limit=15, p=0.5),
            A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
        ], p=0.5),

        # Blur and noise
        A.OneOf([
            A.GaussianBlur(blur_limit=(3, 5), p=0.3),
            A.MotionBlur(blur_limit=5, p=0.3),
            A.MedianBlur(blur_limit=3, p=0.3),
        ], p=0.2),

        # Noise injection
        A.OneOf([
            A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
            A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.3),
        ], p=0.2),

        # Compression artifacts
        A.OneOf([
            A.ImageCompression(quality_lower=80, quality_upper=100, p=0.3),
            A.Downscale(scale_min=0.75, scale_max=0.95, p=0.3),
        ], p=0.2),

        # Random erasing / cutout
        A.CoarseDropout(
            max_holes=8,
            max_height=16,
            max_width=16,
            min_holes=4,
            min_height=8,
            min_width=8,
            fill_value=0,
            p=0.3
        ),

        # Normalize (ImageNet stats for pretrained models)
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ])

def get_val_transforms(img_size=224):
    """Validation transforms - no augmentation"""
    return A.Compose([
        A.Resize(img_size, img_size),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2()
    ])

print("✓ AGGRESSIVE data augmentation pipelines created")
print("  📊 Augmentation includes:")
print("     • Geometric: flips, rotations (±30°), shifts, scaling, distortions")
print("     • Color: brightness, contrast, saturation, hue, RGB shifts")
print("     • Quality: blur, noise, compression artifacts, downscaling")
print("     • Robustness: random erasing (CoarseDropout)")

✓ AGGRESSIVE data augmentation pipelines created
  📊 Augmentation includes:
     • Geometric: flips, rotations (±30°), shifts, scaling, distortions
     • Color: brightness, contrast, saturation, hue, RGB shifts
     • Quality: blur, noise, compression artifacts, downscaling
     • Robustness: random erasing (CoarseDropout)


In [11]:
# Dataset class
class BirdDataset(Dataset):
    def __init__(self, df, img_dir, transform=None, class_to_idx=None):
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform
        self.class_to_idx = class_to_idx

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['path'])

        # Load image
        image = cv2.imread(img_path)
        if image is None:
            # Fallback: create black image
            image = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Apply transforms
        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        # Get label
        if self.class_to_idx:
            label = self.class_to_idx[row['class']]
        else:
            label = row['class_idx']

        return image, label

print("✓ Dataset class defined")

✓ Dataset class defined


In [12]:
# EfficientNet model wrapper
class EfficientNetClassifier(nn.Module):
    def __init__(self, model_name, num_classes, pretrained=True, dropout=0.3, freeze_backbone=False):
        super(EfficientNetClassifier, self).__init__()

        # Load pretrained model
        self.backbone = timm.create_model(
            model_name,
            pretrained=pretrained,
            num_classes=0  # Remove head
        )

        # Get feature dimension
        self.feature_dim = self.backbone.num_features

        # FREEZE BACKBONE for feature extraction mode
        self.freeze_backbone = freeze_backbone
        if freeze_backbone:
            for param in self.backbone.parameters():
                param.requires_grad = False
            print(f"   🔒 Backbone FROZEN - only classifier will train")

        # Classification head with dropout
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(self.feature_dim, num_classes)
        )

    def forward(self, x):
        features = self.backbone(x)
        logits = self.classifier(features)
        return logits

    def unfreeze_backbone(self):
        """Unfreeze backbone for fine-tuning (use after initial training)"""
        for param in self.backbone.parameters():
            param.requires_grad = True
        self.freeze_backbone = False
        print("   🔓 Backbone UNFROZEN - all parameters now trainable")

print("✓ EfficientNet model wrapper defined (with freeze support)")


✓ EfficientNet model wrapper defined (with freeze support)


In [13]:
# Training function
def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    pbar = tqdm(dataloader, desc='Training', leave=False)
    for images, labels in pbar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        pbar.set_postfix({'loss': f'{loss.item():.4f}', 'acc': f'{100.*correct/total:.2f}%'})

    return running_loss / len(dataloader), 100. * correct / total

# Validation function
def validate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc='Validation', leave=False):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            probs = torch.softmax(outputs, dim=1)
            _, predicted = torch.max(outputs, 1)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    val_loss = running_loss / len(dataloader)
    val_acc = 100. * accuracy_score(all_labels, all_preds)

    return val_loss, val_acc, all_preds, all_labels, all_probs

# Compute metrics
def compute_metrics(labels, preds, probs):
    top1_acc = accuracy_score(labels, preds)

    # Top5
    probs_array = np.array(probs)
    top5_preds = np.argsort(probs_array, axis=1)[:, -5:]
    top5_acc = np.mean([labels[i] in top5_preds[i] for i in range(len(labels))])

    # Precision, Recall, F1
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, preds, average='macro', zero_division=0
    )

    return {
        'top1': top1_acc,
        'top5': top5_acc,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

print("✓ Training functions defined")

✓ Training functions defined


In [14]:
# Complete training loop
def train_model(model, train_loader, val_loader, model_name, num_epochs, device):
    """
    Train model with early stopping and label smoothing for small datasets
    """
    # Use label smoothing to prevent overconfident predictions
    criterion = nn.CrossEntropyLoss(label_smoothing=config.label_smoothing)
    optimizer = AdamW(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)

    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': [],
        'val_top5': [], 'val_f1': []
    }

    best_val_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    patience_counter = 0

    print(f"\n{'='*70}")
    print(f"Training: {model_name}")
    print(f"{'='*70}")

    start_time = time.time()

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        print("-" * 50)

        # Train
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)

        # Validate
        val_loss, val_acc, val_preds, val_labels, val_probs = validate(
            model, val_loader, criterion, device
        )

        # Metrics
        metrics = compute_metrics(val_labels, val_preds, val_probs)

        # Update history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['val_top5'].append(metrics['top5'] * 100)
        history['val_f1'].append(metrics['f1'])

        # Scheduler
        scheduler.step()

        # Print summary
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.2f}%")
        print(f"Top5: {metrics['top5']*100:.2f}% | F1: {metrics['f1']:.4f}")

        # Save best
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_model_wts = copy.deepcopy(model.state_dict())
            patience_counter = 0

            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_acc': val_acc,
                'history': history
            }, f'{CHECKPOINT_DIR}/{model_name}_best.pth')

            print(f"✓ New best model saved (Val Acc: {val_acc:.2f}%)")
        else:
            patience_counter += 1

        # Early stopping
        if patience_counter >= config.patience:
            print(f"\nEarly stopping at epoch {epoch+1}")
            break

    # Load best weights
    model.load_state_dict(best_model_wts)

    elapsed = time.time() - start_time
    print(f"\n✓ Training complete! Best Val Acc: {best_val_acc:.2f}%")
    print(f"  Time: {elapsed/60:.1f} minutes")

    return model, history, best_val_acc

print("✓ Complete training loop defined")

✓ Complete training loop defined


In [16]:
# Experiment runner
def run_experiment(model_config, use_yolo_preprocessing=False):
    """
    Run single experiment with given model configuration
    """
    model_name = model_config['model_name']
    img_size = model_config['img_size']
    batch_size = model_config['batch_size']

    preprocessing = 'yolo' if use_yolo_preprocessing else 'direct'
    experiment_name = f"{model_name}_{preprocessing}"

    print("\n" + "#" * 70)
    print(f"EXPERIMENT: {experiment_name}")
    print("#" * 70)
    print(f"Model: {model_config['description']}")
    print(f"Image size: {img_size}×{img_size}")
    print(f"Batch size: {batch_size}")
    print(f"Preprocessing: {'YOLO bird detection' if use_yolo_preprocessing else 'Direct resize'}")

    # Select image directories
    if use_yolo_preprocessing and TRAIN_IMAGES_YOLO is not None:
        train_img_dir = TRAIN_IMAGES_YOLO
        val_img_dir = VAL_IMAGES_YOLO
    else:
        train_img_dir = TRAIN_IMAGES_DIR
        val_img_dir = VAL_IMAGES_DIR

    # Create datasets
    train_dataset = BirdDataset(
        train_df, train_img_dir,
        transform=get_train_transforms(img_size),
        class_to_idx=class_to_idx
    )

    val_dataset = BirdDataset(
        val_df, val_img_dir,
        transform=get_val_transforms(img_size),
        class_to_idx=class_to_idx
    )

    # Create dataloaders
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size,
        shuffle=True, num_workers=config.num_workers,
        pin_memory=True
    )

    val_loader = DataLoader(
        val_dataset, batch_size=batch_size,
        shuffle=False, num_workers=config.num_workers,
        pin_memory=True
    )

    print(f"\nDataloaders created:")
    print(f"  Train: {len(train_dataset)} samples, {len(train_loader)} batches")
    print(f"  Val: {len(val_dataset)} samples, {len(val_loader)} batches")

    # Create model with frozen backbone for feature extraction
    model = EfficientNetClassifier(
        model_name=model_name,
        num_classes=num_classes,
        pretrained=True,
        dropout=config.dropout,
        freeze_backbone=config.freeze_backbone
    ).to(config.device)

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"\nModel created:")
    print(f"  Total params: {total_params:,}")
    print(f"  Trainable params: {trainable_params:,}")
    if config.freeze_backbone:
        print(f"  ✓ Feature extraction mode: {trainable_params:,} trainable (classifier only)")

    # Train
    model, history, best_val_acc = train_model(
        model, train_loader, val_loader,
        experiment_name, config.num_epochs, config.device
    )

    # Save history
    with open(f'{RESULTS_DIR}/{experiment_name}_history.json', 'w') as f:
        json.dump(history, f)

    return {
        'name': experiment_name,
        'model_name': model_name,
        'preprocessing': preprocessing,
        'img_size': img_size,
        'batch_size': batch_size,
        'best_val_acc': best_val_acc,
        'history': history
    }

print("✓ Experiment runner defined")

✓ Experiment runner defined


In [17]:
# Run all experiments
all_results = []

print("\n" + "="*70)
print("STARTING ALL EXPERIMENTS")
print("="*70)
# print(f"Total experiments to run: {len(MODEL_CONFIGS)}")
# print("\nThis will take approximately 2-4 hours depending on GPU...\n")

# Run each model with and without YOLO preprocessing
for model_key, model_config in MODEL_CONFIGS.items():

    # Without YOLO
    print("\n" + "*"*70)
    print(f"Experiment {len(all_results)+1}/{len(MODEL_CONFIGS)*2}")
    print("*"*70)
    result = run_experiment(model_config, use_yolo_preprocessing=False)
    all_results.append(result)

    # With YOLO (if available)
    print("YOLOOOOOOOOOOO")
    if USE_YOLO and TRAIN_IMAGES_YOLO is not None:
        print("\n" + "*"*70)
        print(f"Experiment {len(all_results)+1}/{len(MODEL_CONFIGS)*2}")
        print("*"*70)
        result = run_experiment(model_config, use_yolo_preprocessing=True)
        all_results.append(result)

print("\n" + "="*70)
print("ALL EXPERIMENTS COMPLETED!")
print("="*70)



STARTING ALL EXPERIMENTS

**********************************************************************
Experiment 1/2
**********************************************************************

######################################################################
EXPERIMENT: efficientnet_b0_direct
######################################################################
Model: EfficientNet-B0 (5.3M params) - Best for small datasets
Image size: 224×224
Batch size: 32
Preprocessing: Direct resize

Dataloaders created:
  Train: 1082 samples, 34 batches
  Val: 103 samples, 4 batches


model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

   🔒 Backbone FROZEN - only classifier will train

Model created:
  Total params: 4,033,168
  Trainable params: 25,620
  ✓ Feature extraction mode: 25,620 trainable (classifier only)

Training: efficientnet_b0_direct

Epoch 1/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.8708 | Train Acc: 12.75%
Val Loss: 2.5244 | Val Acc: 61.17%
Top5: 86.41% | F1: 0.6048
✓ New best model saved (Val Acc: 61.17%)

Epoch 2/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.5485 | Train Acc: 33.36%
Val Loss: 2.1343 | Val Acc: 67.96%
Top5: 92.23% | F1: 0.6749
✓ New best model saved (Val Acc: 67.96%)

Epoch 3/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.3461 | Train Acc: 40.11%
Val Loss: 1.9103 | Val Acc: 71.84%
Top5: 95.15% | F1: 0.7278
✓ New best model saved (Val Acc: 71.84%)

Epoch 4/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.2158 | Train Acc: 45.84%
Val Loss: 1.7657 | Val Acc: 72.82%
Top5: 95.15% | F1: 0.7305
✓ New best model saved (Val Acc: 72.82%)

Epoch 5/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.1097 | Train Acc: 47.69%
Val Loss: 1.6386 | Val Acc: 73.79%
Top5: 95.15% | F1: 0.7483
✓ New best model saved (Val Acc: 73.79%)

Epoch 6/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.0244 | Train Acc: 51.11%
Val Loss: 1.5762 | Val Acc: 72.82%
Top5: 96.12% | F1: 0.7318

Epoch 7/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.0000 | Train Acc: 49.63%
Val Loss: 1.5283 | Val Acc: 76.70%
Top5: 95.15% | F1: 0.7704
✓ New best model saved (Val Acc: 76.70%)

Epoch 8/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.9433 | Train Acc: 53.14%
Val Loss: 1.4493 | Val Acc: 76.70%
Top5: 96.12% | F1: 0.7734

Epoch 9/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.9186 | Train Acc: 54.34%
Val Loss: 1.4019 | Val Acc: 79.61%
Top5: 98.06% | F1: 0.8015
✓ New best model saved (Val Acc: 79.61%)

Epoch 10/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.9001 | Train Acc: 54.25%
Val Loss: 1.3770 | Val Acc: 78.64%
Top5: 97.09% | F1: 0.7858

Epoch 11/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.8769 | Train Acc: 54.99%
Val Loss: 1.3498 | Val Acc: 78.64%
Top5: 97.09% | F1: 0.7876

Epoch 12/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.8542 | Train Acc: 54.90%
Val Loss: 1.3393 | Val Acc: 80.58%
Top5: 97.09% | F1: 0.8069
✓ New best model saved (Val Acc: 80.58%)

Epoch 13/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.8395 | Train Acc: 55.45%
Val Loss: 1.3236 | Val Acc: 79.61%
Top5: 97.09% | F1: 0.7990

Epoch 14/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.8312 | Train Acc: 54.34%
Val Loss: 1.2904 | Val Acc: 80.58%
Top5: 97.09% | F1: 0.7926

Epoch 15/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7861 | Train Acc: 58.41%
Val Loss: 1.2941 | Val Acc: 78.64%
Top5: 97.09% | F1: 0.7929

Epoch 16/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.8300 | Train Acc: 53.33%
Val Loss: 1.3098 | Val Acc: 78.64%
Top5: 98.06% | F1: 0.7842

Epoch 17/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.8178 | Train Acc: 54.53%
Val Loss: 1.2968 | Val Acc: 81.55%
Top5: 97.09% | F1: 0.8151
✓ New best model saved (Val Acc: 81.55%)

Epoch 18/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7722 | Train Acc: 57.02%
Val Loss: 1.2721 | Val Acc: 78.64%
Top5: 97.09% | F1: 0.7934

Epoch 19/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7648 | Train Acc: 58.04%
Val Loss: 1.2617 | Val Acc: 77.67%
Top5: 98.06% | F1: 0.7779

Epoch 20/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7778 | Train Acc: 56.65%
Val Loss: 1.2438 | Val Acc: 79.61%
Top5: 96.12% | F1: 0.7902

Epoch 21/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7701 | Train Acc: 56.56%
Val Loss: 1.2626 | Val Acc: 79.61%
Top5: 97.09% | F1: 0.7908

Epoch 22/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7501 | Train Acc: 59.06%
Val Loss: 1.2429 | Val Acc: 81.55%
Top5: 98.06% | F1: 0.8086

Epoch 23/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7388 | Train Acc: 58.04%
Val Loss: 1.2375 | Val Acc: 82.52%
Top5: 97.09% | F1: 0.8196
✓ New best model saved (Val Acc: 82.52%)

Epoch 24/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7970 | Train Acc: 55.73%
Val Loss: 1.2203 | Val Acc: 80.58%
Top5: 98.06% | F1: 0.8057

Epoch 25/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7378 | Train Acc: 58.78%
Val Loss: 1.2366 | Val Acc: 82.52%
Top5: 98.06% | F1: 0.8224

Epoch 26/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7180 | Train Acc: 58.13%
Val Loss: 1.1913 | Val Acc: 82.52%
Top5: 98.06% | F1: 0.8174

Epoch 27/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7221 | Train Acc: 59.06%
Val Loss: 1.2113 | Val Acc: 81.55%
Top5: 98.06% | F1: 0.8086

Epoch 28/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7036 | Train Acc: 60.54%
Val Loss: 1.2261 | Val Acc: 78.64%
Top5: 97.09% | F1: 0.7828

Epoch 29/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7046 | Train Acc: 59.33%
Val Loss: 1.1921 | Val Acc: 80.58%
Top5: 97.09% | F1: 0.7999

Epoch 30/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7257 | Train Acc: 59.15%
Val Loss: 1.2170 | Val Acc: 80.58%
Top5: 98.06% | F1: 0.8010

Early stopping at epoch 30

✓ Training complete! Best Val Acc: 82.52%
  Time: 9.0 minutes
YOLOOOOOOOOOOO

**********************************************************************
Experiment 2/2
**********************************************************************

######################################################################
EXPERIMENT: efficientnet_b0_yolo
######################################################################
Model: EfficientNet-B0 (5.3M params) - Best for small datasets
Image size: 224×224
Batch size: 32
Preprocessing: YOLO bird detection

Dataloaders created:
  Train: 1082 samples, 34 batches
  Val: 103 samples, 4 batches
   🔒 Backbone FROZEN - only classifier will train

Model created:
  Total params: 4,033,168
  Trainable params: 25,620
  ✓ Feature extraction mode: 25,620 trainable (classifier only)

Training: efficientnet_b0_yolo

Epoch 1/100
-------------

Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.8884 | Train Acc: 13.59%
Val Loss: 2.4087 | Val Acc: 66.02%
Top5: 95.15% | F1: 0.6334
✓ New best model saved (Val Acc: 66.02%)

Epoch 2/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.5156 | Train Acc: 35.77%
Val Loss: 2.0166 | Val Acc: 77.67%
Top5: 97.09% | F1: 0.7650
✓ New best model saved (Val Acc: 77.67%)

Epoch 3/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.2556 | Train Acc: 47.78%
Val Loss: 1.7574 | Val Acc: 79.61%
Top5: 95.15% | F1: 0.7819
✓ New best model saved (Val Acc: 79.61%)

Epoch 4/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.1210 | Train Acc: 50.00%
Val Loss: 1.5942 | Val Acc: 82.52%
Top5: 98.06% | F1: 0.8229
✓ New best model saved (Val Acc: 82.52%)

Epoch 5/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.0226 | Train Acc: 53.05%
Val Loss: 1.4598 | Val Acc: 84.47%
Top5: 97.09% | F1: 0.8324
✓ New best model saved (Val Acc: 84.47%)

Epoch 6/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.9491 | Train Acc: 53.70%
Val Loss: 1.4065 | Val Acc: 82.52%
Top5: 95.15% | F1: 0.8288

Epoch 7/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.8823 | Train Acc: 56.38%
Val Loss: 1.3331 | Val Acc: 81.55%
Top5: 98.06% | F1: 0.8142

Epoch 8/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.8394 | Train Acc: 57.39%
Val Loss: 1.2754 | Val Acc: 84.47%
Top5: 98.06% | F1: 0.8363

Epoch 9/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7793 | Train Acc: 60.07%
Val Loss: 1.2172 | Val Acc: 82.52%
Top5: 98.06% | F1: 0.8180

Epoch 10/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7768 | Train Acc: 57.49%
Val Loss: 1.2052 | Val Acc: 85.44%
Top5: 98.06% | F1: 0.8520
✓ New best model saved (Val Acc: 85.44%)

Epoch 11/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7452 | Train Acc: 59.70%
Val Loss: 1.1827 | Val Acc: 82.52%
Top5: 98.06% | F1: 0.8189

Epoch 12/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7110 | Train Acc: 62.20%
Val Loss: 1.1461 | Val Acc: 83.50%
Top5: 98.06% | F1: 0.8333

Epoch 13/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7299 | Train Acc: 58.50%
Val Loss: 1.1475 | Val Acc: 87.38%
Top5: 98.06% | F1: 0.8679
✓ New best model saved (Val Acc: 87.38%)

Epoch 14/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7005 | Train Acc: 60.44%
Val Loss: 1.1661 | Val Acc: 83.50%
Top5: 98.06% | F1: 0.8328

Epoch 15/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.7092 | Train Acc: 60.91%
Val Loss: 1.1363 | Val Acc: 86.41%
Top5: 98.06% | F1: 0.8509

Epoch 16/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.6505 | Train Acc: 63.03%
Val Loss: 1.1271 | Val Acc: 86.41%
Top5: 98.06% | F1: 0.8512

Epoch 17/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.6992 | Train Acc: 59.43%
Val Loss: 1.1212 | Val Acc: 83.50%
Top5: 98.06% | F1: 0.8258

Epoch 18/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.6714 | Train Acc: 63.59%
Val Loss: 1.1368 | Val Acc: 86.41%
Top5: 97.09% | F1: 0.8558

Epoch 19/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.6891 | Train Acc: 61.28%
Val Loss: 1.1024 | Val Acc: 86.41%
Top5: 98.06% | F1: 0.8540

Epoch 20/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 1.6488 | Train Acc: 63.22%
Val Loss: 1.1069 | Val Acc: 86.41%
Top5: 98.06% | F1: 0.8569

Early stopping at epoch 20

✓ Training complete! Best Val Acc: 87.38%
  Time: 4.9 minutes

ALL EXPERIMENTS COMPLETED!

STARTING ALL EXPERIMENTS
Total experiments to run: 1

This will take approximately 2-4 hours depending on GPU...


**********************************************************************
Experiment 1/2
**********************************************************************

######################################################################
EXPERIMENT: efficientnet_b0_direct
######################################################################
Model: EfficientNet-B0 (5.3M params) - Best for small datasets
Image size: 224×224
Batch size: 32
Preprocessing: Direct resize

Dataloaders created:
  Train: 1082 samples, 34 batches
  Val: 103 samples, 4 batches
   🔒 Backbone FROZEN - only classifier will train

Model created:
  Total params: 4,033,168
  Trainable params: 25,62

Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.8707 | Train Acc: 14.42%
Val Loss: 2.5248 | Val Acc: 50.49%
Top5: 89.32% | F1: 0.4301
✓ New best model saved (Val Acc: 50.49%)

Epoch 2/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.5371 | Train Acc: 34.20%
Val Loss: 2.1606 | Val Acc: 61.17%
Top5: 95.15% | F1: 0.5519
✓ New best model saved (Val Acc: 61.17%)

Epoch 3/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.3274 | Train Acc: 43.53%
Val Loss: 1.9407 | Val Acc: 71.84%
Top5: 96.12% | F1: 0.6911
✓ New best model saved (Val Acc: 71.84%)

Epoch 4/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.2140 | Train Acc: 44.55%
Val Loss: 1.7874 | Val Acc: 76.70%
Top5: 95.15% | F1: 0.7525
✓ New best model saved (Val Acc: 76.70%)

Epoch 5/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]

Validation:   0%|          | 0/4 [00:00<?, ?it/s]

Train Loss: 2.0906 | Train Acc: 47.87%
Val Loss: 1.6886 | Val Acc: 74.76%
Top5: 97.09% | F1: 0.7370

Epoch 6/100
--------------------------------------------------


Training:   0%|          | 0/34 [00:00<?, ?it/s]


KeyboardInterrupt



In [18]:
# ============================================================================
# 🔮 RUN PREDICTIONS ON TEST DATA FOR SAVED MODELS
# ============================================================================

print("\n" + "=" * 70)
print("LOADING SAVED MODELS AND RUNNING TEST PREDICTIONS")
print("=" * 70)

# Get all checkpoint files
checkpoint_files = [f for f in os.listdir(CHECKPOINT_DIR) if f.endswith('_best.pth')]
print(f"Found {len(checkpoint_files)} saved model(s): {checkpoint_files}")

if len(checkpoint_files) == 0:
    print("❌ No checkpoint files found in the directory!")
else:
    # =========================================================================
    # TTA (Test Time Augmentation) for improved predictions
    # =========================================================================
    def get_tta_transforms(img_size=224):
        """
        Returns a list of transforms for Test Time Augmentation.
        Each transform produces a different view of the image.
        """
        base_transform = A.Compose([
            A.Resize(img_size, img_size),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ])

        tta_transforms = [
            # Original
            base_transform,
            # Horizontal flip
            A.Compose([
                A.Resize(img_size, img_size),
                A.HorizontalFlip(p=1.0),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ]),
            # Slight rotation left
            A.Compose([
                A.Resize(img_size, img_size),
                A.Rotate(limit=(10, 10), p=1.0, border_mode=cv2.BORDER_REFLECT_101),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ]),
            # Slight rotation right
            A.Compose([
                A.Resize(img_size, img_size),
                A.Rotate(limit=(-10, -10), p=1.0, border_mode=cv2.BORDER_REFLECT_101),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ]),
            # Scale up slightly
            A.Compose([
                A.Resize(int(img_size * 1.1), int(img_size * 1.1)),
                A.CenterCrop(img_size, img_size),
                A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ToTensorV2()
            ]),
        ]
        return tta_transforms

    # Helper function to load model and run predictions
    def load_model_and_predict(checkpoint_path, checkpoint_name, use_tta=True):
        """Load a checkpoint and run predictions on test data

        Args:
            checkpoint_path: Path to the .pth checkpoint file
            checkpoint_name: Name of the checkpoint file
            use_tta: Whether to use Test Time Augmentation (default: True)
        """

        print(f"\n{'='*70}")
        print(f"PROCESSING: {checkpoint_name}")
        print(f"{'='*70}")

        # Load checkpoint
        checkpoint = torch.load(checkpoint_path, map_location=config.device, weights_only=False)

        # Determine model architecture from checkpoint name
        if 'efficientnetv2_s' in checkpoint_name:
            model_name = 'tf_efficientnetv2_s'
            img_size = 384
        elif 'efficientnet_b0' in checkpoint_name:
            model_name = 'efficientnet_b0'
            img_size = 224
        else:
            print(f"⚠️ Could not determine model type from {checkpoint_name}")
            return None

        # CRITICAL: Detect if this is a YOLO-trained model
        use_yolo_for_test = 'yolo' in checkpoint_name.lower()

        print(f"Model: {model_name}")
        print(f"Image size: {img_size}")
        print(f"Validation Accuracy: {checkpoint['val_acc']:.2f}%")
        print(f"Trained for {checkpoint['epoch']+1} epochs")
        print(f"YOLO preprocessing: {'YES ✓' if use_yolo_for_test else 'NO'}")
        print(f"Test Time Augmentation: {'YES ✓' if use_tta else 'NO'}")

        # Create model
        model = EfficientNetClassifier(
            model_name=model_name,
            num_classes=num_classes,
            pretrained=False,
            dropout=0.3
        )

        # Load weights
        model.load_state_dict(checkpoint['model_state_dict'])
        model = model.to(config.device)
        model.eval()

        print("✓ Model loaded successfully")

        # Initialize YOLO detector if needed
        yolo_detector = None
        if use_yolo_for_test:
            print("🔍 Initializing YOLO bird detector for test preprocessing...")
            yolo_detector = YOLOBirdDetector(
                model_name=config.yolo_model,
                conf_threshold=config.yolo_conf_threshold,
                expand_bbox=config.yolo_expand_bbox
            )
            print("✓ YOLO detector ready")

        # Get test images
        test_image_paths = sorted([
            os.path.join(TEST_IMAGES_DIR, f)
            for f in os.listdir(TEST_IMAGES_DIR)
            if f.endswith(('.jpg', '.jpeg', '.png'))
        ])

        print(f"📁 Found {len(test_image_paths)} test images")

        # Create test dataset with YOLO support
        class TestDatasetWithYOLO(Dataset):
            def __init__(self, image_paths, transform, yolo_detector=None):
                self.image_paths = image_paths
                self.transform = transform
                self.yolo_detector = yolo_detector

            def __len__(self):
                return len(self.image_paths)

            def __getitem__(self, idx):
                img_path = self.image_paths[idx]

                # Apply YOLO preprocessing if detector is provided
                if self.yolo_detector is not None:
                    pil_image = self.yolo_detector.detect_and_crop(img_path)
                    if pil_image is not None:
                        image = np.array(pil_image)
                    else:
                        # Fallback to direct loading
                        image = cv2.imread(img_path)
                        if image is not None:
                            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                        else:
                            image = np.zeros((224, 224, 3), dtype=np.uint8)
                else:
                    # Direct loading without YOLO
                    image = cv2.imread(img_path)
                    if image is None:
                        image = np.zeros((224, 224, 3), dtype=np.uint8)
                    else:
                        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # Apply transforms
                if self.transform:
                    augmented = self.transform(image=image)
                    image = augmented['image']

                return image, img_path

        # Function to run inference with optional TTA
        def run_inference_with_tta(model, image_paths, yolo_detector, img_size, use_tta=True):
            """Run inference with optional Test Time Augmentation"""

            if use_tta:
                tta_transforms_list = get_tta_transforms(img_size)
                print(f"🔄 Running TTA inference with {len(tta_transforms_list)} augmentations...")
            else:
                tta_transforms_list = [get_val_transforms(img_size)]
                print(f"🔄 Running standard inference...")

            # Collect predictions from all augmentations
            all_probs = []

            for aug_idx, transform in enumerate(tta_transforms_list):
                if use_tta:
                    print(f"  Augmentation {aug_idx + 1}/{len(tta_transforms_list)}...")

                test_dataset = TestDatasetWithYOLO(image_paths, transform, yolo_detector)
                test_loader = DataLoader(
                    test_dataset, batch_size=32, shuffle=False,
                    num_workers=config.num_workers, pin_memory=True
                )

                aug_probs = []
                with torch.no_grad():
                    for images, paths in test_loader:
                        images = images.to(config.device)
                        outputs = model(images)
                        probs = torch.softmax(outputs, dim=1)
                        aug_probs.extend(probs.cpu().numpy())

                all_probs.append(np.array(aug_probs))

            # Average predictions across all augmentations
            avg_probs = np.mean(all_probs, axis=0)
            final_predictions = np.argmax(avg_probs, axis=1)
            confidence_scores = np.max(avg_probs, axis=1)

            return final_predictions, confidence_scores, avg_probs

        # Run inference
        all_predictions, confidence_scores, all_probs = run_inference_with_tta(
            model, test_image_paths, yolo_detector, img_size, use_tta=use_tta
        )

        # Create submission dataframe
        original_paths = [os.path.basename(p) for p in test_image_paths]

        submission = pd.DataFrame({
            'path': original_paths,
            'class_idx': all_predictions
        })

        # Generate unique filename with timestamp
        from datetime import datetime
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        model_identifier = checkpoint_name.replace('_best.pth', '')
        tta_suffix = '_tta' if use_tta else ''
        submission_filename = f'submission_{model_identifier}{tta_suffix}_{timestamp}.csv'
        submission_path = f'{RESULTS_DIR}/{submission_filename}'

        # Save submission
        submission.to_csv(submission_path, index=False)

        print(f"\n✅ PREDICTIONS COMPLETE!")
        print(f"📊 Predictions: {len(submission)} images")
        print(f"💾 Saved to: {submission_path}")
        print(f"📁 Filename: {submission_filename}")

        # =====================================================================
        # VERIFICATION & DIAGNOSTICS
        # =====================================================================
        print("\n" + "=" * 50)
        print("📋 VERIFICATION & DIAGNOSTICS")
        print("=" * 50)

        # 1. Prediction confidence statistics
        print(f"\n🎯 Prediction Confidence:")
        print(f"  Mean confidence: {np.mean(confidence_scores):.4f}")
        print(f"  Min confidence:  {np.min(confidence_scores):.4f}")
        print(f"  Max confidence:  {np.max(confidence_scores):.4f}")
        print(f"  Std confidence:  {np.std(confidence_scores):.4f}")

        # Low confidence predictions (potential errors)
        low_conf_threshold = 0.5
        low_conf_count = np.sum(confidence_scores < low_conf_threshold)
        print(f"  Low confidence (<{low_conf_threshold}): {low_conf_count} ({100*low_conf_count/len(confidence_scores):.1f}%)")

        # 2. Class distribution comparison
        print(f"\n📈 Test Prediction Distribution:")
        class_counts = submission['class_idx'].value_counts().sort_index()
        for class_idx, count in class_counts.items():
            class_name = idx_to_class.get(class_idx, 'Unknown')
            pct = 100 * count / len(submission)
            print(f"  Class {class_idx:2d} ({class_name:25s}): {count:4d} images ({pct:5.1f}%)")

        # 3. Compare with training distribution
        print(f"\n📊 Training Class Distribution (for comparison):")
        train_class_counts = train_df['class_idx'].value_counts().sort_index()
        for class_idx, count in train_class_counts.items():
            class_name = idx_to_class.get(class_idx, 'Unknown')
            pct = 100 * count / len(train_df)
            print(f"  Class {class_idx:2d} ({class_name:25s}): {count:4d} images ({pct:5.1f}%)")

        # 4. YOLO detection stats (if used)
        if use_yolo_for_test:
            print(f"\n🔍 YOLO was used for test preprocessing")
            print(f"  This ensures consistency with training preprocessing")

        # 5. Save detailed predictions with confidence
        detailed_submission = pd.DataFrame({
            'path': original_paths,
            'class_idx': all_predictions,
            'confidence': confidence_scores,
            'class_name': [idx_to_class.get(p, 'Unknown') for p in all_predictions]
        })
        detailed_path = submission_path.replace('.csv', '_detailed.csv')
        detailed_submission.to_csv(detailed_path, index=False)
        print(f"\n💾 Detailed predictions saved to: {detailed_path}")

        return submission_path

    # Process each checkpoint
    submission_files = []
    for checkpoint_file in checkpoint_files:
        checkpoint_path = os.path.join(CHECKPOINT_DIR, checkpoint_file)
        submission_path = load_model_and_predict(checkpoint_path, checkpoint_file)
        if submission_path:
            submission_files.append(submission_path)

    # Final summary
    print("\n" + "=" * 70)
    print("🎉 ALL PREDICTIONS COMPLETED!")
    print("=" * 70)
    print(f"Generated {len(submission_files)} submission file(s):")
    for i, path in enumerate(submission_files, 1):
        print(f"  {i}. {os.path.basename(path)}")
    print(f"\nAll files saved to: {RESULTS_DIR}/")
    print("=" * 70)


LOADING SAVED MODELS AND RUNNING TEST PREDICTIONS
Found 2 saved model(s): ['efficientnet_b0_direct_best.pth', 'efficientnet_b0_yolo_best.pth']

PROCESSING: efficientnet_b0_direct_best.pth
Model: efficientnet_b0
Image size: 224
Validation Accuracy: 76.70%
Trained for 4 epochs
YOLO preprocessing: NO
Test Time Augmentation: YES ✓
✓ Model loaded successfully
📁 Found 400 test images
🔄 Running TTA inference with 5 augmentations...
  Augmentation 1/5...
  Augmentation 2/5...
  Augmentation 3/5...
  Augmentation 4/5...
  Augmentation 5/5...

✅ PREDICTIONS COMPLETE!
📊 Predictions: 400 images
💾 Saved to: /content/drive/MyDrive/bird_efficientnet_results/submission_efficientnet_b0_direct_tta_20260207_172829.csv
📁 Filename: submission_efficientnet_b0_direct_tta_20260207_172829.csv

📋 VERIFICATION & DIAGNOSTICS

🎯 Prediction Confidence:
  Mean confidence: 0.2404
  Min confidence:  0.0826
  Max confidence:  0.7908
  Std confidence:  0.1224
  Low confidence (<0.5): 379 (94.8%)

📈 Test Prediction Dist