In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models
import pandas as pd
from PIL import Image
import os
from tqdm import tqdm
import numpy as np
from google.colab import drive
import zipfile


In [5]:
# Seed the torch CPU generator, the torch CUDA generator and NumPy's global
# generator with one shared value, in that order.
SEED = 42
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed):
    seed_fn(SEED)

In [6]:
# Mount Google Drive at /content/drive so files stored there are reachable from the notebook.
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
# Location of the dataset archive on the mounted Drive.
zip_path = '/content/drive/MyDrive/ColabNotebooks/public_tests.zip'
# Directory the archive is extracted into.
extract_path = '/content/public_tests'

In [11]:
# Unpack every member of the dataset archive into extract_path.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

In [12]:
# The image folder and the label CSV both sit under the same train directory.
train_root = os.path.join(extract_path, '00_test_img_input', 'train')
images_path = os.path.join(train_root, 'images')
csv_path = os.path.join(train_root, 'gt.csv')

In [14]:
class BirdDataset(Dataset):
    """Image-classification dataset backed by a DataFrame of file names and labels.

    Args:
        dataframe: Table with a 'filename' column (file name joined onto
            ``img_dir``) and a 'class_id' column (value convertible to ``int``).
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB image before it is
            returned; skipped when falsy.
    """

    def __init__(self, dataframe, img_dir, transform=None):
        self.df = dataframe
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load one sample by row position.

        Args:
            idx: Positional row number in the DataFrame (used with ``.iloc``).

        Returns:
            tuple: ``(image, label)`` where ``image`` is the RGB image (after
            ``transform`` when one was given) and ``label`` is an ``int``.
        """
        # Fetch the row once instead of once per column.
        row = self.df.iloc[idx]
        label = int(row['class_id'])
        img_path = os.path.join(self.img_dir, row['filename'])
        # Image.open is lazy and keeps the underlying file open. convert() reads the
        # pixel data into a new image, so the file can be closed as soon as it returns.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# Per-channel normalisation statistics (the standard ImageNet values), shared by
# both pipelines so training and validation inputs are scaled the same way.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop/flip/rotation/colour/affine/perspective
# perturbations, followed by tensor conversion and normalisation.
train_augmentations = [
    transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(p=0.2),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
]
train_transforms = transforms.Compose(
    train_augmentations
    + [
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# Validation pipeline: deterministic resize + centre crop, then the same
# tensor conversion and normalisation as training.
val_transforms = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)

# Label table read from the CSV; BirdDataset reads its 'filename' and 'class_id' columns.
df = pd.read_csv(csv_path)

# Dataset over every labelled image (training transforms attached).
full_dataset = BirdDataset(df, images_path, transform=train_transforms)

In [15]:
def stratified_split(dataset, train_ratio=0.8):
    """Split a dataset into train/validation datasets, class by class.

    For every distinct 'class_id', the rows of that class are shuffled with
    NumPy's global RNG and the first ``int(n * train_ratio)`` go to the
    training set, the rest to the validation set.

    Args:
        dataset: Object exposing ``df`` (DataFrame with a 'class_id' column)
            and ``img_dir``.
        train_ratio: Fraction of each class assigned to the training set.

    Returns:
        tuple: ``(train_dataset, val_dataset)`` as ``BirdDataset`` instances
        using the module-level ``train_transforms`` and ``val_transforms``.
    """
    df = dataset.df
    # Re-key the labels by row position (0..n-1). The selections below use positional
    # .iloc, so collecting the DataFrame's own index labels would pick the wrong rows
    # (or raise) whenever that index is not the default range, e.g. after filtering.
    class_by_position = df['class_id'].reset_index(drop=True)

    train_indices = []
    val_indices = []

    # Split each class according to the ratio
    for class_id in class_by_position.unique():
        positions = class_by_position[class_by_position == class_id].index.tolist()
        np.random.shuffle(positions)
        split_idx = int(len(positions) * train_ratio)
        train_indices.extend(positions[:split_idx])
        val_indices.extend(positions[split_idx:])

    # Create new dataframes for train and val
    train_df = df.iloc[train_indices].reset_index(drop=True)
    val_df = df.iloc[val_indices].reset_index(drop=True)

    # Create new datasets (augmenting transforms for train, deterministic ones for val)
    train_dataset = BirdDataset(train_df, dataset.img_dir, transform=train_transforms)
    val_dataset = BirdDataset(val_df, dataset.img_dir, transform=val_transforms)

    return train_dataset, val_dataset

# Build the train/validation datasets from the full dataset (default train_ratio=0.8).
train_dataset, val_dataset = stratified_split(full_dataset)

# Report the resulting split sizes.
print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")


Training set size: 2000
Validation set size: 500


In [16]:
# Worker and pinned-memory settings common to both loaders.
loader_kwargs = dict(num_workers=2, pin_memory=True)

# Training batches are reshuffled every epoch; validation keeps dataset order.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, batch_size=32, **loader_kwargs)

In [17]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set up model - using EfficientNet B2 with ImageNet weights.
# The `weights=` enum replaces the deprecated `pretrained=True` flag.
model = models.efficientnet_b2(weights=models.EfficientNet_B2_Weights.IMAGENET1K_V1)

# Freeze the whole feature extractor first ...
for param in model.features.parameters():
    param.requires_grad = False

# ... then unfreeze its last 4 stages for fine-tuning.
for layer in model.features[-4:]:
    for param in layer.parameters():
        param.requires_grad = True

# Replace the classifier head. The input width is read from the stock head's
# final Linear layer instead of being hard-coded, so the head stays consistent
# if the backbone variant is changed.
# NOTE(review): assumes class_id values are 0..num_classes-1 - confirm against gt.csv.
num_classes = df['class_id'].nunique()
head_in_features = model.classifier[-1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.3, inplace=True),
    nn.Linear(in_features=head_in_features, out_features=512),
    nn.BatchNorm1d(512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(256, num_classes)
)

model = model.to(device)

# Two parameter groups: a small learning rate for the unfrozen backbone stages and
# a 10x larger one for the freshly initialised head. weight_decay=1e-4 applies to
# both groups (note: this is below AdamW's default of 1e-2, not above it).
optimizer = optim.AdamW(
    [
        {'params': [p for n, p in model.features.named_parameters() if p.requires_grad], 'lr': 0.0001},
        {'params': model.classifier.parameters(), 'lr': 0.001}
    ],
    weight_decay=1e-4
)

# Gradient scaler for mixed precision training. torch.cuda.amp.GradScaler() is
# deprecated in favour of torch.amp.GradScaler('cuda', ...); scaling is switched off
# explicitly when no GPU is in use.
scaler = torch.amp.GradScaler('cuda', enabled=(device.type == 'cuda'))

# Learning rate scheduler: halve every group's LR after more than `patience` epochs
# without an improvement of the monitored metric (mode='max', i.e. higher is better).
# The deprecated `verbose` flag is dropped; read optimizer.param_groups[i]['lr'] to
# inspect the current learning rates.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=2
)

# Loss function with label smoothing for better generalization
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

Using device: cuda


Downloading: "https://download.pytorch.org/models/efficientnet_b2_rwightman-c35c1473.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b2_rwightman-c35c1473.pth
100%|██████████| 35.2M/35.2M [00:00<00:00, 48.4MB/s]
  scaler = torch.cuda.amp.GradScaler()


In [18]:
# Evaluation function
def evaluate(model, val_loader, topk=(1,)):
    """Compute top-k accuracy of ``model`` over every batch of ``val_loader``.

    The model is switched to eval mode and left in it. Batches are moved to the
    device of the model's first parameter (left where they are if the model has
    no parameters), so the function does not depend on a global ``device``.

    Args:
        model: Module mapping a batch of inputs to per-class scores with
            classes along dimension 1.
        val_loader: Iterable yielding ``(images, labels)`` tensor pairs.
        topk: Iterable of k values to report. A k larger than the number of
            classes is treated as "all classes".

    Returns:
        dict: ``{k: accuracy}`` with accuracy in [0, 1]; every value is 0.0
        when the loader yields no samples.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    correct = {k: 0 for k in topk}
    total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            if target_device is not None:
                images, labels = images.to(target_device), labels.to(target_device)
            outputs = model(images)

            # topk() raises if asked for more entries than there are classes.
            maxk = min(max(topk), outputs.size(1))
            _, preds = outputs.topk(maxk, 1, True, True)
            # Transpose to (maxk, batch): row r holds each sample's (r+1)-th best guess.
            preds = preds.t()
            correct_preds = preds.eq(labels.view(1, -1).expand_as(preds))

            for k in topk:
                # A sample is a top-k hit if any of its first k guesses matches.
                correct[k] += correct_preds[:k].sum().item()

            total += labels.size(0)

    if total == 0:
        # Empty loader: report zero accuracy instead of dividing by zero.
        return {k: 0.0 for k in topk}
    return {k: correct[k] / total for k in topk}

In [19]:
def mixup_data(x, y, alpha=0.2):
    """Applies mixup augmentation to the batch.

    Each sample is blended with another sample of the same batch, chosen by a
    random permutation of the first dimension.

    Args:
        x: Input tensor whose first dimension indexes the samples.
        y: Label tensor indexed the same way as ``x``.
        alpha: Parameter of the Beta(alpha, alpha) distribution the mixing
            weight is drawn from; ``alpha <= 0`` disables mixing (``lam = 1``).

    Returns:
        tuple: ``(mixed_x, y_a, y_b, lam)`` where ``mixed_x`` is
        ``lam * x + (1 - lam) * x[perm]``, ``y_a`` is ``y``, ``y_b`` is
        ``y[perm]`` and ``lam`` is the mixing weight.
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size(0)
    # Draw the permutation from the default CPU generator, then place it on the
    # input's own device rather than on whatever a global `device` happens to be.
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Blend the loss against both label sets using the mixup weight ``lam``."""
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b


In [20]:
# Training loop
epochs = 25
best_acc = 0.0
patience = 7  # For early stopping
no_improve = 0

print("Starting training...")
for epoch in range(epochs):
    # Training phase
    model.train()
    running_loss = 0.0
    train_correct = 0
    train_total = 0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        # Apply mixup augmentation with 50% probability
        if np.random.random() < 0.5:
            images, labels_a, labels_b, lam = mixup_data(images, labels)
            use_mixup = True
        else:
            use_mixup = False

        # Mixed precision training
        with torch.cuda.amp.autocast():
            outputs = model(images)
            if use_mixup:
                loss = mixup_criterion(criterion, outputs, labels_a, labels_b, lam)
            else:
                loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

        # Calculate training accuracy (for non-mixup batches only)
        if not use_mixup:
            _, preds = torch.max(outputs, 1)
            train_correct += (preds == labels).sum().item()
            train_total += labels.size(0)
            train_acc = train_correct / train_total if train_total > 0 else 0
        else:
            train_acc = 0  # Don't calculate for mixup batches

        # Update progress bar
        loop.set_postfix(loss=loss.item(), train_acc=f"{train_acc:.4f}")

    # Validation phase
    accuracies = evaluate(model, val_loader, topk=(1, 3, 5))
    val_acc = accuracies[1]  # Top-1 accuracy

    print(f"Epoch {epoch+1}/{epochs}:")
    print(f"  Loss: {running_loss/len(train_loader):.4f}")
    print(f"  Validation Accuracy: {val_acc:.4f}")
    print(f"  Top-3 Accuracy: {accuracies[3]:.4f}")
    print(f"  Top-5 Accuracy: {accuracies[5]:.4f}")

    # Learning rate scheduling
    scheduler.step(val_acc)

    # Save best model
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'accuracy': val_acc,
        }, 'birds_model.pt')
        print(f"Saved model with best accuracy: {best_acc:.4f}")
        no_improve = 0
    else:
        no_improve += 1

    # Early stopping
    if no_improve >= patience:
        print(f"Early stopping triggered after {epoch+1} epochs")
        break

    # Exit if target accuracy is reached
    if best_acc >= 0.85:
        print(f"Target accuracy of 85% reached: {best_acc:.4f}")
        break

print(f"Training completed. Best validation accuracy: {best_acc:.4f}")

# Load the best model. map_location remaps the stored tensors onto the current
# device, so the checkpoint also loads in a session whose device differs from the one
# it was saved on (e.g. saved on GPU, reloaded on a CPU-only runtime).
checkpoint = torch.load('birds_model.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Final evaluation of the restored best checkpoint on the validation set
final_accuracies = evaluate(model, val_loader, topk=(1, 3, 5))
print("Final evaluation:")
print(f"  Top-1 Accuracy: {final_accuracies[1]:.4f}")
print(f"  Top-3 Accuracy: {final_accuracies[3]:.4f}")
print(f"  Top-5 Accuracy: {final_accuracies[5]:.4f}")

# If you need to use the model for inference later
def predict_image(image_path, model, transform):
    """Predict the class index for a single image file.

    The model is switched to eval mode and left in it. The input is moved to
    the device of the model's first parameter (left where it is if the model
    has no parameters), so the function does not depend on a global ``device``.

    Args:
        image_path: Path of the image file to classify.
        model: Module mapping a batch of inputs to per-class scores with
            classes along dimension 1.
        transform: Callable turning the RGB image into a tensor for one sample
            (a batch dimension is added here).

    Returns:
        int: Index of the highest-scoring class.
    """
    model.eval()
    # Image.open is lazy and keeps the file open; convert() reads the pixel data into a
    # new image, so the file handle can be released immediately afterwards.
    with Image.open(image_path) as img:
        image = img.convert('RGB')
    batch = transform(image).unsqueeze(0)

    first_param = next(model.parameters(), None)
    if first_param is not None:
        batch = batch.to(first_param.device)

    with torch.no_grad():
        output = model(batch)
        _, pred = torch.max(output, 1)

    return pred.item()

Starting training...


  with torch.cuda.amp.autocast():
Epoch 1/25: 100%|██████████| 125/125 [00:28<00:00,  4.46it/s, loss=3.15, train_acc=0.0000]


Epoch 1/25:
  Loss: 3.5172
  Validation Accuracy: 0.4620
  Top-3 Accuracy: 0.7400
  Top-5 Accuracy: 0.8600
Saved model with best accuracy: 0.4620


Epoch 2/25: 100%|██████████| 125/125 [00:25<00:00,  4.84it/s, loss=2.28, train_acc=0.0000]


Epoch 2/25:
  Loss: 2.6452
  Validation Accuracy: 0.6360
  Top-3 Accuracy: 0.8800
  Top-5 Accuracy: 0.9560
Saved model with best accuracy: 0.6360


Epoch 3/25: 100%|██████████| 125/125 [00:25<00:00,  4.87it/s, loss=2.48, train_acc=0.0000]


Epoch 3/25:
  Loss: 2.3497
  Validation Accuracy: 0.6880
  Top-3 Accuracy: 0.9160
  Top-5 Accuracy: 0.9700
Saved model with best accuracy: 0.6880


Epoch 4/25: 100%|██████████| 125/125 [00:25<00:00,  4.92it/s, loss=1.95, train_acc=0.5759]


Epoch 4/25:
  Loss: 2.0634
  Validation Accuracy: 0.7380
  Top-3 Accuracy: 0.9240
  Top-5 Accuracy: 0.9600
Saved model with best accuracy: 0.7380


Epoch 5/25: 100%|██████████| 125/125 [00:25<00:00,  4.89it/s, loss=3.43, train_acc=0.0000]


Epoch 5/25:
  Loss: 2.1011
  Validation Accuracy: 0.7500
  Top-3 Accuracy: 0.9200
  Top-5 Accuracy: 0.9620
Saved model with best accuracy: 0.7500


Epoch 6/25: 100%|██████████| 125/125 [00:25<00:00,  4.94it/s, loss=2.81, train_acc=0.0000]


Epoch 6/25:
  Loss: 1.9038
  Validation Accuracy: 0.7680
  Top-3 Accuracy: 0.9260
  Top-5 Accuracy: 0.9720
Saved model with best accuracy: 0.7680


Epoch 7/25: 100%|██████████| 125/125 [00:25<00:00,  4.95it/s, loss=2.05, train_acc=0.6875]


Epoch 7/25:
  Loss: 1.9008
  Validation Accuracy: 0.7580
  Top-3 Accuracy: 0.9260
  Top-5 Accuracy: 0.9660


Epoch 8/25: 100%|██████████| 125/125 [00:25<00:00,  4.93it/s, loss=1.27, train_acc=0.7245]


Epoch 8/25:
  Loss: 1.7768
  Validation Accuracy: 0.7740
  Top-3 Accuracy: 0.9360
  Top-5 Accuracy: 0.9760
Saved model with best accuracy: 0.7740


Epoch 9/25: 100%|██████████| 125/125 [00:25<00:00,  4.96it/s, loss=1.48, train_acc=0.0000]


Epoch 9/25:
  Loss: 1.7731
  Validation Accuracy: 0.7920
  Top-3 Accuracy: 0.9280
  Top-5 Accuracy: 0.9640
Saved model with best accuracy: 0.7920


Epoch 10/25: 100%|██████████| 125/125 [00:30<00:00,  4.08it/s, loss=1.34, train_acc=0.7596]


Epoch 10/25:
  Loss: 1.7405
  Validation Accuracy: 0.7920
  Top-3 Accuracy: 0.9500
  Top-5 Accuracy: 0.9720


Epoch 11/25: 100%|██████████| 125/125 [00:23<00:00,  5.27it/s, loss=1.33, train_acc=0.0000]


Epoch 11/25:
  Loss: 1.6483
  Validation Accuracy: 0.8000
  Top-3 Accuracy: 0.9500
  Top-5 Accuracy: 0.9720
Saved model with best accuracy: 0.8000


Epoch 12/25: 100%|██████████| 125/125 [00:23<00:00,  5.24it/s, loss=1.75, train_acc=0.7817]


Epoch 12/25:
  Loss: 1.5853
  Validation Accuracy: 0.7960
  Top-3 Accuracy: 0.9420
  Top-5 Accuracy: 0.9720


Epoch 13/25: 100%|██████████| 125/125 [00:25<00:00,  4.98it/s, loss=1.33, train_acc=0.8179]


Epoch 13/25:
  Loss: 1.5450
  Validation Accuracy: 0.7860
  Top-3 Accuracy: 0.9520
  Top-5 Accuracy: 0.9680


Epoch 14/25: 100%|██████████| 125/125 [00:24<00:00,  5.12it/s, loss=1.63, train_acc=0.8317]


Epoch 14/25:
  Loss: 1.5316
  Validation Accuracy: 0.7900
  Top-3 Accuracy: 0.9560
  Top-5 Accuracy: 0.9680


Epoch 15/25: 100%|██████████| 125/125 [00:24<00:00,  5.17it/s, loss=1.67, train_acc=0.8478]


Epoch 15/25:
  Loss: 1.4723
  Validation Accuracy: 0.8020
  Top-3 Accuracy: 0.9480
  Top-5 Accuracy: 0.9660
Saved model with best accuracy: 0.8020


Epoch 16/25: 100%|██████████| 125/125 [00:24<00:00,  5.02it/s, loss=1.16, train_acc=0.0000]


Epoch 16/25:
  Loss: 1.4166
  Validation Accuracy: 0.8120
  Top-3 Accuracy: 0.9540
  Top-5 Accuracy: 0.9680
Saved model with best accuracy: 0.8120


Epoch 17/25: 100%|██████████| 125/125 [00:24<00:00,  5.12it/s, loss=1.12, train_acc=0.8681]


Epoch 17/25:
  Loss: 1.3954
  Validation Accuracy: 0.8280
  Top-3 Accuracy: 0.9500
  Top-5 Accuracy: 0.9720
Saved model with best accuracy: 0.8280


Epoch 18/25: 100%|██████████| 125/125 [00:24<00:00,  5.13it/s, loss=1.42, train_acc=0.8904]


Epoch 18/25:
  Loss: 1.4544
  Validation Accuracy: 0.8240
  Top-3 Accuracy: 0.9600
  Top-5 Accuracy: 0.9700


Epoch 19/25: 100%|██████████| 125/125 [00:24<00:00,  5.13it/s, loss=1.28, train_acc=0.0000]


Epoch 19/25:
  Loss: 1.3391
  Validation Accuracy: 0.7980
  Top-3 Accuracy: 0.9500
  Top-5 Accuracy: 0.9680


Epoch 20/25: 100%|██████████| 125/125 [00:24<00:00,  5.03it/s, loss=1.16, train_acc=0.0000]


Epoch 20/25:
  Loss: 1.5024
  Validation Accuracy: 0.8180
  Top-3 Accuracy: 0.9540
  Top-5 Accuracy: 0.9680


Epoch 21/25: 100%|██████████| 125/125 [00:24<00:00,  5.07it/s, loss=0.878, train_acc=0.8875]


Epoch 21/25:
  Loss: 1.3717
  Validation Accuracy: 0.8260
  Top-3 Accuracy: 0.9560
  Top-5 Accuracy: 0.9700


Epoch 22/25: 100%|██████████| 125/125 [00:24<00:00,  5.07it/s, loss=0.996, train_acc=0.9022]


Epoch 22/25:
  Loss: 1.3681
  Validation Accuracy: 0.8300
  Top-3 Accuracy: 0.9560
  Top-5 Accuracy: 0.9720
Saved model with best accuracy: 0.8300


Epoch 23/25: 100%|██████████| 125/125 [00:24<00:00,  5.02it/s, loss=0.953, train_acc=0.8952]


Epoch 23/25:
  Loss: 1.3833
  Validation Accuracy: 0.8300
  Top-3 Accuracy: 0.9520
  Top-5 Accuracy: 0.9720


Epoch 24/25: 100%|██████████| 125/125 [00:24<00:00,  5.00it/s, loss=0.92, train_acc=0.9090]


Epoch 24/25:
  Loss: 1.3146
  Validation Accuracy: 0.8380
  Top-3 Accuracy: 0.9600
  Top-5 Accuracy: 0.9760
Saved model with best accuracy: 0.8380


Epoch 25/25: 100%|██████████| 125/125 [00:25<00:00,  4.96it/s, loss=0.946, train_acc=0.9032]


Epoch 25/25:
  Loss: 1.3288
  Validation Accuracy: 0.8300
  Top-3 Accuracy: 0.9600
  Top-5 Accuracy: 0.9680
Training completed. Best validation accuracy: 0.8380
Final evaluation:
  Top-1 Accuracy: 0.8380
  Top-3 Accuracy: 0.9600
  Top-5 Accuracy: 0.9760
