# Real World Scenario Modelling 

In [1]:
import os

# Sanity check: count the .npy files present in the input directory.
mel_dir = "/kaggle/input/melfiles"
npy_files = [name for name in os.listdir(mel_dir) if name.endswith(".npy")]
# The emoji prefix had been mis-encoded (UTF-8 bytes rendered as Mac Roman);
# it is restored here so the message prints as intended.
print(f"📊 Total .npy files found: {len(npy_files)}")

📊 Total .npy files found: 28564


In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import timm
from torch.amp import autocast, GradScaler

# Turn on cuDNN benchmark mode (GPU performance setting).
torch.backends.cudnn.benchmark = True

# Read the metadata table; the label vocabulary is the sorted set of
# primary labels, and each label's class index is its position in that list.
df = pd.read_csv("/kaggle/input/subset-df-full/subset_df_full_meta.csv")
species_list = sorted(df['primary_label'].unique())
species_to_index = dict(zip(species_list, range(len(species_list))))

# Every column except the file name and the label columns is treated as a
# metadata feature.
meta_cols = df.columns.difference(
    ['filename', 'primary_label', 'secondary_labels']
)

# Dataset
class BirdCLEFDatasetPhase3(Dataset):
    """Serve ``(mel, metadata, one_hot_label)`` triples for each table row.

    Args:
        df: Table with a ``filename`` column, a ``primary_label`` column and
            the metadata columns. Its index is reset internally.
        mel_dir: Directory containing one ``.npy`` array per row.
        species_to_index: Maps every ``primary_label`` value to a class index.
        metadata_cols: Names of the columns that form the metadata vector.
            When omitted, the module-level ``meta_cols`` is used, which is
            what this class always did before the parameter existed.
    """

    def __init__(self, df, mel_dir, species_to_index, metadata_cols=None):
        self.df = df.reset_index(drop=True)
        self.mel_dir = mel_dir
        self.species_to_index = species_to_index
        if metadata_cols is None:
            # Backward-compatible default: fall back to the notebook global.
            metadata_cols = meta_cols
        self.metadata_cols = metadata_cols
        # Convert the metadata columns to float32 once, instead of slicing a
        # pandas row and casting it on every __getitem__ call.
        self._metadata = self.df[list(self.metadata_cols)].to_numpy(dtype=np.float32)

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A tuple ``(x, metadata, y)`` where ``x`` is the loaded array as a
            float tensor with a leading axis of size 1 added, ``metadata`` is
            the float32 metadata vector, and ``y`` is a float32 one-hot vector
            over ``species_to_index``.
        """
        row = self.df.iloc[idx]
        # File naming: path separators become underscores, '.ogg' -> '.npy'.
        npy_name = row['filename'].replace('/', '_').replace('.ogg', '.npy')
        mel = np.load(os.path.join(self.mel_dir, npy_name))
        x = torch.tensor(mel).float().unsqueeze(0)
        # torch.tensor copies, so callers cannot mutate the cached matrix.
        metadata = torch.tensor(self._metadata[idx])
        y = torch.zeros(len(self.species_to_index), dtype=torch.float32)
        y[self.species_to_index[row['primary_label']]] = 1
        return x, metadata, y

# Model
class EffNetV2WithMetadata(nn.Module):
    """CNN backbone whose features are concatenated with a metadata vector.

    The ``tf_efficientnetv2_b0`` backbone (single input channel, created with
    ``num_classes=0``) produces a feature vector; the head assumes that vector
    has width 1280. It is joined with the metadata along dim 1 and passed
    through a two-layer MLP that outputs ``num_classes`` values.

    Args:
        num_classes: Size of the output layer.
        metadata_dim: Length of the metadata vector supplied to ``forward``.
    """

    def __init__(self, num_classes, metadata_dim):
        super().__init__()
        self.cnn = timm.create_model(
            "tf_efficientnetv2_b0",
            pretrained=True,
            in_chans=1,
            num_classes=0,
        )
        fused_width = 1280 + metadata_dim
        head_layers = [
            nn.Linear(fused_width, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x, meta):
        """Return the classifier output for image batch ``x`` and ``meta``."""
        image_features = self.cnn(x)
        fused = torch.cat((image_features, meta), dim=1)
        logits = self.classifier(fused)
        return logits

# Focal loss
class FocalLoss(nn.Module):
    """Focal loss computed directly from logits.

    For each element, ``bce`` is the binary cross-entropy with logits and
    ``pt = exp(-bce)``; the element loss is
    ``alpha * (1 - pt) ** gamma * bce``. The mean over all elements is
    returned.

    Args:
        alpha: Constant scale applied to every element's loss.
        gamma: Exponent of the ``(1 - pt)`` modulating factor.
    """

    def __init__(self, alpha=1.0, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, targets):
        """Return the scalar mean focal loss for ``logits`` vs ``targets``."""
        # The sigmoid is applied inside binary_cross_entropy_with_logits, so
        # no separate probability tensor is needed (the original computed one
        # and never used it).
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')
        pt = torch.exp(-bce)
        focal = self.alpha * (1 - pt) ** self.gamma * bce
        return focal.mean()

# Setup
mel_dir = "/kaggle/input/melfiles"

# 80/20 train/validation split with a fixed seed, then one dataset per part.
train_df, val_df = train_test_split(df, random_state=42, test_size=0.2)
train_ds, val_ds = (
    BirdCLEFDatasetPhase3(part, mel_dir, species_to_index)
    for part in (train_df, val_df)
)

# Only the training loader shuffles; both use pinned host memory.
train_loader = DataLoader(
    train_ds,
    batch_size=32,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
val_loader = DataLoader(
    val_ds,
    batch_size=32,
    num_workers=2,
    pin_memory=True,
)

# Model, optimizer, loss and the AMP gradient scaler.
device = torch.device("cuda")
model = EffNetV2WithMetadata(
    metadata_dim=len(meta_cols),
    num_classes=len(species_list),
)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = FocalLoss()
scaler = GradScaler()

# Early-stopping state: best validation loss so far, the number of
# consecutive non-improving epochs, and how many of those are tolerated.
best_val_loss, counter, patience = float("inf"), 0, 5

# Training loop
# Runs at most 15 epochs of mixed-precision training. After each epoch the
# mean validation loss is computed; the weights are saved whenever it strictly
# improves, and training stops after `patience` consecutive epochs without
# improvement.
for epoch in range(15):
    model.train()
    total_loss = 0
    for xb, meta, yb in train_loader:
        # All three tensors come from a pinned-memory loader, so every
        # host-to-device copy can be issued asynchronously (the original only
        # did so for xb).
        xb = xb.to(device, non_blocking=True)
        meta = meta.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)
        optimizer.zero_grad()
        with autocast(device_type='cuda'):
            pred = model(xb, meta)
            loss = criterion(pred, yb)
        # Scale the loss before backward; step/update unscale and adjust.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()

    # Validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for xb, meta, yb in val_loader:
            xb = xb.to(device, non_blocking=True)
            meta = meta.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)
            with autocast(device_type='cuda'):
                pred = model(xb, meta)
                loss = criterion(pred, yb)
            val_loss += loss.item()
    # Mean of the per-batch losses.
    val_loss /= len(val_loader)

    # Status strings: the emoji prefixes had been mis-encoded (UTF-8 bytes
    # rendered as Mac Roman) and are restored here.
    print(f"✅ Epoch {epoch+1} | Train Loss: {total_loss/len(train_loader):.4f} | Val Loss: {val_loss:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "/kaggle/working/best_model_effnetv2_focal.pth")
        counter = 0
        print("💾 Model saved!")
    else:
        counter += 1
        if counter >= patience:
            print("⏹️ Early stopping.")
            break

model.safetensors:   0%|          | 0.00/28.8M [00:00<?, ?B/s]

✅ Epoch 1 | Train Loss: 0.0091 | Val Loss: 0.0057
💾 Model saved!
✅ Epoch 2 | Train Loss: 0.0048 | Val Loss: 0.0038
💾 Model saved!
✅ Epoch 3 | Train Loss: 0.0034 | Val Loss: 0.0031
💾 Model saved!
✅ Epoch 4 | Train Loss: 0.0026 | Val Loss: 0.0029
💾 Model saved!
✅ Epoch 5 | Train Loss: 0.0021 | Val Loss: 0.0027
💾 Model saved!
✅ Epoch 6 | Train Loss: 0.0018 | Val Loss: 0.0027
💾 Model saved!
✅ Epoch 7 | Train Loss: 0.0014 | Val Loss: 0.0027
✅ Epoch 8 | Train Loss: 0.0012 | Val Loss: 0.0026
💾 Model saved!
✅ Epoch 9 | Train Loss: 0.0010 | Val Loss: 0.0026
✅ Epoch 10 | Train Loss: 0.0009 | Val Loss: 0.0030
✅ Epoch 11 | Train Loss: 0.0008 | Val Loss: 0.0029
✅ Epoch 12 | Train Loss: 0.0006 | Val Loss: 0.0029
✅ Epoch 13 | Train Loss: 0.0006 | Val Loss: 0.0033
⏹️ Early stopping.
