In [1]:
import os
import subprocess
import psutil
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import timm
from peft import get_peft_model, LoraConfig
import mlflow
import mlflow.pytorch
from sklearn.metrics import f1_score, average_precision_score, precision_recall_curve
from torch.amp import autocast, GradScaler
from pynvml import (
    nvmlInit, nvmlDeviceGetHandleByIndex,
    nvmlDeviceGetUtilizationRates, nvmlDeviceGetMemoryInfo,
    nvmlDeviceGetTemperature, NVML_TEMPERATURE_GPU
)

In [2]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", DEVICE)

mlflow.set_experiment("EfficientNetB3_LoRA")
if mlflow.active_run(): 
    mlflow.end_run()
mlflow.start_run(log_system_metrics=True)

# log GPU/CPU info
gpu_info = next(
    (subprocess.run(cmd, capture_output=True, text=True).stdout
     for cmd in ["nvidia-smi","rocm-smi"]
     if subprocess.run(f"command -v {cmd}", shell=True, capture_output=True).returncode==0),
    "No GPU found."
)
mlflow.log_text(gpu_info, "gpu-info.txt")

nvmlInit()
gpu_handle = nvmlDeviceGetHandleByIndex(0)
def log_sys(step=None):
    mlflow.log_metric("system.cpu.utilization", psutil.cpu_percent(), step=step)
    m = psutil.virtual_memory()
    mlflow.log_metric("system.memory.used", m.used, step=step)
    mlflow.log_metric("system.memory.percent", m.percent, step=step)
    u = nvmlDeviceGetUtilizationRates(gpu_handle)
    mlflow.log_metric("system.gpu.utilization", u.gpu, step=step)
    gm = nvmlDeviceGetMemoryInfo(gpu_handle)
    mlflow.log_metric("system.gpu.mem.used", gm.used, step=step)
    mlflow.log_metric("system.gpu.mem.percent", (gm.used/gm.total)*100, step=step)
    t = nvmlDeviceGetTemperature(gpu_handle, NVML_TEMPERATURE_GPU)
    mlflow.log_metric("system.gpu.temperature", t, step=step)


2025/05/07 17:05:27 INFO mlflow.tracking.fluent: Experiment with name 'EfficientNetB3_LoRA' does not exist. Creating a new experiment.
2025/05/07 17:05:27 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Using device: cuda


In [3]:
BATCH_SIZE     = 64
LR             = 1e-4
WEIGHT_DECAY   = 1e-4
EPOCHS         = 20
SAVE_EPOCH_CK  = False
BEST_CKPT      = "best_effb3_lora.pt"

TAXONOMY_CSV   = "/home/jovyan/Data/birdclef-2025/taxonomy.csv"
TRAIN_MAN      = "/home/jovyan/Features/manifest_train.csv"
TEST_MAN       = "/home/jovyan/Features/manifest_test.csv"
TRAIN_CSV      = "/home/jovyan/Data/birdclef-2025/train.csv"
FEATURE_BASE   = "/home/jovyan/Features"

TARGET_MODULES  = ["conv_pw","conv_dw","conv_pwl","conv_head"]
MODULES_TO_SAVE = ["classifier"]

tax_df     = pd.read_csv(TAXONOMY_CSV)
CLASSES    = sorted(tax_df["primary_label"].astype(str).tolist())
NUM_CLASSES= len(CLASSES)

mlflow.log_params({
    "model":           "efficientnet_b3_lora",
    "input":           "mel",
    "num_classes":     NUM_CLASSES,
    "batch_size":      BATCH_SIZE,
    "lr":              LR,
    "weight_decay":    WEIGHT_DECAY,
    "epochs":          EPOCHS,
    "save_epoch_ck":   SAVE_EPOCH_CK,
    "lora_r":          12,
    "lora_alpha":      24,
    "lora_dropout":    0.1,
    "target_modules":  TARGET_MODULES
})

In [4]:
class MelDataset(Dataset):
    def __init__(self, manifest_csv, meta_csv, base, classes, key="mel"):
        m = pd.read_csv(manifest_csv)
        m["mel_path"] = (
            m["mel_path"].astype(str)
             .str.lstrip(os.sep)
             .apply(lambda p: os.path.join(base,"mel",p))
        )
        meta = pd.read_csv(meta_csv, usecols=["filename","secondary_labels"])
        meta["rid"]  = meta.filename.str.replace(r"\.ogg$","",regex=True)
        meta["secs"]= meta.secondary_labels.fillna("").str.split()
        sec_map = dict(zip(meta.rid, meta.secs))

        self.rows     = []
        self.idx_map  = {c:i for i,c in enumerate(classes)}
        self.num_cls  = len(classes)
        self.key      = key

        for _, r in tqdm(m.iterrows(), total=len(m), desc="Building dataset"):
            rid     = r.chunk_id.split("_chk")[0]
            labs    = [r.primary_label] + sec_map.get(rid, [])
            labs    = [l for l in labs if l in self.idx_map]
            prim_idx= self.idx_map[r.primary_label]
            self.rows.append((r.mel_path, labs, prim_idx))

    def __len__(self):
        return len(self.rows)

    def __getitem__(self, i):
        path, labs, prim_idx = self.rows[i]
        arr = np.load(path)[self.key]                # [n_mels,n_frames]
        x   = torch.from_numpy(arr).unsqueeze(0).float()  # [1,n_mels,n_frames]
        y   = torch.zeros(self.num_cls, dtype=torch.float32)
        for c in labs:
            y[self.idx_map[c]] = 1.0
        return x, y, prim_idx

In [5]:
def mixup(x, y, alpha=0.4):
    lam = np.random.beta(alpha,alpha) if alpha>0 else 1.0
    idx = torch.randperm(x.size(0), device=x.device)
    return lam*x + (1-lam)*x[idx], y, y[idx], lam

train_ds = MelDataset(TRAIN_MAN, TRAIN_CSV, FEATURE_BASE, CLASSES)
test_ds  = MelDataset(TEST_MAN,  TRAIN_CSV, FEATURE_BASE, CLASSES)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE,
                          shuffle=True,  num_workers=4, pin_memory=True)
test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=4, pin_memory=True)


Building dataset: 100%|██████████| 108451/108451 [00:05<00:00, 21684.08it/s]
Building dataset: 100%|██████████| 11022/11022 [00:00<00:00, 22867.73it/s]


In [8]:
def build_efficientnetb3_lora(num_classes):
    base = timm.create_model("efficientnet_b3", pretrained=True)
    # patch forward
    orig_forward = base.forward
    def forward_patch(*args, **kwargs):
        # Accept either positional input or named input_ids/inputs_embeds
        if "input_ids" in kwargs:
            x = kwargs.pop("input_ids")
        elif "inputs_embeds" in kwargs:
            x = kwargs.pop("inputs_embeds")
        elif len(args) > 0:
            x = args[0]
        else:
            raise ValueError("No input tensor found")
        # drop any other transformer‐style kwargs silently
        for k in list(kwargs.keys()):
            kwargs.pop(k)
        return orig_forward(x)
    base.forward = forward_patch

    # adapt to 1‑channel
    stem = base.conv_stem
    base.conv_stem = nn.Conv2d(1, stem.out_channels,
                               kernel_size=stem.kernel_size,
                               stride=stem.stride,
                               padding=stem.padding,
                               bias=False)
    # replace head
    in_f = base.classifier.in_features
    base.classifier = nn.Linear(in_f, num_classes)
    # apply LoRA
    lora_cfg = LoraConfig(
        r=12, lora_alpha=24,
        target_modules=TARGET_MODULES,
        lora_dropout=0.1,
        bias="none",
        modules_to_save=MODULES_TO_SAVE,
        task_type="FEATURE_EXTRACTION",
        inference_mode=False
    )
    model = get_peft_model(base, lora_cfg)
    return model

model     = build_efficientnetb3_lora(NUM_CLASSES).to(DEVICE)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=LR,
    steps_per_epoch=len(train_loader),
    epochs=EPOCHS,
    pct_start=0.1, div_factor=10
)
scaler    = GradScaler()

In [9]:
best_f1, best_ap, best_acc = 0.0, 0.0, 0.0
thresholds = np.full(NUM_CLASSES, 0.5, dtype=np.float32)

for epoch in range(1, EPOCHS+1):
    # — Train —
    model.train()
    run_loss, total = 0.0, 0
    tbar = tqdm(train_loader, desc=f"[{epoch}/{EPOCHS}] Train", unit="batch")
    for xb, yb, _ in tbar:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        # optional mixup
        xb_m, ya, yb_m, lam = mixup(xb, yb)
        optimizer.zero_grad()
        with autocast(device_type="cuda"):
            logits = model(xb_m)
            loss   = lam*criterion(logits, ya) + (1-lam)*criterion(logits, yb_m)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()

        bs = xb.size(0)
        run_loss += loss.item()*bs
        total    += bs
        tbar.set_postfix({"loss": f"{run_loss/total:.4f}"})
    train_loss = run_loss/total

    # — Eval —
    model.eval()
    all_scores, all_tgts, all_prims = [], [], []
    val_loss, total = 0.0, 0
    with torch.no_grad():
        for xb, yb, prim_idx in tqdm(test_loader, desc=f"[{epoch}/{EPOCHS}] Eval ", unit="batch"):
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            with autocast(device_type="cuda"):
                logits = model(xb)
                val_loss += criterion(logits, yb).item()*xb.size(0)
                scores   = torch.sigmoid(logits).cpu().numpy()
            all_scores.append(scores)
            all_tgts.append(yb.cpu().numpy())
            all_prims.extend(prim_idx.tolist())
            total += xb.size(0)

    val_loss /= total
    scores = np.vstack(all_scores)
    tgts   = np.vstack(all_tgts)
    prims  = np.array(all_prims, dtype=int)

    # threshold calibration
    for i in range(NUM_CLASSES):
        y_true = tgts[:,i]
        if 0 < y_true.sum() < len(y_true):
            prec, rec, th = precision_recall_curve(y_true, scores[:,i])
            f1_vals = 2*prec*rec/(prec+rec+1e-8)
            best    = np.nanargmax(f1_vals[:-1])
            thresholds[i] = th[best]

    preds      = (scores >= thresholds).astype(int)
    micro_f1   = f1_score(tgts, preds, average="micro", zero_division=0)
    micro_ap   = average_precision_score(tgts, scores, average="micro")
    top1       = scores.argmax(axis=1)
    primary_acc= (top1 == prims).mean()

    # checkpoint best only
    if micro_f1 > best_f1:
        best_f1, best_ap, best_acc = micro_f1, micro_ap, primary_acc
        torch.save(model.state_dict(), BEST_CKPT)
        mlflow.log_artifact(BEST_CKPT, artifact_path="model")

    # log metrics
    mlflow.log_metrics({
        "train_loss":    train_loss,
        "val_loss":      val_loss,
        "micro_f1":      micro_f1,
        "micro_ap":      micro_ap,
        "primary_acc":   primary_acc
    }, step=epoch)
    log_sys(step=epoch)

    print(f"→ Epoch {epoch}/{EPOCHS}  "
          f"F1={micro_f1:.4f}  AP={micro_ap:.4f}  PrimAcc={primary_acc:.4f}")

[1/20] Train: 100%|██████████| 1695/1695 [03:23<00:00,  8.34batch/s, loss=0.1076]
[1/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 23.12batch/s]


→ Epoch 1/20  F1=0.0197  AP=0.0146  PrimAcc=0.0312


[2/20] Train: 100%|██████████| 1695/1695 [03:30<00:00,  8.03batch/s, loss=0.0272]
[2/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.03batch/s]


→ Epoch 2/20  F1=0.0527  AP=0.1329  PrimAcc=0.1541


[3/20] Train: 100%|██████████| 1695/1695 [03:27<00:00,  8.18batch/s, loss=0.0217]
[3/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 23.88batch/s]


→ Epoch 3/20  F1=0.2380  AP=0.3856  PrimAcc=0.3759


[4/20] Train: 100%|██████████| 1695/1695 [03:29<00:00,  8.09batch/s, loss=0.0180]
[4/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 22.19batch/s]


→ Epoch 4/20  F1=0.4612  AP=0.5119  PrimAcc=0.4821


[5/20] Train: 100%|██████████| 1695/1695 [03:22<00:00,  8.37batch/s, loss=0.0160]
[5/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.05batch/s]


→ Epoch 5/20  F1=0.5487  AP=0.5574  PrimAcc=0.5298


[6/20] Train: 100%|██████████| 1695/1695 [03:24<00:00,  8.28batch/s, loss=0.0147]
[6/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.06batch/s]


→ Epoch 6/20  F1=0.5665  AP=0.5846  PrimAcc=0.5567


[7/20] Train: 100%|██████████| 1695/1695 [03:23<00:00,  8.35batch/s, loss=0.0140]
[7/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.15batch/s]


→ Epoch 7/20  F1=0.6105  AP=0.6419  PrimAcc=0.6005


[8/20] Train: 100%|██████████| 1695/1695 [03:39<00:00,  7.73batch/s, loss=0.0132]
[8/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 23.83batch/s]


→ Epoch 8/20  F1=0.5767  AP=0.6502  PrimAcc=0.6098


[9/20] Train: 100%|██████████| 1695/1695 [03:24<00:00,  8.28batch/s, loss=0.0125]
[9/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 23.82batch/s]


→ Epoch 9/20  F1=0.6308  AP=0.6538  PrimAcc=0.6154


[10/20] Train: 100%|██████████| 1695/1695 [03:30<00:00,  8.05batch/s, loss=0.0122]
[10/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.01batch/s]


→ Epoch 10/20  F1=0.6029  AP=0.6678  PrimAcc=0.6292


[11/20] Train: 100%|██████████| 1695/1695 [03:22<00:00,  8.36batch/s, loss=0.0118]
[11/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.07batch/s]


→ Epoch 11/20  F1=0.6198  AP=0.6775  PrimAcc=0.6386


[12/20] Train: 100%|██████████| 1695/1695 [03:23<00:00,  8.34batch/s, loss=0.0117]
[12/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.05batch/s]


→ Epoch 12/20  F1=0.6009  AP=0.6801  PrimAcc=0.6428


[13/20] Train: 100%|██████████| 1695/1695 [03:28<00:00,  8.14batch/s, loss=0.0111]
[13/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 23.95batch/s]


→ Epoch 13/20  F1=0.6293  AP=0.6788  PrimAcc=0.6423


[14/20] Train: 100%|██████████| 1695/1695 [03:24<00:00,  8.29batch/s, loss=0.0109]
[14/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 23.61batch/s]


→ Epoch 14/20  F1=0.6094  AP=0.6841  PrimAcc=0.6457


[15/20] Train: 100%|██████████| 1695/1695 [03:25<00:00,  8.25batch/s, loss=0.0106]
[15/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.05batch/s]


→ Epoch 15/20  F1=0.6397  AP=0.6857  PrimAcc=0.6472


[16/20] Train: 100%|██████████| 1695/1695 [03:29<00:00,  8.08batch/s, loss=0.0103]
[16/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.02batch/s]


→ Epoch 16/20  F1=0.6403  AP=0.6865  PrimAcc=0.6503


[17/20] Train: 100%|██████████| 1695/1695 [03:27<00:00,  8.16batch/s, loss=0.0106]
[17/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 23.63batch/s]


→ Epoch 17/20  F1=0.6166  AP=0.6852  PrimAcc=0.6524


[18/20] Train: 100%|██████████| 1695/1695 [03:27<00:00,  8.17batch/s, loss=0.0102]
[18/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.02batch/s]


→ Epoch 18/20  F1=0.6289  AP=0.6895  PrimAcc=0.6535


[19/20] Train: 100%|██████████| 1695/1695 [03:27<00:00,  8.19batch/s, loss=0.0103]
[19/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 24.01batch/s]


→ Epoch 19/20  F1=0.6390  AP=0.6887  PrimAcc=0.6550


[20/20] Train: 100%|██████████| 1695/1695 [03:26<00:00,  8.19batch/s, loss=0.0102]
[20/20] Eval : 100%|██████████| 173/173 [00:07<00:00, 23.76batch/s]


→ Epoch 20/20  F1=0.6463  AP=0.6906  PrimAcc=0.6553


In [10]:
mlflow.log_metric("best_micro_f1", best_f1)
mlflow.log_metric("best_micro_ap", best_ap)
mlflow.log_metric("best_primary_acc", best_acc)
mlflow.end_run()

2025/05/07 18:24:32 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/05/07 18:24:32 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run dapper-grouse-349 at: http://192.5.87.49:8000/#/experiments/7/runs/30fa06b0068143b2b0cae573e6581e87
🧪 View experiment at: http://192.5.87.49:8000/#/experiments/7
