In [1]:
import os
import sys
import subprocess
import psutil
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from peft import get_peft_model, LoraConfig
import mlflow
import mlflow.pytorch
from pynvml import (
    nvmlInit,
    nvmlDeviceGetHandleByIndex,
    nvmlDeviceGetUtilizationRates,
    nvmlDeviceGetMemoryInfo,
    nvmlDeviceGetTemperature,
    NVML_TEMPERATURE_GPU
)
from sklearn.metrics import f1_score

In [2]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

mlflow.set_experiment("Panns_CNN10_Finetune")
try: mlflow.end_run()
except: pass
mlflow.start_run(log_system_metrics=True)

# log GPU/CPU info
gpu_info = next(
    (subprocess.run(cmd, capture_output=True, text=True).stdout
     for cmd in ["nvidia-smi","rocm-smi"]
     if subprocess.run(f"command -v {cmd}", shell=True,
                       capture_output=True).returncode == 0),
    "No GPU found."
)
mlflow.log_text(gpu_info, "gpu-info.txt")

nvmlInit()
gpu_handle = nvmlDeviceGetHandleByIndex(0)
def log_system_metrics(step=None):
    mlflow.log_metric("system.cpu.utilization", psutil.cpu_percent(), step=step)
    mem = psutil.virtual_memory()
    mlflow.log_metric("system.memory.used", mem.used, step=step)
    mlflow.log_metric("system.memory.percent", mem.percent, step=step)
    g = nvmlDeviceGetUtilizationRates(gpu_handle)
    mlflow.log_metric("system.gpu.0.utilization", g.gpu, step=step)
    m = nvmlDeviceGetMemoryInfo(gpu_handle)
    mlflow.log_metric("system.gpu.0.memory.used", m.used, step=step)
    mlflow.log_metric("system.gpu.0.memory.percent",
                      (m.used/m.total)*100, step=step)
    t = nvmlDeviceGetTemperature(gpu_handle, NVML_TEMPERATURE_GPU)
    mlflow.log_metric("system.gpu.0.temperature", t, step=step)

Using device: cuda


2025/05/07 12:53:37 INFO mlflow.tracking.fluent: Experiment with name 'Panns_CNN10_Finetune' does not exist. Creating a new experiment.
2025/05/07 12:53:37 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


In [3]:
TAXONOMY_CSV = "/home/jovyan/Data/birdclef-2025/taxonomy.csv"
tax_df       = pd.read_csv(TAXONOMY_CSV)
CLASSES      = sorted(tax_df["primary_label"].astype(str).tolist())
NUM_CLASSES  = len(CLASSES)

In [4]:
class DenoisedDataset(Dataset):
    def __init__(self, manifest_csv, metadata_csv, feature_base, classes,
                 sample_rate=32000, duration=10.0):
        m_df = pd.read_csv(manifest_csv)
        m_df["audio_path"] = (
            m_df["audio_path"].str.lstrip(os.sep)
                 .apply(lambda p: os.path.join(feature_base, "denoised", p))
        )
        meta = pd.read_csv(metadata_csv, usecols=["filename","secondary_labels"])
        meta["rid"]  = meta.filename.str.replace(r"\.ogg$","",regex=True)
        meta["secs"] = meta.secondary_labels.fillna("").str.split()
        sec_map = dict(zip(meta.rid, meta.secs))

        self.rows        = []
        self.label2idx   = {lab:i for i, lab in enumerate(classes)}
        self.num_classes = len(classes)
        self.wav_len     = int(sample_rate * duration)

        for _, row in tqdm(m_df.iterrows(), total=len(m_df),
                          desc=f"Building {os.path.basename(manifest_csv)}"):
            rid  = row.chunk_id.split("_chk")[0]
            labs = [row.primary_label] + sec_map.get(rid, [])
            labs = [l for l in labs if l in self.label2idx]
            self.rows.append({"path": row.audio_path, "labels": labs})

    def __len__(self):
        return len(self.rows)

    def __getitem__(self, idx):
        rec = self.rows[idx]
        wav, sr = torchaudio.load(rec["path"])  # (channels, samples)
        wav = torch.mean(wav, dim=0)             # mono
        if wav.size(0) < self.wav_len:
            wav = F.pad(wav, (0, self.wav_len - wav.size(0)))
        else:
            wav = wav[:self.wav_len]
        y = torch.zeros(self.num_classes, dtype=torch.float32)
        for l in rec["labels"]:
            y[self.label2idx[l]] = 1.0
        return wav, y


In [5]:
REPO_DIR = "audioset_tagging_cnn"
if not os.path.isdir(REPO_DIR):
    os.system("git clone https://github.com/qiuqiangkong/audioset_tagging_cnn.git")
sys.path.insert(0, os.path.join(REPO_DIR, "pytorch"))
from models import Cnn10

def get_panns_cnn10(num_classes, device):
    model = Cnn10(
        sample_rate=32000,
        window_size=1024,
        hop_size=320,
        mel_bins=64,
        fmin=50,
        fmax=14000,
        classes_num=num_classes
    )
    return model.to(device)

model     = get_panns_cnn10(NUM_CLASSES, DEVICE)
criterion = nn.BCEWithLogitsLoss()  # will override with pos_weight below
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

Cloning into 'audioset_tagging_cnn'...


In [6]:
FEATURE_BASE   = "/home/jovyan/Features"
TRAIN_MANIFEST = os.path.join(FEATURE_BASE, "manifest_train.csv")
TEST_MANIFEST  = os.path.join(FEATURE_BASE, "manifest_test.csv")
TRAIN_CSV      = "/home/jovyan/Data/birdclef-2025/train.csv"

BATCH_SIZE, LR, EPOCHS = 32, 1e-4, 20

train_ds = DenoisedDataset(TRAIN_MANIFEST, TRAIN_CSV, FEATURE_BASE, CLASSES)
test_ds  = DenoisedDataset(TEST_MANIFEST,  TRAIN_CSV, FEATURE_BASE, CLASSES)

# compute class-wise pos_weight (fast)
counts = np.zeros(NUM_CLASSES, dtype=np.int64)
for row in train_ds.rows:
    for lab in row["labels"]:
        counts[ train_ds.label2idx[lab] ] += 1
n = len(train_ds)
neg = n - counts
pos_weight = np.ones(NUM_CLASSES, dtype=np.float32)
mask = counts > 0
pos_weight[mask] = neg[mask] / counts[mask]
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.from_numpy(pos_weight).to(DEVICE))

mlflow.log_params({
    "model":        "CNN10_panns",
    "input":        "denoised_audio",
    "num_classes":  NUM_CLASSES,
    "batch_size":   BATCH_SIZE,
    "lr":           LR,
    "epochs":       EPOCHS
})

train_loader = DataLoader(
    train_ds, batch_size=BATCH_SIZE,
    shuffle=True,  num_workers=4, pin_memory=True
)
test_loader  = DataLoader(
    test_ds,  batch_size=BATCH_SIZE,
    shuffle=False, num_workers=4, pin_memory=True
)

Building manifest_train.csv: 100%|██████████| 108451/108451 [00:05<00:00, 20704.89it/s]
Building manifest_test.csv: 100%|██████████| 11022/11022 [00:00<00:00, 23479.12it/s]


In [7]:
best_f1, best_ckpt = 0.0, None

for epoch in range(1, 1+1):
    # Train
    model.train()
    train_bar = tqdm(train_loader, desc=f"Epoch {epoch} Train", unit="batch")
    run_loss, total = 0.0, 0
    for xb, yb in train_bar:
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        optimizer.zero_grad()
        out    = model(xb)                    
        logits = out["clipwise_output"] 
        loss   = criterion(logits, yb)
        loss.backward()
        optimizer.step()

        run_loss += loss.item() * xb.size(0)
        total   += xb.size(0)
        train_bar.set_postfix({"loss": f"{run_loss/total:.4f}"})

    # Test + micro‑F1
    model.eval()
    all_preds, all_tgts = [], []
    test_bar = tqdm(test_loader, desc=f"Epoch {epoch} Test ", unit="batch")
    test_loss, total = 0.0, 0
    with torch.no_grad():
        for xb, yb in test_bar:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            out    = model(xb)                    
            logits = out["clipwise_output"]
            loss   = criterion(logits, yb)

            test_loss += loss.item() * xb.size(0)
            total    += xb.size(0)

            probs = torch.sigmoid(logits).cpu().numpy()
            preds = (probs >= 0.5).astype(int)
            all_preds.append(preds)
            all_tgts.append(yb.cpu().numpy())
            test_bar.set_postfix({"loss": f"{test_loss/total:.4f}"})

    all_preds = np.vstack(all_preds)
    all_tgts  = np.vstack(all_tgts)
    micro_f1  = f1_score(all_tgts, all_preds, average="micro", zero_division=0)
    test_loss = test_loss / total

    # Checkpoint
    ckpt = f"cnn10_epoch_{epoch}.pt"
    torch.save({
        "epoch":       epoch,
        "model_state": model.state_dict(),
        "optim_state": optimizer.state_dict(),
        "train_loss":  run_loss/len(train_ds),
        "test_loss":   test_loss,
        "micro_f1":    micro_f1
    }, ckpt)

    mlflow.log_metrics({
        "train_loss": run_loss/len(train_ds),
        "test_loss":  test_loss,
        "micro_f1":   micro_f1
    }, step=epoch)
    log_system_metrics(step=epoch)
    mlflow.log_artifact(ckpt, artifact_path="checkpoints")

    if micro_f1 > best_f1:
        best_f1, best_ckpt = micro_f1, ckpt

    print(f"→ Epoch {epoch}/{EPOCHS}  micro‑F1={micro_f1:.4f}")

Epoch 1 Train: 100%|██████████| 3390/3390 [12:38<00:00,  4.47batch/s, loss=1.3288]
Epoch 1 Test : 100%|██████████| 345/345 [00:31<00:00, 10.85batch/s, loss=1.2423]


→ Epoch 1/20  micro‑F1=0.0097


In [8]:
mlflow.log_metric("best_micro_f1", best_f1)
mlflow.log_artifact(best_ckpt, artifact_path="model")
mlflow.end_run()

2025/05/07 13:08:37 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/05/07 13:08:38 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


🏃 View run skillful-crow-302 at: http://192.5.87.49:8000/#/experiments/1/runs/3ff9849b6b3747ca873dbcee6e2e66db
🧪 View experiment at: http://192.5.87.49:8000/#/experiments/1
