In [1]:
import os, glob, numpy as np, pandas as pd
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T
import librosa
import torch.nn.functional as F
from tqdm.notebook import tqdm
import timm
from peft import LoraConfig, get_peft_model


2025-04-30 04:32:02.749392: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745987522.768681   89913 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745987522.774727   89913 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745987522.790032   89913 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745987522.790048   89913 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745987522.790050   89913 computation_placer.cc:177] computation placer alr

In [2]:

# Paths
DATA_ROOT = '/home/jovyan/Data/birdclef-2025'
DEN_DIR   = '/home/jovyan/Features/denoised'
CSV_PATH  = os.path.join(DATA_ROOT, 'train.csv')

# Hyperparameters
PANNS_SR      = 32000   # sample rate (Hz) used as the default for the mel-spectrogram helper
N_FFT         = 2048
HOP_LENGTH    = 512
N_MELS        = 128
BATCH_SIZE    = 32
TRAIN_FRAC    = 0.8     # fraction of the dataset assigned to the training split
NUM_EPOCHS    = 2
LR            = 1e-4

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)

# Read the metadata through CSV_PATH (derived from DATA_ROOT) instead of a
# second hard-coded copy of the path, so relocating the data needs one edit.
meta = pd.read_csv(CSV_PATH)
# Sorting the unique labels makes the label -> index mapping deterministic.
label2idx = {lab: i for i, lab in enumerate(sorted(meta['primary_label'].unique()))}
num_classes = len(label2idx)

Device: cuda


In [3]:
# Cell 3 — Spectrogram helper
def calculate_mel_spectrogram(wave_np, sr=PANNS_SR,
                              n_fft=N_FFT, hop_length=HOP_LENGTH,
                              n_mels=N_MELS):
    """Convert a waveform into a dB-scaled mel spectrogram.

    Args:
        wave_np: Audio samples, forwarded to librosa as ``y``.
        sr: Sample rate in Hz.
        n_fft: FFT window size.
        hop_length: Hop between successive frames, in samples.
        n_mels: Number of mel bands.

    Returns:
        The mel power spectrogram converted to decibels, using the
        spectrogram's own maximum as the reference power.
    """
    mel_kwargs = dict(sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
    mel_power = librosa.feature.melspectrogram(y=wave_np, **mel_kwargs)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    return mel_db


In [4]:
# Cell 4 — Resize transform for Tensor [1,H,W] → [1,300,300]
class ResizeTensor:
    """Callable transform that bilinearly resizes a single image tensor.

    Args:
        size: Target spatial size, forwarded to ``F.interpolate``.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, x):
        # F.interpolate works on batched input, so add a leading batch axis
        # for the call and strip it from the result again.
        batched = x[None]
        resized = F.interpolate(
            batched, size=self.size, mode='bilinear', align_corners=False
        )
        return resized[0]


In [5]:
# Cell 5 — Dataset of denoised MEL (1-channel) + label
class DenoisedMelDataset(Dataset):
    """Dataset over ``.npz`` archives that yields ``(mel_image, label)`` pairs.

    Every archive found recursively under ``den_dir`` is expected to hold a
    ``waveform`` array and a ``label`` entry. Labels are read once at
    construction; waveforms are loaded and converted to a mel spectrogram on
    demand in ``__getitem__``.

    Args:
        den_dir: Root directory searched recursively for ``*.npz`` files.
        transform: Optional callable applied to the ``[1, H, W]`` tensor.
    """

    def __init__(self, den_dir, transform=None):
        self.files = sorted(glob.glob(f"{den_dir}/**/*.npz", recursive=True))
        self.transform = transform
        # read labels once
        self.labels = [self._read_label(path) for path in self.files]
        # Number of distinct labels actually present in the files.
        self.num_classes = len(set(self.labels))

    @staticmethod
    def _read_label(path):
        """Return the integer label stored in one archive, closing it afterwards."""
        # np.load on an .npz returns an NpzFile that keeps the file open until
        # closed; without the context manager one handle leaks per file.
        with np.load(path) as data:
            return int(data['label'])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        # Pull what we need out of the archive, then close it before the
        # (comparatively slow) spectrogram computation.
        with np.load(self.files[idx]) as data:
            wave = data['waveform']
            lbl  = int(data['label'])
        mel  = calculate_mel_spectrogram(wave)    # 2-D array (mel bands x frames)
        img  = torch.from_numpy(mel.astype(np.float32)).unsqueeze(0)  # [1,H,W]
        if self.transform is not None:
            img = self.transform(img)
        return img, lbl


In [6]:
# Cell 6 — Build the dataset, split it, and wrap both splits in DataLoaders
# NOTE(review): Normalize with mean 0 / std 1 leaves the values unchanged;
# it is a placeholder to adjust once dataset statistics are known.
tf = T.Compose([
    ResizeTensor((300, 300)),
    T.Normalize(mean=[0.0], std=[1.0]),
])
ds = DenoisedMelDataset(DEN_DIR, transform=tf)

# Fixed-seed generator so the train/val partition is the same on every run.
n_train = int(len(ds) * TRAIN_FRAC)
split_sizes = [n_train, len(ds) - n_train]
split_rng = torch.Generator().manual_seed(42)
train_ds, val_ds = random_split(ds, split_sizes, generator=split_rng)

# Options shared by both loaders; only the shuffle flag differs.
loader_opts = dict(batch_size=BATCH_SIZE, num_workers=2, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True, **loader_opts)
val_loader = DataLoader(val_ds, shuffle=False, **loader_opts)

print(f"Num classes: {num_classes}, train samples: {len(train_ds)}, val samples: {len(val_ds)}")


Num classes: 206, train samples: 8988, val samples: 2248


In [7]:
# Load EfficientNet-B3 & inspect its Linear layers
# 'tf_efficientnet_b3_ns' is a deprecated alias in current timm releases;
# use the architecture.pretrained-tag form it maps to so no deprecation
# warning is emitted (requires a timm version with pretrained tags).
base_model = timm.create_model(
    'tf_efficientnet_b3.ns_jft_in1k',
    pretrained=True,
    in_chans=1,               # single-channel mel-spectrogram input
    num_classes=num_classes   # classifier head sized for this label set
).to(device)

# Print all Linear submodules
print("Linear modules found in EfficientNet-B3:")
for name, module in base_model.named_modules():
    if isinstance(module, nn.Linear):
        print("  →", name)

  model = create_fn(


Linear modules found in EfficientNet-B3:
  → classifier


In [8]:
# Wrap the backbone with LoRA adapters
# TARGET_MODULES names the convolution layers that receive adapters;
# MODULES_TO_SAVE lists the classifier head so it stays fully trainable.
TARGET_MODULES = ["conv_pw", "conv_dw", "conv_pwl", "conv_head"]
MODULES_TO_SAVE = ["classifier"]

lora_kwargs = dict(
    r=12,
    lora_alpha=24,
    lora_dropout=0.1,
    bias="none",
    target_modules=TARGET_MODULES,
    modules_to_save=MODULES_TO_SAVE,
)
lora_config = LoraConfig(**lora_kwargs)

model = get_peft_model(base_model, lora_config).to(device)
model.print_trainable_parameters()

trainable params: 5,572,334 || all params: 16,584,468 || trainable%: 33.5997


In [9]:
# Loss, optimizer & scheduler
criterion = nn.CrossEntropyLoss()

# Only hand the optimizer the parameters that are still trainable after the
# model was wrapped (frozen weights have requires_grad=False).
optimizer = optim.Adam(
    (p for p in model.parameters() if p.requires_grad),
    lr=LR
)

# Halve the LR when the monitored value (passed to scheduler.step) has not
# decreased for more than `patience` epochs.
# `verbose` is intentionally not passed: it is deprecated in recent PyTorch
# releases; read the current LR from optimizer.param_groups or
# scheduler.get_last_lr() instead.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', patience=2, factor=0.5
)

In [10]:
# Train & validate loop
def _run_epoch(net, loader, loss_fn, dev, desc, opt=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        net: Model to run.
        loader: Iterable of ``(images, labels)`` batches.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        dev: Device the batches are moved to.
        desc: Progress-bar description.
        opt: Optimizer. When given, the model is put in train mode and updated
            after every batch; when ``None`` the model is put in eval mode and
            gradient tracking is disabled.

    Returns:
        Sample-weighted mean loss and fraction of correct argmax predictions.
    """
    training = opt is not None
    net.train(training)               # train(False) is the same as eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for imgs, labels in tqdm(loader, desc=desc):
            # non_blocking allows an asynchronous host->device copy when the
            # loader yields pinned-memory batches.
            imgs = imgs.to(dev, non_blocking=True)
            labels = labels.to(dev, non_blocking=True)
            if training:
                opt.zero_grad()
            out = net(imgs)
            loss = loss_fn(out, labels)
            if training:
                loss.backward()
                opt.step()

            # Weight the batch loss by batch size so a smaller final batch
            # does not skew the epoch mean.
            n_batch = imgs.size(0)
            loss_sum += loss.item() * n_batch
            n_correct += (out.argmax(1) == labels).sum().item()
            n_seen += n_batch
    return loss_sum / n_seen, n_correct / n_seen


best_acc = 0.0
for epoch in range(1, NUM_EPOCHS+1):
    # — Train —
    train_loss, train_acc = _run_epoch(
        model, train_loader, criterion, device,
        desc=f"Epoch {epoch} Train", opt=optimizer
    )

    # — Validate —
    val_loss, val_acc = _run_epoch(
        model, val_loader, criterion, device,
        desc=f"Epoch {epoch} Val"
    )

    # Report LR reductions here so they are visible regardless of whether the
    # scheduler itself prints anything.
    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after != lr_before:
        print(f"LR reduced: {lr_before:.2e} -> {lr_after:.2e}")

    print(f"\nEpoch {epoch}: Train loss {train_loss:.4f}, acc {train_acc:.4f} | "
          f"Val loss {val_loss:.4f}, acc {val_acc:.4f}")

    # Keep only the checkpoint with the best validation accuracy so far.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_effnetb3_lora.pth')
        print(f"✔️ Saved best (val_acc={best_acc:.4f})")

print(f"\n Finished training. Best Val Acc: {best_acc:.4f}")


Epoch 1 Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 1 Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 1: Train loss 4.5062, acc 0.0922 | Val loss 3.8644, acc 0.1810
✔️ Saved best (val_acc=0.1810)


Epoch 2 Train:   0%|          | 0/281 [00:00<?, ?it/s]

Epoch 2 Val:   0%|          | 0/71 [00:00<?, ?it/s]


Epoch 2: Train loss 3.1641, acc 0.3230 | Val loss 3.0318, acc 0.3839
✔️ Saved best (val_acc=0.3839)

 Finished training. Best Val Acc: 0.3839
