In [1]:
import os, glob, numpy as np, pandas as pd
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T
import librosa
import torch.nn.functional as F
from tqdm.notebook import tqdm
import timm
from peft import LoraConfig, get_peft_model


In [2]:

# Configuration cell: filesystem locations, hyperparameters, RNG seed, device
# selection and the label vocabulary used by every later cell.

# Paths
DATA_ROOT      = '/home/jovyan/Data/birdclef-2025'
FEATURES_ROOT  = '/home/jovyan/Features'   # common root of all derived artefacts below
MEL_DIR        = os.path.join(FEATURES_ROOT, 'mel')
MANIFEST_TRAIN = os.path.join(FEATURES_ROOT, 'manifest_train.csv')
MANIFEST_TEST  = os.path.join(FEATURES_ROOT, 'manifest_test.csv')
TAXONOMY_CSV   = os.path.join(FEATURES_ROOT, 'taxonomy.csv')

# Hyperparameters
# NOTE(review): PANNS_SR / N_FFT / HOP_LENGTH / N_MELS are not read by the cells
# visible here; presumably they record how the mel features were extracted — confirm.
PANNS_SR      = 32000
N_FFT         = 2048
HOP_LENGTH    = 512
N_MELS        = 128
BATCH_SIZE    = 32
NUM_EPOCHS    = 5
LR            = 1e-4

# Reproducibility: seed the RNGs of the libraries this notebook draws randomness
# from (DataLoader shuffling, random flips and weight init all go through torch).
# This makes runs repeatable; it does not force bitwise-deterministic GPU kernels.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)

# Label vocabulary: every distinct primary_label in the taxonomy, sorted so the
# label -> index mapping is stable across runs.
tax_df      = pd.read_csv(TAXONOMY_CSV)
labels_all  = sorted(tax_df['primary_label'].unique())
label2idx   = {lab: i for i, lab in enumerate(labels_all)}
num_classes = len(labels_all)

Device: cuda


In [3]:
class ResizeTensor:
    """Callable transform that bilinearly resizes one image tensor.

    Args:
        size: target spatial size, forwarded unchanged to ``F.interpolate``.
    """

    def __init__(self, size):
        self.size = size

    def __call__(self, x):
        """Resize ``x`` (a [C,H,W] tensor) to ``self.size`` and return it as [C,H',W']."""
        # F.interpolate operates on batched input, so add a leading batch
        # axis for the call and strip it again afterwards.
        batched = x[None]
        resized = F.interpolate(
            batched, size=self.size, mode='bilinear', align_corners=False
        )
        return resized[0]

In [4]:
class MelManifestDataset(Dataset):
    """Dataset of precomputed mel spectrograms listed in a CSV manifest.

    The manifest must provide a ``primary_label`` column (class name) and a
    ``mel_path`` column (path of an ``.npz`` file relative to ``mel_dir``).
    Each ``.npz`` file must contain an array stored under the key ``'mel'``.

    Args:
        manifest_fp: path of the manifest CSV.
        mel_dir: directory that the manifest's ``mel_path`` values are relative to.
        transform: optional callable applied to the ``[1,H,W]`` float32 tensor.
        label_map: optional mapping from ``primary_label`` to class index. When
            omitted, the notebook-level ``label2idx`` is looked up at item
            access time, which is the original behaviour.

    Raises (from ``__getitem__``):
        KeyError: if a label is missing from the mapping or the archive has no
            ``'mel'`` entry.
    """

    def __init__(self, manifest_fp, mel_dir, transform=None, label_map=None):
        self.df        = pd.read_csv(manifest_fp)
        self.mel_dir   = mel_dir
        self.transform = transform
        self.label_map = label_map

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(img, lbl)``: a float32 tensor with a leading channel axis and its class index."""
        row     = self.df.iloc[idx]
        # Resolve the global lazily so it is only required when no map was passed.
        mapping = label2idx if self.label_map is None else self.label_map
        lbl     = mapping[row['primary_label']]
        # Strip leading slashes so os.path.join does not discard mel_dir.
        rel     = row['mel_path'].lstrip('/')              # e.g. "speciesA/chunk0.npz"
        full    = os.path.join(self.mel_dir, rel)
        # np.load on an .npz keeps the archive open; close it deterministically
        # instead of leaking one file handle per sample until garbage collection.
        with np.load(full) as data:
            mel = data['mel'].astype(np.float32)             # expected [N_MELS, T]
        img = torch.from_numpy(mel).unsqueeze(0)             # [1,H,W]
        if self.transform is not None:
            img = self.transform(img)
        return img, lbl

In [5]:
# Transforms, datasets and loaders for the train and test manifests.
IMG_SIZE = (300, 300)


def _to_three_channels(x):
    """Tile the tensor three times along its first axis (1→3 channels)."""
    return x.repeat(3, 1, 1)


def _build_transform(augment):
    """Compose the preprocessing pipeline; ``augment`` adds the random flip."""
    steps = [ResizeTensor(IMG_SIZE)]
    if augment:
        # NOTE(review): this mirrors the last (width) axis, which for a
        # [N_MELS, T] spectrogram is time — confirm time reversal is intended.
        steps.append(T.RandomHorizontalFlip())
    steps.append(T.Lambda(_to_three_channels))
    # Mean 0 / std 1 leaves the values unchanged; adjust mean/std if needed.
    steps.append(T.Normalize([0.0]*3, [1.0]*3))
    return T.Compose(steps)


train_tf = _build_transform(augment=True)
test_tf  = _build_transform(augment=False)

train_ds = MelManifestDataset(MANIFEST_TRAIN, MEL_DIR, transform=train_tf)
test_ds  = MelManifestDataset(MANIFEST_TEST,  MEL_DIR, transform=test_tf)

# Settings shared by both loaders; only the training loader shuffles.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=8, pin_memory=True)
train_loader = DataLoader(train_ds, shuffle=True,  **_loader_kwargs)
test_loader  = DataLoader(test_ds,  shuffle=False, **_loader_kwargs)

print(f"Num classes: {num_classes}, train samples: {len(train_ds)}, test samples: {len(test_ds)}")

Num classes: 206, train samples: 69676, test samples: 11474


In [6]:
# Cell 6 — Load EfficientNet-B3 & Inspect Linears
# 'tf_efficientnet_b3_ns' is a deprecated alias that timm remaps (with a
# UserWarning) to 'tf_efficientnet_b3.ns_jft_in1k'; request the current name
# directly so the same pretrained weights load without the warning.
base_model = timm.create_model(
    'tf_efficientnet_b3.ns_jft_in1k',
    pretrained=True,
    in_chans=3,
    num_classes=num_classes,   # sizes the classification head to our label set
).to(device)

# List every nn.Linear submodule so the head's name is known when the
# LoRA / modules_to_save configuration is written.
linear_names = [
    name for name, module in base_model.named_modules()
    if isinstance(module, nn.Linear)
]
print("Linear modules found in EfficientNet-B3:")
for name in linear_names:
    print("  →", name)

  model = create_fn(


Linear modules found in EfficientNet-B3:
  → classifier


In [7]:
# Cell 7 — Wrap the backbone with LoRA adapters
# TARGET_MODULES lists the module names that receive LoRA adapters (here the
# conv layers; presumably the ones inside timm's EfficientNet blocks — verify
# against base_model.named_modules() if the architecture changes).
# MODULES_TO_SAVE names the head reported by the inspection cell ("classifier")
# so that it stays fully trainable rather than adapter-only.
TARGET_MODULES  = ["conv_pw", "conv_dw", "conv_pwl", "conv_head"]
MODULES_TO_SAVE = ["classifier"]

# Adapter hyperparameters, kept together so they are easy to tune.
_lora_hparams = dict(r=12, lora_alpha=24, lora_dropout=0.1, bias="none")

lora_config = LoraConfig(
    target_modules=TARGET_MODULES,
    modules_to_save=MODULES_TO_SAVE,
    **_lora_hparams,
)

model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()   # reports trainable vs. total parameter counts
model = model.to(device)

trainable params: 5,572,334 || all params: 16,585,188 || trainable%: 33.5983


In [8]:
# Cell 8 — Loss, Optimizer & Scheduler
criterion = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that are actually being trained.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=LR)

# Halve the learning rate once the value passed to scheduler.step() has not
# improved for more than `patience` consecutive calls.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=2)

In [9]:
# Cell 9 — Train & Validate Loop
def _run_epoch(loader, desc, training):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

    With ``training=True`` the model is put in train mode and the optimizer is
    stepped on every batch; otherwise the model is put in eval mode and
    gradients are disabled. Both metrics are sample-weighted.
    """
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.set_grad_enabled(training):
        for imgs, labels in tqdm(loader, desc=desc):
            imgs, labels = imgs.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            out  = model(imgs)
            loss = criterion(out, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_n = imgs.size(0)
            # criterion returns the batch mean, so weight it by the batch size.
            loss_sum  += loss.item()*batch_n
            n_correct += (out.argmax(1)==labels).sum().item()
            n_seen    += batch_n
    return loss_sum/n_seen, n_correct/n_seen


best_acc = 0.0
for epoch in range(1, NUM_EPOCHS+1):
    train_loss, train_acc = _run_epoch(train_loader, f"Epoch {epoch} Train", training=True)
    test_loss,  test_acc  = _run_epoch(test_loader,  f"Epoch {epoch} Val",   training=False)

    # The plateau scheduler is driven by the held-out loss.
    scheduler.step(test_loss)

    print(f"\nEpoch {epoch}: Train loss {train_loss:.4f}, acc {train_acc:.4f} | "
          f"Val loss {test_loss:.4f}, acc {test_acc:.4f}")

    # Checkpoint only when held-out accuracy strictly improves.
    if test_acc > best_acc:
        best_acc = test_acc
        torch.save(model.state_dict(), 'best_effnetb3_lora.pth')
        print(f"✔️ Saved best (test_acc={best_acc:.4f})")

print(f"\n Finished training. Best Val Acc: {best_acc:.4f}")


Epoch 1 Train:   0%|          | 0/2178 [00:00<?, ?it/s]

Epoch 1 Val:   0%|          | 0/359 [00:00<?, ?it/s]


Epoch 1: Train loss 2.9297, acc 0.3748 | Val loss 2.2957, acc 0.5054
✔️ Saved best (test_acc=0.5054)


Epoch 2 Train:   0%|          | 0/2178 [00:00<?, ?it/s]

Epoch 2 Val:   0%|          | 0/359 [00:00<?, ?it/s]


Epoch 2: Train loss 1.8108, acc 0.5968 | Val loss 1.9769, acc 0.5790
✔️ Saved best (test_acc=0.5790)


Epoch 3 Train:   0%|          | 0/2178 [00:00<?, ?it/s]

Epoch 3 Val:   0%|          | 0/359 [00:00<?, ?it/s]


Epoch 3: Train loss 1.4695, acc 0.6674 | Val loss 1.9267, acc 0.5951
✔️ Saved best (test_acc=0.5951)


Epoch 4 Train:   0%|          | 0/2178 [00:00<?, ?it/s]

Epoch 4 Val:   0%|          | 0/359 [00:00<?, ?it/s]


Epoch 4: Train loss 1.2590, acc 0.7094 | Val loss 1.8379, acc 0.6136
✔️ Saved best (test_acc=0.6136)


Epoch 5 Train:   0%|          | 0/2178 [00:00<?, ?it/s]

Epoch 5 Val:   0%|          | 0/359 [00:00<?, ?it/s]


Epoch 5: Train loss 1.1134, acc 0.7401 | Val loss 1.8439, acc 0.6130

 Finished training. Best Val Acc: 0.6136
