In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet50
from torch.amp import autocast
from sklearn.metrics import precision_recall_curve

In [2]:
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Root directory of the pre-computed features and the test manifest inside it.
FEATURE_DIR = "/home/jovyan/Features"
MANIFEST_CSV = os.path.join(FEATURE_DIR, "manifest_test.csv")

# Taxonomy table; its `primary_label` column defines the class list.
TAXONOMY_CSV = "/home/jovyan/Data/birdclef-2025/taxonomy.csv"

# Path to the saved best checkpoint (resolved relative to the working directory).
CHECKPOINT = "best_resnet50.pt"

# Key inside the .npz archive under which the augmented mel is stored.
MEL_KEY = "mel"

# Minimum sigmoid probability for a label to be reported as present.
THRESHOLD = 0.5

In [3]:
# Class list: every `primary_label` in the taxonomy, as strings, in sorted order.
tax_df = pd.read_csv(TAXONOMY_CSV)
classes = tax_df["primary_label"].astype(str).tolist()
classes.sort()
num_classes = len(classes)

# Reverse lookup from label string to its position in `classes`.
class_to_idx = dict(zip(classes, range(num_classes)))


In [4]:
# NOTE(review): DROPOUT is not referenced by the model builder below;
# presumably a leftover from the training configuration -- confirm before removing.
DROPOUT = 0.5


def get_resnet50_multilabel(num_classes):
    """Build a ResNet-50 for single-channel input with a `num_classes`-way head.

    The network is created without pretrained weights. Its stem convolution is
    replaced by one that accepts 1 input channel (all other hyper-parameters are
    copied from the stock layer), and the final fully connected layer is replaced
    by a fresh ``nn.Linear`` producing ``num_classes`` outputs.

    Args:
        num_classes: Number of output units of the replacement ``fc`` layer.

    Returns:
        The modified torchvision ResNet-50 module.
    """
    net = resnet50(weights=None)

    # Single-channel stem: same geometry as the original conv1, only in_channels differs.
    stock_stem = net.conv1
    net.conv1 = nn.Conv2d(
        in_channels=1,
        out_channels=stock_stem.out_channels,
        kernel_size=stock_stem.kernel_size,
        stride=stock_stem.stride,
        padding=stock_stem.padding,
        bias=False,
    )

    # New classification head sized for the label set.
    head_in_features = net.fc.in_features
    net.fc = nn.Linear(head_in_features, num_classes)
    return net

In [5]:
# Rebuild the architecture on the target device, then restore the saved parameters.
model = get_resnet50_multilabel(num_classes).to(DEVICE)

# The loaded object is passed straight to load_state_dict, so only tensors and
# plain containers are needed. weights_only=True restricts unpickling to those
# types instead of allowing arbitrary pickled objects from the checkpoint file.
state = torch.load(CHECKPOINT, map_location=DEVICE, weights_only=True)
model.load_state_dict(state)

# Switch to evaluation mode; kept as the final expression so the cell still
# displays the module summary.
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [6]:
# Fixed seed so a fresh "Restart & Run All" selects the same chunk and the
# outputs below are reproducible. Set to None to draw a different chunk each run.
SAMPLE_SEED = 42

df = pd.read_csv(MANIFEST_CSV)
row = df.sample(n=1, random_state=SAMPLE_SEED).iloc[0]
chunk_id = row.chunk_id

# Strip leading separators so the manifest path is treated as relative;
# os.path.join would otherwise discard FEATURE_DIR for an absolute component.
rel_path = row.mel_aug_path.lstrip(os.sep)
mel_path = os.path.join(FEATURE_DIR, "mel_aug", rel_path)

print(f"Running inference on chunk: {chunk_id}")

Running inference on chunk: XC716461_chk13


In [7]:
# np.load on an .npz returns a lazily-read archive that keeps the file open;
# the context manager closes it as soon as the array has been read into memory.
with np.load(mel_path) as data:
    mel = data[MEL_KEY]                  # expected shape [n_mels, n_frames]

# The two unsqueeze calls below assume a 2-D spectrogram; fail early with a
# clear message rather than with an opaque shape error inside the network.
if mel.ndim != 2:
    raise ValueError(
        f"Expected a 2-D mel array under key {MEL_KEY!r}, got shape {mel.shape}"
    )

# Add batch and channel axes and convert to float32: [1, 1, n_mels, n_frames].
x = torch.from_numpy(mel).unsqueeze(0).unsqueeze(0).float()
x = x.to(DEVICE, non_blocking=True)

In [8]:
# Take the autocast device type from DEVICE instead of hard-coding "cuda", so the
# CPU fallback runs without an autocast warning. Mixed precision is only enabled
# on CUDA; on CPU the forward pass stays in full precision.
use_amp = DEVICE.type == "cuda"

with torch.no_grad(), autocast(device_type=DEVICE.type, enabled=use_amp):
    logits = model(x)                  # [1, num_classes]
    # Cast to float32 before the sigmoid: under CUDA autocast the logits may be
    # half precision, which would coarsen the probabilities compared to THRESHOLD.
    probs  = torch.sigmoid(logits.float())[0].cpu().numpy()

In [9]:
# Indices of every class whose probability reaches the threshold, in class order.
selected = np.flatnonzero(probs >= THRESHOLD)
ml_preds = [(classes[i], float(probs[i])) for i in selected]

print(f"\nMulti‑label predictions (prob ≥ {THRESHOLD}):")
if not ml_preds:
    # Nothing cleared the threshold; say so explicitly instead of printing nothing.
    print("  • <none>")
for label, score in ml_preds:
    print(f"  • {label}: {score:.3f}")


Multi‑label predictions (prob ≥ 0.5):
  • cocwoo1: 0.769


In [10]:
# Top-1 prediction: the single most probable class, regardless of THRESHOLD.
primary_idx = int(np.argmax(probs))
primary_pred, primary_score = classes[primary_idx], float(probs[primary_idx])

print("\nPrimary‑label (top‑1) prediction:")
print(f"  → {primary_pred}: {primary_score:.3f}")


Primary‑label (top‑1) prediction:
  → cocwoo1: 0.769
