In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from torchvision.models import resnet50

In [3]:
DEVICE       = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
THRESHOLD    = 0.5
FEATURE_BASE = "/home/jovyan/Features"
CHECKPOINT   = "resnet50_epoch_1.pt" 

In [4]:
tax_df      = pd.read_csv("/home/jovyan/Data/birdclef-2025/taxonomy.csv")
classes     = sorted(tax_df['primary_label'].astype(str).tolist())
num_classes = len(classes)

In [6]:
def get_resnet50_multilabel(num_classes: int):
    model = resnet50(weights=None)
    # adapt first conv to accept 1‑channel input
    model.conv1 = nn.Conv2d(
        in_channels=1,
        out_channels=model.conv1.out_channels,
        kernel_size=model.conv1.kernel_size,
        stride=model.conv1.stride,
        padding=model.conv1.padding,
        bias=False
    )
    # replace the final fully‑connected layer
    in_feat = model.fc.in_features
    model.fc = nn.Linear(in_feat, num_classes)
    return model

model = get_resnet50_multilabel(num_classes).to(DEVICE)

# load checkpoint weights
ckpt = torch.load(CHECKPOINT, map_location=DEVICE)
model.load_state_dict(ckpt["model_state"])
model.eval()

ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
test_df = pd.read_csv(os.path.join(FEATURE_BASE, "manifest_test.csv"))
sample  = test_df.sample(1).iloc[0]
print("Running inference on chunk:", sample.chunk_id)

Running inference on chunk: XC147511_chk5


In [8]:
rel_path = sample.mel_aug_path.lstrip(os.sep)
npz_path = os.path.join(FEATURE_BASE, "mel_aug", rel_path)
npz      = np.load(npz_path)
mel      = npz["mel"]   # shape: (n_mels, n_frames)

# convert to tensor [1,1,n_mels,n_frames]
x = torch.from_numpy(mel).unsqueeze(0).unsqueeze(0).float().to(DEVICE)

In [9]:
with torch.no_grad():
    logits = model(x)                  # [1, num_classes]
    probs  = torch.sigmoid(logits)[0]  # [num_classes]

In [10]:
pred_idxs = (probs >= THRESHOLD).nonzero(as_tuple=False).flatten().tolist()
if isinstance(pred_idxs, int):
    pred_idxs = [pred_idxs]

print(f"\nPredictions (threshold ≥ {THRESHOLD}):")
for i in pred_idxs:
    print(f"  • {classes[i]}: {probs[i]:.3f}")


Predictions (threshold ≥ 0.5):
  • whtdov: 1.000


In [11]:
top5 = probs.cpu().topk(5)
print("\nTop 5 predictions:")
for score, idx in zip(top5.values, top5.indices):
    print(f"  • {classes[idx]}: {score:.4f}")


Top 5 predictions:
  • whtdov: 0.9999
  • speowl1: 0.0163
  • strcuc1: 0.0011
  • strowl1: 0.0011
  • 66531: 0.0007
