In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.amp import autocast
from sklearn.metrics import precision_recall_curve
from pathlib import Path


In [2]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE      = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data locations. The defaults are the original paths; export
# BIRDCLEF_FEATURE_DIR / BIRDCLEF_TAXONOMY / BIRDCLEF_CHECKPOINT to run the
# notebook on another machine without editing this cell.
FEATURE_DIR = os.environ.get("BIRDCLEF_FEATURE_DIR", "/home/jovyan/Features")
MANIFEST    = os.path.join(FEATURE_DIR, "manifest_test.csv")
TAXONOMY    = os.environ.get("BIRDCLEF_TAXONOMY", "/home/jovyan/Data/birdclef-2025/taxonomy.csv")
CHECKPOINT  = os.environ.get("BIRDCLEF_CHECKPOINT", "best_emb_mlp.pt")  # from your training run
EMB_KEY     = "embedding"            # key used to index each loaded embedding file
THRESHOLD   = 0.5                    # same as training

In [3]:
# Build the label vocabulary from the taxonomy file: every primary label is
# taken as a string, the list is sorted, and a label's position in the sorted
# list is its class index.
tax_df = pd.read_csv(TAXONOMY)
classes = tax_df["primary_label"].astype(str).tolist()
classes.sort()
num_classes = len(classes)
class_to_idx = dict(zip(classes, range(num_classes)))


In [4]:
HIDDEN_DIMS = [2048, 1024, 512]   # default hidden-layer widths, in order
DROPOUT      = 0.5                # default dropout probability after each hidden layer

class EmbeddingClassifier(nn.Module):
    """MLP head that maps an embedding vector to one logit per class.

    Every hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout; a final
    Linear layer emits ``num_cls`` raw logits (no activation is applied here).

    Args:
        emb_dim: Size of the input feature dimension.
        num_cls: Number of output logits.
        hidden_dims: Iterable of hidden-layer widths. ``None`` (default) uses
            the module-level ``HIDDEN_DIMS`` as it is at construction time.
        dropout: Dropout probability. ``None`` (default) uses the module-level
            ``DROPOUT`` as it is at construction time.
    """

    def __init__(self, emb_dim, num_cls, hidden_dims=None, dropout=None):
        super().__init__()
        # Resolve defaults at call time (not at def time) so editing the
        # globals and re-instantiating behaves exactly as before.
        if hidden_dims is None:
            hidden_dims = HIDDEN_DIMS
        if dropout is None:
            dropout = DROPOUT

        layers = []
        in_dim = emb_dim
        for h in hidden_dims:
            layers += [
                nn.Linear(in_dim, h),
                nn.BatchNorm1d(h),
                nn.ReLU(),
                nn.Dropout(dropout)
            ]
            in_dim = h
        layers.append(nn.Linear(in_dim, num_cls))
        # The attribute name `net` and the layer order determine the
        # state_dict keys ("net.<index>.<param>"), so both are kept as-is.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw logits produced by the layer stack for ``x``."""
        return self.net(x)

In [5]:
# Read the test manifest and use its first row's embedding file to discover
# the embedding width the classifier has to accept.
man_df = pd.read_csv(MANIFEST)
# Strip leading separators: os.path.join discards earlier components when a
# later one is absolute, which would drop FEATURE_DIR.
sample_rel = man_df.iloc[0]["emb_path"].lstrip(os.sep)
sample_path = os.path.join(FEATURE_DIR, "embeddings", sample_rel)
# The context manager closes the archive's file handle once the shape is read.
# NOTE(review): assumes the embedding files are .npz archives (keyed access) — confirm.
with np.load(sample_path) as sample_npz:
    emb_dim = sample_npz[EMB_KEY].shape[1]

model = EmbeddingClassifier(emb_dim, num_classes).to(DEVICE)
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code while being loaded.
state = torch.load(CHECKPOINT, map_location=DEVICE, weights_only=True)
model.load_state_dict(state)
# Inference mode: BatchNorm uses running statistics and Dropout is disabled.
model.eval()

EmbeddingClassifier(
  (net): Sequential(
    (0): Linear(in_features=2048, out_features=2048, bias=True)
    (1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=2048, out_features=1024, bias=True)
    (5): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): Dropout(p=0.5, inplace=False)
    (8): Linear(in_features=1024, out_features=512, bias=True)
    (9): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Dropout(p=0.5, inplace=False)
    (12): Linear(in_features=512, out_features=206, bias=True)
  )
)

In [6]:
# Pick one manifest row to run through the model. The seed is fixed so that
# Restart & Run All reproduces the same chunk; set SAMPLE_SEED = None to draw
# a different chunk on every run.
SAMPLE_SEED = 42
row = man_df.sample(1, random_state=SAMPLE_SEED).iloc[0]
chunk_id = row["chunk_id"]
# Strip leading separators so os.path.join keeps the FEATURE_DIR prefix.
rel_path = row["emb_path"].lstrip(os.sep)
emb_path = os.path.join(FEATURE_DIR, "embeddings", rel_path)

print(f"Running inference on chunk: {chunk_id}")


Running inference on chunk: XC446439_chk3


In [7]:
# Load the chunk's embeddings; the context manager closes the archive handle
# as soon as the array has been read into memory.
with np.load(emb_path) as emb_npz:
    emb_arr = emb_npz[EMB_KEY]                # shape (n_windows, emb_dim)
# Average over the first axis to get one vector for the whole chunk.
x_input = emb_arr.mean(axis=0).astype(np.float32)
x_tensor= torch.from_numpy(x_input).unsqueeze(0).to(DEVICE)  # [1, emb_dim]

# Mixed precision only on CUDA: follow DEVICE instead of hard-coding "cuda",
# and keep autocast disabled on CPU so results there stay in float32.
use_amp = DEVICE.type == "cuda"
with torch.no_grad(), autocast(device_type=DEVICE.type, enabled=use_amp):
    logits = model(x_tensor)                  # [1, num_classes]
    # Upcast before the sigmoid: in half precision distinct logits can collapse
    # to the same probability, creating ties for thresholding and top-1.
    probs  = torch.sigmoid(logits.float())[0].cpu().numpy()

In [8]:
# Collect (label, probability) for every class at or above the threshold in a
# single pass, then split the pairs into the two parallel lists.
hits = [(classes[i], p) for i, p in enumerate(probs) if p >= THRESHOLD]
ml_preds = [name for name, _ in hits]
ml_scores= [score for _, score in hits]

print("\nMulti‑label predictions (≥ {:.2f}):".format(THRESHOLD))
if not hits:
    # Nothing cleared the threshold.
    print("  • <none>")
for lab, sc in hits:
    print(f"  • {lab}: {sc:.3f}")


Multi‑label predictions (≥ 0.50):
  • 41778: 0.582
  • 65373: 0.548
  • 65962: 0.629
  • anhing: 0.716
  • bkcdon: 0.616
  • blhpar1: 0.705
  • bubwre1: 0.550
  • bucmot3: 0.525
  • colara1: 0.674
  • cotfly1: 0.811
  • crbtan1: 0.570
  • eardov1: 0.620
  • fotfly: 0.706
  • grasal4: 0.634
  • grnkin: 0.611
  • grysee1: 0.566
  • gybmar: 0.702
  • palhor2: 0.610
  • paltan1: 0.605
  • rosspo1: 0.565
  • rugdov: 0.610
  • rumfly1: 0.691
  • rutjac1: 0.513
  • saffin: 0.567
  • shtfly1: 0.611
  • sobtyr1: 0.774
  • socfly1: 0.706
  • solsan: 0.782
  • spepar1: 0.896
  • srwswa1: 0.901
  • stbwoo2: 0.537
  • strfly1: 0.901
  • thbeup1: 0.836
  • trokin: 0.836
  • tropar: 0.552
  • verfly: 0.801
  • whbant1: 0.707
  • whtdov: 0.719
  • whwswa1: 0.738
  • yebela1: 0.649
  • yecspi2: 0.526
  • yelori1: 0.699
  • yeofly1: 0.661


In [9]:
# Top-1 prediction: the single class with the highest probability.
primary_idx  = probs.argmax()
primary_score= probs[primary_idx]
primary_pred = classes[primary_idx]

print(
    "\nPrimary‑label (top‑1) prediction:",
    f"  → {primary_pred}: {primary_score:.3f}",
    sep="\n",
)


Primary‑label (top‑1) prediction:
  → srwswa1: 0.901
