# Setup

In [1]:
import torch
import numpy as np
import json
import torch.nn as nn
import utils

In [2]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise CPU.
# NOTE(review): the command below is presumably a manual workaround to run in a
# shell when CUDA is not detected — confirm before relying on it.
# sudo modprobe nvidia_uvm
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
# Names used to build the dataset, config and model paths in the cells below.
DATASET_NAME = "DATASET_CNN_small"
MODEL_NAME = 'DeeperSEnd'

In [4]:
# All paths are relative to the notebook's working directory.
DATASET_PATH = f'../segments/{DATASET_NAME}'  # root folder of the segmented dataset
TRAIN_PATH = f"{DATASET_PATH}/train"          # training split
TEST_PATH = f"{DATASET_PATH}/test"            # test split
MODEL_PATH = f'./models/{MODEL_NAME}'         # folder holding this model's artifacts

# Load the model

In [5]:
# Class mapping derived from the training split; its length sizes the model's
# output layer below.
# NOTE(review): presumably maps class name -> class index and adds an extra
# "None" class because of include_None=True — confirm in utils.get_mappings.
mappings = utils.get_mappings(TRAIN_PATH, include_None=True)

In [6]:
# Rebuild the architecture with one output per entry in `mappings`, then
# restore the trained weights from the checkpoint.
model_class = utils.load_model_class(MODEL_NAME)
model = model_class(len(mappings))
model.to(device)
saving_path = f'models/{MODEL_NAME}/checkpoint_small_2.pth'
# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved on a GPU still loads when the notebook runs on CPU.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load(saving_path, map_location=device)
# Last expression of the cell: displays the key-matching result.
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [7]:
# Load the dataset configuration (a JSON file) for the selected dataset; it is
# passed to the data loaders and provides the "mappings" entry used below.
with open(f"utils/{DATASET_NAME}/dataset_config.json") as f:
    dataset_config = json.load(f)

In [8]:
# Inspect the class-name -> index mapping stored in the dataset configuration.
dataset_config["mappings"]

{'Wind': 0,
 'Regulus ignicapilla_Common Firecrest': 1,
 'Sylvia atricapilla_Eurasian Blackcap': 2,
 'Fringilla coelebs_Common Chaffinch': 3,
 'Troglodytes troglodytes_Eurasian Wren': 4,
 'Muscicapa striata_Spotted Flycatcher': 5,
 'Phylloscopus collybita_Common Chiffchaff': 6,
 'Turdus viscivorus_Mistle Thrush': 7,
 'Glaucidium passerinum_Eurasian Pygmy-Owl': 8,
 'Pyrrhula pyrrhula_Eurasian Bullfinch': 9,
 'Pecking': 10,
 'Periparus ater_Coal Tit': 11,
 'Prunella modularis_Dunnock': 12,
 'Lophophanes cristatus_Crested Tit': 13,
 'Regulus regulus_Goldcrest': 14,
 'Insect': 15,
 'Aeroplane': 16,
 'Vegetation': 17,
 'Rain': 18,
 'Turdus merula_Eurasian Blackbird': 19,
 'Certhia familiaris_Eurasian Treecreeper': 20,
 'Erithacus rubecula_European Robin': 21,
 'Turdus philomelos_Song Thrush': 22,
 'Bat': 23,
 'Loxia curvirostra_Common Crossbill': 24,
 'Dendrocopos major_Great Spotted Woodpecker': 25,
 'Dryocopus martius_Black Woodpecker': 26,
 'Phylloscopus trochilus_Willow Warbler': 27,
 '

# Custom Thresholds Creation

In [9]:
from collections import defaultdict
from sklearn.metrics import f1_score

def calculate_conf_scores(valid_loader, model, mappings):
    """Collect per-class sigmoid confidences for every sample of a loader.

    The reference species of each sample is taken from the name of the
    directory that contains its file (second-to-last ``/``-separated path
    component), so each sample has exactly one "correct" class here.

    Args:
        valid_loader: iterable yielding ``(mel_spec, labels, file_paths)``
            batches; ``labels`` is ignored. Any batch size is supported:
            every sample of the batch is scored.
        model: ``torch.nn.Module`` returning one logit per class for each
            sample. It is switched to eval mode and moved to the selected
            device (CUDA when available, otherwise CPU).
        mappings: dict whose keys are the class names; the i-th key labels
            the i-th model output.

    Returns:
        ``defaultdict(list)`` mapping each class name to a list of
        ``(probability, is_correct)`` tuples, one per scored sample.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    model.to(device)

    # Built once: the class order never changes while iterating the loader.
    class_names = list(mappings.keys())
    conf_scores = defaultdict(list)

    with torch.no_grad():
        for mel_spec, _, file_paths in valid_loader:
            mel_spec = mel_spec.to(device)
            batch_probs = torch.sigmoid(model(mel_spec)).cpu().numpy()

            # Walk every sample of the batch, not just the first one, so that
            # loaders with batch_size > 1 do not silently drop data.
            for sample_probs, path in zip(batch_probs, file_paths):
                # The species is the name of the folder containing the file.
                correct_species = path.split("/")[-2]

                for i, prob in enumerate(sample_probs):
                    species_name = class_names[i]
                    is_correct = species_name == correct_species
                    conf_scores[species_name].append((prob, is_correct))

    return conf_scores

In [10]:
def compute_best_thresholds(conf_scores, num_thresholds=100, min_thresh=0.05, max_thresh=0.95):
    """Grid-search, per class, the decision threshold with the highest F1.

    Args:
        conf_scores: dict mapping a class name to a non-empty list of
            ``(probability, is_correct)`` tuples.
        num_thresholds: number of evenly spaced candidates to try.
        min_thresh: lowest candidate threshold (inclusive).
        max_thresh: highest candidate threshold (inclusive).

    Returns:
        dict mapping each class name to its selected threshold. A class whose
        F1 is 0 for every candidate keeps the default of 0.5; on ties the
        lowest candidate wins because only strict improvements are accepted.
    """
    thresholds = {}

    for species, scored in conf_scores.items():
        confidences, labels = zip(*scored)
        confidences = np.array(confidences)
        labels = np.array(labels).astype(int)

        # (threshold, f1) of the best candidate seen so far.
        winner = (0.5, 0.0)
        for candidate in np.linspace(min_thresh, max_thresh, num_thresholds):
            predicted = (confidences >= candidate).astype(int)
            score = f1_score(labels, predicted, zero_division=0)
            if score > winner[1]:
                winner = (candidate, score)

        best_thresh, best_f1 = winner
        thresholds[species] = best_thresh
        print(f"📊 {species} -> {best_thresh:.3f}, F1-score: {best_f1:.3f}")

    return thresholds



In [11]:
def compute_distribution_based_thresholds(conf_scores, min_thresh=0.05, max_thresh=0.95):
    """Derive per-class thresholds from the confidence of the positive samples.

    For each class the threshold is ``mean - 2 * std`` of the probabilities
    the model assigned to samples that really belong to that class, clipped
    to ``[min_thresh, max_thresh]``.

    Args:
        conf_scores: dict mapping a class name to a non-empty list of
            ``(probability, is_correct)`` tuples.
        min_thresh: lower clipping bound for the threshold.
        max_thresh: upper clipping bound for the threshold.

    Returns:
        dict mapping each class name to its threshold. Classes without any
        positive sample fall back to 0.5.
    """
    thresholds = {}

    for species, values in conf_scores.items():
        probs, truths = zip(*values)
        probs = np.array(probs)
        truths = np.array(truths).astype(int)

        # Keep only the confidences of samples that belong to this class.
        correct_probs = probs[truths == 1]

        if len(correct_probs) == 0:
            thresholds[species] = 0.5  # fallback
            print(f"⚠️ Specie: {species}, nessuna predizione corretta trovata. Soglia fallback: 0.5")
            continue

        # Statistics are taken over the positive samples only; using all the
        # samples would let the (many) negatives dominate the mean/std and
        # collapse the threshold towards min_thresh.
        mean_conf = np.mean(correct_probs)
        std_conf = np.std(correct_probs)
        threshold = np.clip(mean_conf - 2 * std_conf, min_thresh, max_thresh)

        thresholds[species] = threshold
        print(f"📊 {species} -> {threshold:.3f}, (mean={mean_conf:.3f}, std={std_conf:.3f}), samples: {len(correct_probs)}")

    return thresholds


In [12]:
# Score the validation split one sample per batch, then pick one decision
# threshold per class from those scores.
# NOTE(review): the recorded run of this cell ended with a FileNotFoundError for
# a file under '.../train_specs/None/' — check the paths the "valid" split
# resolves to before relying on `best_thresholds`.
valid_loader = utils.get_dataloader(dataset_config, split="valid", batch_size=1)
conf_scores = calculate_conf_scores(valid_loader, model, dataset_config["mappings"])
best_thresholds = compute_best_thresholds(conf_scores)
# Alternative strategy (disabled): thresholds from the confidence distribution.
# best_thresholds = compute_distribution_based_thresholds(conf_scores)

FileNotFoundError: [Errno 2] No such file or directory: '../segments/DATASET_CNN_small/train_specs/None/20190621_110000_241_5.pt'

# Test Model

In [None]:
from sklearn.metrics import average_precision_score, precision_recall_fscore_support

def compute_samplewise_mAP(y_true, y_probs):
    """
    Compute the sample-wise mAP (mean of the average precision of each sample).

    Rows of ``y_true`` without any positive label are skipped. ``y_true`` and
    ``y_probs`` are indexed as ``[sample, class]``.
    """
    n_samples = y_true.shape[0]
    per_sample_ap = [
        average_precision_score(y_true[row], y_probs[row])
        for row in range(n_samples)
        if np.sum(y_true[row]) != 0  # skip samples with no positive label
    ]
    return np.mean(per_sample_ap)

def compute_classwise_mAP(y_true, y_probs):
    """
    Compute the class-wise mAP (mean of the average precision of each class).

    Columns of ``y_true`` without any positive label are skipped. ``y_true``
    and ``y_probs`` are indexed as ``[sample, class]``.
    """
    n_classes = y_true.shape[1]
    per_class_ap = [
        average_precision_score(y_true[:, col], y_probs[:, col])
        for col in range(n_classes)
        if np.sum(y_true[:, col]) != 0  # skip classes that never occur
    ]
    return np.mean(per_class_ap)

def compute_f05(y_true, y_pred):
    """Return the macro-averaged F-beta score with beta=0.5.

    Undefined precision/recall values are counted as 0 (``zero_division=0``).
    """
    # Result layout: (precision, recall, fbeta, support).
    scores = precision_recall_fscore_support(
        y_true, y_pred, beta=0.5, average='macro', zero_division=0
    )
    precision, recall, f05, support = scores
    return f05

In [None]:
# Reverse lookup built from `mappings`: each value becomes a key pointing back
# to its original key (presumably class index -> class name).
inverse_mappings = {value: key for key, value in mappings.items()}

In [None]:
import numpy as np
from sklearn.metrics import classification_report
import csv
import os

def _split_segment_path(path):
    """Split a segment file path into ``(audio_file_name, segment_id)``.

    The file name (without extension) must consist of exactly four
    ``_``-separated fields; the first two form the audio name (with a
    ``.WAV`` suffix appended), the last two form the segment id.
    """
    basename = os.path.splitext(path.split("/")[-1])[0]
    date, time, segm1, segm2 = basename.split("_")
    return "_".join([date, time]) + ".WAV", "_".join([segm1, segm2])


def test_model(model, dataset_config, batch_size=100, thresholds=0.5):
    """Evaluate ``model`` on the test split and collect per-segment predictions.

    Args:
        model: ``torch.nn.Module`` producing one logit per class; it is moved
            to the selected device (CUDA when available, otherwise CPU) and
            switched to eval mode.
        dataset_config: dataset configuration passed to
            ``utils.get_dataloader``; the keys of its ``'mappings'`` entry,
            in order, name the model outputs.
        batch_size: batch size of the test loader.
        thresholds: either a single number applied to every class, or a dict
            mapping class name -> threshold (classes missing from the dict
            use 0.5). A class is predicted when its probability is strictly
            greater than its threshold.

    Returns:
        Tuple ``(avg_loss, clf_report, samplewise_map, classwise_map,
        f05_score, test_pred_segments)`` where ``avg_loss`` is the mean of
        the per-batch BCE-with-logits losses, ``clf_report`` is the
        ``classification_report`` dict and ``test_pred_segments`` maps
        ``audio name -> segment id -> {class name: probability}`` for the
        classes predicted on each segment.

    Side effects:
        Writes the three summary metrics to
        ``models/{MODEL_NAME}/metrics_output.csv`` (``MODEL_NAME`` is read
        from the notebook's global scope).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"\n🧬 Advanced testing on: {device}")
    test_pred_segments = {}

    test_loader = utils.get_dataloader(dataset_config, split="test", batch_size=batch_size, shuffle=False)
    # Inputs are moved to `device` below; make sure the weights are there too.
    model.to(device)
    model.eval()
    criterion = nn.BCEWithLogitsLoss()
    class_names = list(dataset_config['mappings'].keys())
    total_loss = 0.0
    all_preds = []
    all_probs = []
    all_labels = []

    use_custom_threshold = isinstance(thresholds, dict)

    with torch.no_grad():
        for mel_spec, labels, file_path in test_loader:
            mel_spec = mel_spec.to(device)
            labels = labels.to(device)

            outputs = model(mel_spec)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            probs = torch.sigmoid(outputs)

            if use_custom_threshold:
                batch_preds = torch.zeros_like(probs)
                for i, class_name in enumerate(class_names):
                    thresh = thresholds.get(class_name, 0.5)
                    batch_preds[:, i] = (probs[:, i] > thresh).float()
            else:
                batch_preds = (probs > thresholds).float()

            # Probabilities of the predicted classes only (0 elsewhere), moved
            # to the host once per batch instead of once per element.
            kept_probs = (probs * batch_preds).cpu().tolist()

            # Record EVERY sample of the batch: looking only at index 0 would
            # drop all but one segment per batch whenever batch_size > 1.
            for path, row in zip(file_path, kept_probs):
                audio_name, segm = _split_segment_path(path)
                segment_scores = test_pred_segments.setdefault(audio_name, {}).setdefault(segm, {})
                # Output i is named after the i-th class of the dataset
                # config, the same convention used for the thresholds and the
                # classification report; no notebook-global lookup is needed.
                segment_scores.update(
                    {class_names[i]: p for i, p in enumerate(row) if p != 0}
                )

            all_probs.append(probs.cpu())
            all_preds.append(batch_preds.cpu())
            all_labels.append(labels.cpu())

    # Mean of the per-batch mean losses.
    avg_loss = total_loss / len(test_loader)
    all_probs = torch.cat(all_probs).numpy()
    all_preds = torch.cat(all_preds).numpy()
    all_labels = torch.cat(all_labels).numpy()

    samplewise_map = compute_samplewise_mAP(all_labels, all_probs)  # mAP
    classwise_map = compute_classwise_mAP(all_labels, all_probs)    # cmAP
    f05_score = compute_f05(all_labels, all_preds)

    with open(f"models/{MODEL_NAME}/metrics_output.csv", mode="w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["Metric", "Value"])
        writer.writerow(["mAP (sample-wise)", samplewise_map])
        writer.writerow(["cmAP (class-wise)", classwise_map])
        writer.writerow(["F0.5 Score", f05_score])

    # Per-class precision / recall / F1 / support as a nested dict.
    clf_report = classification_report(all_labels, all_preds, target_names=class_names, output_dict=True, zero_division=0)
    return avg_loss, clf_report, samplewise_map, classwise_map, f05_score, test_pred_segments

In [None]:
# Evaluate on the test split using the per-class thresholds selected above.
avg_loss, clf_report, samplewise_map, classwise_map, f05_score, test_pred_segments = test_model(model, dataset_config, thresholds=best_thresholds)


🧬 Advanced testing on: cuda


In [None]:
# Release cached, currently unused GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

In [None]:
# Persist the per-segment predictions (audio name -> segment -> {class: prob})
# next to the model's other artifacts.
with open(f"{MODEL_PATH}/test_pred_segments.json", "w") as f:
    json.dump(test_pred_segments, f)

In [None]:
# Summary metrics returned by test_model.
# NOTE(review): the label "mcAP" below refers to the class-wise mAP, which the
# metrics CSV written by test_model labels "cmAP" — consider aligning the names.
print("mAP Score: ", samplewise_map)
print("mcAP Score: ", classwise_map)
print("F0.5 Score: ", f05_score)

mAP Score:  0.42513475328648626
mcAP Score:  0.5645742075671072
F0.5 Score:  0.4059520060196651


In [None]:
from io import StringIO
import pandas as pd

# Turn the nested classification-report dict into a DataFrame by serialising it
# to JSON and reading it back with orient='index': one row per report entry,
# one column per metric (precision, recall, f1-score, support).
clf_report_df = pd.read_json(StringIO(json.dumps(clf_report)), orient='index')
clf_report_df

Unnamed: 0,precision,recall,f1-score,support
Wind,0.237467,0.803571,0.366599,112
Regulus ignicapilla_Common Firecrest,0.425656,0.924051,0.582834,790
Sylvia atricapilla_Eurasian Blackcap,0.773356,0.933194,0.84579,1437
Fringilla coelebs_Common Chaffinch,0.525145,0.788316,0.630366,2636
Troglodytes troglodytes_Eurasian Wren,0.29321,0.669014,0.407725,142
Muscicapa striata_Spotted Flycatcher,0.905405,0.87013,0.887417,154
Phylloscopus collybita_Common Chiffchaff,0.72486,0.981096,0.833735,529
Turdus viscivorus_Mistle Thrush,0.54918,0.917808,0.687179,73
Glaucidium passerinum_Eurasian Pygmy-Owl,0.589744,1.0,0.741935,23
Pyrrhula pyrrhula_Eurasian Bullfinch,0.666667,0.666667,0.666667,3


In [None]:
# Persist the raw classification-report dict next to the model's other artifacts.
with open(f'{MODEL_PATH}/classification_report.json', 'w') as f:
    json.dump(clf_report, f)

In [None]:
# Final cleanup: release cached, currently unused GPU memory.
torch.cuda.empty_cache()