# Statistiche di valutazione del modello addestrato

In [1]:
from typing import Optional, Union  
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
    confusion_matrix,
    classification_report,
    matthews_corrcoef,
    cohen_kappa_score,
    balanced_accuracy_score,
    brier_score_loss,
    log_loss
)

def evaluate_binary_classifier(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    y_score: Optional[np.ndarray] = None,
    pos_label: Union[int, str] = 1,
    show_report: bool = True
) -> pd.Series:
    """
    Compute the main binary-classification metrics and return them as a pandas Series.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted hard labels.
    y_score : array-like, optional
        Score of the positive class for each sample. When given, ROC-AUC,
        PR-AUC, log-loss and Brier score are added to the result (log-loss and
        Brier score expect probabilities).
    pos_label : int or str, default 1
        Label treated as the positive class.
    show_report : bool, default True
        When True, print the confusion matrix, its four counts and the
        scikit-learn classification report.

    Returns
    -------
    pd.Series
        Series named "metrics" holding, in order: the label-based metrics, the
        score-based metrics (only if `y_score` was given) and the confusion
        matrix counts.

    Raises
    ------
    ValueError
        If more than one label different from `pos_label` is observed, i.e.
        the problem is not binary.
    """
    # Normalise inputs to arrays
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_score is not None:
        y_score = np.asarray(y_score)

    # Infer the negative label from the data instead of assuming it is 0, so
    # that label sets such as {1, 2}, {-1, 1} or strings yield correct counts.
    observed = np.union1d(y_true, y_pred).tolist()
    negatives = [label for label in observed if label != pos_label]
    if len(negatives) > 1:
        raise ValueError(
            f"Expected a binary problem, but found several labels besides "
            f"pos_label={pos_label!r}: {negatives!r}"
        )
    if negatives:
        neg_label = negatives[0]
    else:
        # Only the positive class is present: keep the historical default (0),
        # unless that would collide with pos_label itself.
        neg_label = 0 if pos_label != 0 else 1
    label_order = [neg_label, pos_label]

    # Label-based metrics
    metrics = {
        "accuracy"              : accuracy_score(y_true, y_pred),
        "balanced_accuracy"     : balanced_accuracy_score(y_true, y_pred),
        "precision"             : precision_score(y_true, y_pred, pos_label=pos_label),
        "recall"                : recall_score(y_true, y_pred, pos_label=pos_label),
        "f1"                    : f1_score(y_true, y_pred, pos_label=pos_label),
        "matthews_corrcoef"     : matthews_corrcoef(y_true, y_pred),
        "cohen_kappa"           : cohen_kappa_score(y_true, y_pred),
    }

    # Metrics that need scores / probabilities
    if y_score is not None:
        # Binarise the targets (1 = positive) so every score metric honours
        # pos_label; for 0/1 labels with pos_label=1 this is a no-op.
        y_true_bin = (y_true == pos_label).astype(int)
        metrics.update({
            "roc_auc"           : roc_auc_score(y_true_bin, y_score),
            "pr_auc"            : average_precision_score(y_true_bin, y_score),
            "log_loss"          : log_loss(y_true_bin, y_score, labels=[0, 1]),
            "brier_score_loss"  : brier_score_loss(y_true_bin, y_score),
        })

    # Flattened confusion matrix (rows = true, columns = predicted), computed once
    cm = confusion_matrix(y_true, y_pred, labels=label_order)
    tn, fp, fn, tp = cm.ravel()
    metrics.update({
        "true_negatives" : tn,
        "false_positives": fp,
        "false_negatives": fn,
        "true_positives" : tp,
    })

    if show_report:
        print("=== Confusion Matrix ===")
        print(cm)
        print(f"True Positive: {tp}")
        print(f"True Negative: {tn}")
        print(f"False Positive: {fp}")
        print(f"False Negative: {fn}")
        print("\n=== Classification Report ===")
        print(classification_report(y_true, y_pred, labels=label_order, target_names=["neg", "pos"]))

    return pd.Series(metrics, name="metrics")

# ESEMPIO D’USO -----------------------------------------------------------
# y_true  = np.random.randint(0, 2, size=100)
# y_pred  = np.random.randint(0, 2, size=100)
# y_score = np.random.rand(100)
# results = evaluate_binary_classifier(y_true, y_pred, y_score)
# print("\n=== Metriche ===")
# print(results)


In [2]:
import os
import torch
from arguments import prepare_finetuning_args, Args
from dataset import build_dataset
from torch.utils.data import DataLoader
import models
from timm.models import create_model



def predict_label(model, videos, device=None):
    """
    Predict the class index of every sample in a batch.

    Parameters
    ----------
    model : torch.nn.Module
        Model producing per-class logits; it is called as `model(videos)`.
    videos : torch.Tensor
        Input batch; the batch dimension is assumed to be the first one.
    device : torch.device or str, optional
        Device on which to run the forward pass. Defaults to the device of the
        model's first parameter, so the function no longer depends on a
        module-level `args` object and the input always lands where the model
        lives. If the model has no parameters the batch is left where it is.

    Returns
    -------
    np.ndarray
        Predicted class indices, one per sample (argmax over dimension 1).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else videos.device

    videos = videos.to(device)
    with torch.no_grad():
        logits = model(videos)  # (B, nb_classes)
        pred = torch.argmax(logits, dim=1)  # class index per sample

    return pred.cpu().numpy()

def get_path_pred_label(model, data_loader):
    """
    Run `model` over every batch of `data_loader` and collect per-sample results.

    Each batch is expected to unpack as `(videos, labels, folder_path)`, with
    `labels` being a tensor.

    Returns
    -------
    tuple of three lists
        `(paths, predictions, labels)`, aligned sample by sample in the order
        the loader yielded them.
    """
    paths, predictions, targets = [], [], []

    for batch_videos, batch_labels, batch_paths in data_loader:
        # predict_label yields one class index per sample of the batch
        batch_preds = predict_label(model, batch_videos)
        batch_targets = batch_labels.detach().cpu().numpy()

        targets.extend(batch_targets)
        predictions.extend(batch_preds)
        paths.extend(batch_paths)

    return paths, predictions, targets


args = prepare_finetuning_args()
# Build the evaluation split (per the original note, read from val_supervised.csv)
dataset_val, _ = build_dataset(is_train=False, test_mode=False, args=args)

data_loader_val = DataLoader(
    dataset_val,
    batch_size=args.batch_size,
    shuffle=False,        # every sample is evaluated once; a fixed order keeps paths/preds reproducible
    num_workers=args.num_workers,
    pin_memory=args.pin_mem,
    drop_last=False
)

# We want to collect the predictions (single assignment; the flag used to be
# set to False and immediately overwritten).
get_prediction = True

# Instantiate the model
print(f"Creating model: {args.model} (nb_classes={args.nb_classes})")
model = create_model(
    args.model,
    num_classes=args.nb_classes,
    drop_rate=0.0,
    drop_path_rate=args.drop_path,
    #attn_drop_rate=0.0,
    drop_block_rate=None,
    **args.__dict__
)

device = args.device

# Load the checkpoint weights into the model
checkpoint_path = "output/checkpoint-best_850video.pth"
if os.path.exists(checkpoint_path):
    # NOTE(review): torch.load unpickles the file, so only load checkpoints
    # from a trusted source.
    ckpt = torch.load(checkpoint_path, map_location="cpu")
    # Accept both {"model": state_dict, ...} checkpoints and bare state dicts.
    state_dict = ckpt["model"] if "model" in ckpt else ckpt
    load_result = model.load_state_dict(state_dict, strict=False)
    # strict=False never raises on key mismatches, so surface them explicitly.
    print(f"Checkpoint loaded. Missing keys: {load_result.missing_keys}")
    if load_result.unexpected_keys:
        print(f"Unexpected keys ignored: {load_result.unexpected_keys}")
    print("Checkpoint caricato correttamente.")
else:
    print("ATTENZIONE: file checkpoint non trovato. Userai i pesi random del modello.")


model.to(device)
model.eval()


all_paths, all_preds, all_labels = get_path_pred_label(model, data_loader_val)

  from .autonotebook import tqdm as notebook_tqdm


Number of the class = 2
Creating model: vit_giant_patch14_224 (nb_classes=2)
Checkpoint loaded. Missing keys: []
Checkpoint caricato correttamente.


In [12]:
# Turn the collected labels/predictions into arrays and cache them on disk,
# so the metrics can be recomputed without repeating inference.
y_true, y_pred = np.array(all_labels), np.array(all_preds)
np.save("y_true.npy", y_true)
np.save("y_pred.npy", y_pred)


In [2]:
# Reload the cached ground-truth labels and predictions from disk.
y_true, y_pred = np.load("y_true.npy"), np.load("y_pred.npy")

In [3]:

# Score the predictions (no y_score is passed, so only label-based metrics are
# computed), then print the resulting Series under its header.
results = evaluate_binary_classifier(y_true, y_pred)
print("\n=== Metriche ===", results, sep="\n")

=== Confusion Matrix ===
[[419  81]
 [181 319]]
True Positive: 319
True Negative: 419
False Positive: 81
False Negative: 181

=== Classification Report ===
              precision    recall  f1-score   support

         neg       0.70      0.84      0.76       500
         pos       0.80      0.64      0.71       500

    accuracy                           0.74      1000
   macro avg       0.75      0.74      0.74      1000
weighted avg       0.75      0.74      0.74      1000


=== Metriche ===
accuracy               0.738000
balanced_accuracy      0.738000
precision              0.797500
recall                 0.638000
f1                     0.708889
matthews_corrcoef      0.485815
cohen_kappa            0.476000
true_negatives       419.000000
false_positives       81.000000
false_negatives      181.000000
true_positives       319.000000
Name: metrics, dtype: float64


In [4]:
# Sanity check by hand: recall = TP / (TP + FN), using the counts printed above.
319/(319+181)

0.638

In [5]:
# Sanity check by hand: precision = TP / (TP + FP), using the counts printed above.
319/(319+81)

0.7975

In [7]:
# Sanity check by hand: accuracy = (TN + TP) / total samples, using the counts printed above.
(419+319)/1000

0.738