In [1]:
import numpy as np

def top_k_precision(y_true, y_scores, k):
    """
    Calculate top-k precision (precision@k) for multilabel classification.

    For every sample, the k highest-scoring classes are selected and the
    fraction of them that are true labels is computed; the per-sample values
    are then averaged.

    Parameters:
    y_true (array-like): Binary matrix of true labels (shape: n_samples x n_classes).
    y_scores (array-like): Matrix of predicted scores (shape: n_samples x n_classes).
    k (int): Number of top elements to consider for calculating precision (must be >= 1).

    Returns:
    float: Mean top-k precision across all samples.

    Raises:
    ValueError: If k is smaller than 1, or if y_true and y_scores differ in shape.

    Notes:
    The denominator is always k, so when k is larger than n_classes the
    result cannot reach 1.0. Ties between equal scores are resolved by the
    order np.argsort returns.
    """
    if k < 1:
        # With k == 0 the slice [-0:] would select *every* class and the
        # division below would divide by zero; a negative k would slice from
        # the front and produce a negative "precision". Reject both up front.
        raise ValueError(f"k must be a positive integer, got {k}")

    # Accept lists and CPU tensors as well as arrays
    y_true = np.asarray(y_true)
    y_scores = np.asarray(y_scores)
    if y_true.shape != y_scores.shape:
        raise ValueError(
            f"y_true and y_scores must have the same shape, "
            f"got {y_true.shape} and {y_scores.shape}"
        )

    # argsort is ascending, so the last k positions are the top-k classes
    top_k_precisions = [
        np.sum(true_row[np.argsort(score_row)[-k:]]) / k
        for true_row, score_row in zip(y_true, y_scores)
    ]

    return np.mean(top_k_precisions)

# Example: ground-truth binary label matrix (3 samples x 5 classes)
y_true = np.array([
    [1, 0, 0, 1, 0],
    [0, 1, 1, 0, 0],
    [1, 1, 0, 0, 0]
])

# Example: predicted scores from the model, same shape as y_true
y_scores = np.array([
    [0.8, 0.3, 0.2, 0.1, 0.1],
    [0.1, 0.7, 0.6, 0.3, 0.2],
    [0.9, 0.8, 0.1, 0.4, 0.3]
])

# Calculate top-k precision for k = 2
k = 2
precision_at_k = top_k_precision(y_true, y_scores, k)
# Build the label from k so the printed text cannot drift from the value used
print(f"Top-{k} Precision:", precision_at_k)


Top-2 Precision: 0.8333333333333334


In [2]:
# Raw scores of the first sample
print(y_scores[0])
# Class indices ordered from the lowest to the highest score
print(y_scores[0].argsort())
# The last two entries are therefore the indices of the two highest scores
indices = y_scores[0].argsort()[-2:]
print(indices)

[0.8 0.3 0.2 0.1 0.1]
[3 4 2 1 0]
[1 0]


In [8]:

from sklearn.metrics import roc_auc_score, precision_score, f1_score
from sklearn.metrics import classification_report, average_precision_score
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix 
from sklearn.metrics import accuracy_score, hamming_loss, precision_score, recall_score, f1_score, roc_auc_score
import torch

def evaluate(Y, pred_Y, k):
    """
    Compute multilabel classification metrics from raw model outputs.

    pred_Y is passed through a sigmoid and thresholded at 0.5 to obtain
    binary predictions, which are then compared with Y.

    Parameters:
    Y (array-like or torch.Tensor): Binary matrix of true labels
        (shape: n_samples x n_classes).
    pred_Y (array-like or torch.Tensor): Raw model outputs with the same
        shape as Y. A sigmoid is applied here, so the values are treated
        as logits.
    k (int): Number of top-scoring classes used for the top-k precision.

    Returns:
    dict: Metrics keyed by name:
        'accuracy'        - accuracy_score on the binary predictions
                            (subset accuracy for multilabel input),
        'micro_precision' - micro-averaged precision,
        'macro_precision' - macro-averaged precision,
        'recall'          - macro-averaged recall,
        'f1_score'        - macro-averaged F1,
        'auc'             - macro-averaged ROC AUC, or 0 when it cannot be
                            computed,
        'top_k_pred'      - mean top-k precision of the sigmoid scores.
    """
    # Sigmoid scores as a plain NumPy array. detach()/cpu() make this work for
    # grad-tracking or GPU tensors too; as_tensor avoids the copy warning that
    # torch.tensor() emits when pred_Y is already a tensor.
    preds_np = torch.sigmoid(torch.as_tensor(pred_Y)).detach().cpu().numpy()
    binary_preds_np = (preds_np > 0.5).astype(int)

    # Ground truth, likewise converted so that only NumPy arrays reach
    # scikit-learn and top_k_precision
    if isinstance(Y, torch.Tensor):
        true_labels_np = Y.detach().cpu().numpy()
    else:
        true_labels_np = np.asarray(Y)

    # Calculate accuracy
    accuracy = accuracy_score(true_labels_np, binary_preds_np)

    # Calculate micro/macro precision and macro recall / F1;
    # zero_division=0 reports 0 instead of warning for empty denominators
    precision_micro = precision_score(true_labels_np, binary_preds_np, average='micro', zero_division=0)
    precision_macro = precision_score(true_labels_np, binary_preds_np, average='macro', zero_division=0)
    recall_macro = recall_score(true_labels_np, binary_preds_np, average='macro', zero_division=0)
    f1_macro = f1_score(true_labels_np, binary_preds_np, average='macro', zero_division=0)

    # AUC needs both classes to be present. If the whole label matrix holds a
    # single value it is skipped, and if roc_auc_score raises ValueError
    # (e.g. presumably for a label column containing only one class) the
    # metric is reported as 0 rather than aborting the evaluation.
    try:
        if len(np.unique(true_labels_np)) > 1:
            auc = roc_auc_score(true_labels_np, preds_np, average='macro', multi_class='ovr')
        else:
            auc = 0
    except ValueError:
        auc = 0

    return {
        'accuracy': accuracy,
        'micro_precision': precision_micro,
        'macro_precision': precision_macro,
        'recall': recall_macro,
        'f1_score': f1_macro,
        'auc': auc,
        'top_k_pred': top_k_precision(true_labels_np, preds_np, k),
    }

# NOTE(review): evaluate() applies a sigmoid to its second argument and then
# thresholds at 0.5. Every value in y_scores is positive, so after the sigmoid
# all labels exceed 0.5 and are predicted positive. If y_scores already holds
# probabilities rather than logits, that is presumably not intended - confirm.
evaluate(y_true, y_scores, k=3)

{'accuracy': 0.0,
 'micro_precision': 0.4,
 'macro_precision': 0.39999999999999997,
 'recall': 0.8,
 'f1_score': 0.52,
 'auc': 0,
 'top_k_pred': 0.5555555555555555}