In [5]:
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report
)
from sklearn.metrics import confusion_matrix
import numpy as np

# AASIST-3

In [6]:
aasist3_test = pd.read_csv("../results/AASIST3/AASIST3_AFAD_test_scores.txt", sep="\t")
aasist3_test.columns=["speaker_id", "audio", "file_name", "label", "gender", "tts", "x", "predicted", "xx"]
aasist3_test['label'] = aasist3_test['label'].replace({'fake': 0, 'real': 1})

  aasist3_test['label'] = aasist3_test['label'].replace({'fake': 0, 'real': 1})


In [7]:
aasist3_eleven = aasist3_test.query("tts in ['eleven_multilingual_v2', 'none'] and label in [1, 0]")
aasist3_openai = aasist3_test.query("tts in ['gpt-4o-mini-tts', 'none'] and label in [1, 0]")
aasist3_minimax = aasist3_test.query("tts in ['speech-2.5-hd-preview', 'none'] and label in [1, 0]")

In [8]:
def compute_metrics(y_true, y_pred):
    """
    Compute precision, recall, accuracy, F1 micro and macro scores using sklearn.
    
    Parameters:
    -----------
    y_true : array-like
        Ground truth labels
    y_pred : array-like
        Predicted labels
    
    Returns:
    --------
    dict : Dictionary containing all metrics
    """
    # Compute accuracy
    accuracy = accuracy_score(y_true, y_pred)
    
    # Compute precision (micro and macro)
    precision_micro = precision_score(y_true, y_pred, average='micro', zero_division=0)
    precision_macro = precision_score(y_true, y_pred, average='macro', zero_division=0)
    
    # Compute recall (micro and macro)
    recall_micro = recall_score(y_true, y_pred, average='micro', zero_division=0)
    recall_macro = recall_score(y_true, y_pred, average='macro', zero_division=0)
    
    # Compute F1 (micro and macro)
    f1_micro = f1_score(y_true, y_pred, average='micro', zero_division=0)
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
    
    # Compute per-class metrics
    precision_per_class = precision_score(y_true, y_pred, average=None, zero_division=0)
    recall_per_class = recall_score(y_true, y_pred, average=None, zero_division=0)
    f1_per_class = f1_score(y_true, y_pred, average=None, zero_division=0)
    
    # Get unique classes
    classes = np.unique(np.concatenate([y_true, y_pred]))
    
    return {
        'accuracy': accuracy,
        'precision_micro': precision_micro,
        'precision_macro': precision_macro,
        'recall_micro': recall_micro,
        'recall_macro': recall_macro,
        'f1_micro': f1_micro,
        'f1_macro': f1_macro,
        'per_class': {
            'precision': dict(zip(classes, precision_per_class)),
            'recall': dict(zip(classes, recall_per_class)),
            'f1': dict(zip(classes, f1_per_class))
        }
    }


def save_metrics_to_file(metrics, filename='metrics_results.txt'):
    """
    Save computed metrics to a text file.
    
    Parameters:
    -----------
    metrics : dict
        Dictionary containing metrics from compute_metrics function
    filename : str
        Name of the output file (default: 'metrics_results.txt')
    """
    with open(filename, 'w') as f:
        f.write("=" * 50 + "\n")
        f.write("CLASSIFICATION METRICS RESULTS\n")
        f.write("=" * 50 + "\n\n")
        
        # Write overall metrics
        f.write("Overall Metrics:\n")
        f.write("-" * 50 + "\n")
        f.write(f"Accuracy: {metrics['accuracy']:.4f}\n\n")
        
        f.write(f"Precision (Micro): {metrics['precision_micro']:.4f}\n")
        f.write(f"Precision (Macro): {metrics['precision_macro']:.4f}\n\n")
        
        f.write(f"Recall (Micro): {metrics['recall_micro']:.4f}\n")
        f.write(f"Recall (Macro): {metrics['recall_macro']:.4f}\n\n")
        
        f.write(f"F1 Score (Micro): {metrics['f1_micro']:.4f}\n")
        f.write(f"F1 Score (Macro): {metrics['f1_macro']:.4f}\n\n")
        
        # Write per-class metrics
        f.write("=" * 50 + "\n")
        f.write("Per-Class Metrics:\n")
        f.write("=" * 50 + "\n\n")
        
        for class_label in sorted(metrics['per_class']['precision'].keys()):
            f.write(f"Class {class_label}:\n")
            f.write(f"  Precision: {metrics['per_class']['precision'][class_label]:.4f}\n")
            f.write(f"  Recall:    {metrics['per_class']['recall'][class_label]:.4f}\n")
            f.write(f"  F1 Score:  {metrics['per_class']['f1'][class_label]:.4f}\n\n")
    
    print(f"Metrics saved to {filename}")

In [9]:
save_metrics_to_file(compute_metrics(aasist3_test['label'].to_list(), aasist3_test['predicted'].to_list()), '../results/AASIST3/AASIST3_AFAD_test_results.txt')
save_metrics_to_file(compute_metrics(aasist3_eleven['label'].to_list(), aasist3_eleven['predicted'].to_list()), '../results/AASIST3/AASIST3_eleven_results.txt')
save_metrics_to_file(compute_metrics(aasist3_openai['label'].to_list(), aasist3_openai['predicted'].to_list()), '../results/AASIST3/AASIST3_openai_results.txt')
save_metrics_to_file(compute_metrics(aasist3_minimax['label'].to_list(), aasist3_minimax['predicted'].to_list()), '../results/AASIST3/AASIST3_minimax_results.txt')

Metrics saved to ../results/AASIST3/AASIST3_AFAD_test_results.txt
Metrics saved to ../results/AASIST3/AASIST3_eleven_results.txt
Metrics saved to ../results/AASIST3/AASIST3_openai_results.txt
Metrics saved to ../results/AASIST3/AASIST3_minimax_results.txt


# Ad-hoc

In [10]:
fish = pd.read_csv("../results/AASIST3/AASIST3_fish_scores.txt", sep="\t")
fish.columns=["speaker_id", "audio", "file_name", "label", "gender", "tts", "x", "predicted", "xx"]
fish['label'] = fish['label'].replace({'fake': 0, 'real': 1})
save_metrics_to_file(compute_metrics(fish['label'].to_list(), fish['predicted'].to_list()), '../results/AASIST3/AASIST3_fish_test_results.txt')

Metrics saved to ../results/AASIST3/AASIST3_fish_test_results.txt


  fish['label'] = fish['label'].replace({'fake': 0, 'real': 1})


In [11]:
xtts = pd.read_csv("../results/AASIST3/AASIST3_xtts_scores.txt", sep="\t")
xtts.columns=["speaker_id", "audio", "file_name", "label", "gender", "tts", "x", "predicted", "xx"]
xtts['label'] = xtts['label'].replace({'fake': 0, 'real': 1})
save_metrics_to_file(compute_metrics(xtts['label'].to_list(), xtts['predicted'].to_list()), '../results/AASIST3/AASIST3_xtts_test_results.txt')

Metrics saved to ../results/AASIST3/AASIST3_xtts_test_results.txt


  xtts['label'] = xtts['label'].replace({'fake': 0, 'real': 1})


In [12]:
mms = pd.read_csv("../results/AASIST3/AASIST3_mms_scores.txt", sep="\t")
mms.columns=["speaker_id", "audio", "file_name", "label", "gender", "tts", "x", "predicted", "xx"]
mms['label'] = mms['label'].replace({'fake': 0, 'real': 1})
save_metrics_to_file(compute_metrics(mms['label'].to_list(), mms['predicted'].to_list()), '../results/AASIST3/AASIST3_mms_test_results.txt')

Metrics saved to ../results/AASIST3/AASIST3_mms_test_results.txt


  mms['label'] = mms['label'].replace({'fake': 0, 'real': 1})


In [13]:
t5 = pd.read_csv("../results/AASIST3/AASIST3_T5_scores.txt", sep="\t")
t5.columns=["speaker_id", "audio", "file_name", "label", "gender", "tts", "x", "predicted", "xx"]
t5['label'] = t5['label'].replace({'fake': 0, 'real': 1})
save_metrics_to_file(compute_metrics(t5['label'].to_list(), t5['predicted'].to_list()), '../results/AASIST3/AASIST3_T5_test_results.txt')

Metrics saved to ../results/AASIST3/AASIST3_T5_test_results.txt


  t5['label'] = t5['label'].replace({'fake': 0, 'real': 1})
