This notebook performs score fusion: it combines the per-utterance scores that several models produced on each benchmark dataset.

In [2]:
import os
import sys
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Average score-fusion

In [7]:
import os

# Root folder of the benchmark; every sub-directory is treated as one benchmark dataset.
DATA_DIR = "/nvme1/hungdx/Lightning-hydra/data/huggingface_benchrmark_Speech-DF-Arena"
# Root folder of the results; score files live in one sub-directory per model.
SCORE_DIR = "/nvme1/hungdx/Lightning-hydra/logs/results/huggingface_benchrmark_Speech-DF-Arena"
# Name of the sub-directory of SCORE_DIR that receives the fused score files.
SCORE_FUSION_OUT = "fused_scores2"
# Models whose scores are fused; each entry is the name of a sub-directory of SCORE_DIR.
LIST_OF_SCORES = ["ConformerTCM_MDT_2B_LargeCorpus_e18", "ConformerTCM_MDT_LoRA_LargeCorpus_MoreElevenlabs"]
# Benchmark dataset names are the sub-directories of DATA_DIR (plain files are ignored).
# os.listdir returns entries in arbitrary order, so the names are sorted to make the
# processing order, and therefore the log output, identical on every run.
LIST_OF_BENCHMARK_DATASETS = sorted(
    name
    for name in os.listdir(DATA_DIR)
    if os.path.isdir(os.path.join(DATA_DIR, name))
)



def _parse_score_line(text):
    """Parse one stripped score line into (utt, spoof, bonafide), or None if malformed."""
    parts = text.split()
    if len(parts) < 3:
        return None
    try:
        return parts[0], float(parts[1]), float(parts[2])
    except ValueError:
        return None


def read_scores(file_path):
    """Read per-utterance scores from a whitespace-separated score file.

    Each non-blank line is expected to have the form
    ``utterance_id spoof_score bonafide_score``; any further columns are ignored.

    Args:
        file_path: Path of the score file to read.

    Returns:
        A list of ``(utterance_id, spoof_score, bonafide_score)`` tuples in file
        order, with both scores converted to ``float``. Blank lines are skipped
        silently. Every other line that cannot be parsed -- too few fields or a
        non-numeric score -- is skipped and reported on stdout, so that a reader
        of the log can see when rows were dropped.

    Raises:
        OSError: If the file cannot be opened.
    """
    scores = []
    with open(file_path, 'r') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:  # Skip empty lines
                continue
            record = _parse_score_line(stripped)
            if record is None:
                # Report short lines as well as non-numeric ones: a silently
                # dropped row is invisible to whoever consumes the result.
                print(f"Skipping invalid line in {file_path}: {stripped}")
                continue
            scores.append(record)
    return scores

def _average_by_utterance(all_scores, benchmark_dataset):
    """Average the spoof and bonafide scores of several models per utterance."""
    n_models = len(all_scores)
    reference = all_scores[0]
    reference_ids = [row[0] for row in reference]

    same_layout = all(
        [row[0] for row in scores] == reference_ids for scores in all_scores[1:]
    )
    if same_layout:
        # Every model lists exactly the same utterances in the same order, so the
        # rows can be averaged position by position.
        return [
            (
                rows[0][0],
                sum(row[1] for row in rows) / n_models,
                sum(row[2] for row in rows) / n_models,
            )
            for rows in zip(*all_scores)
        ]

    # The files differ in length or order. Averaging by position would mix the
    # scores of different utterances, so match the rows by utterance ID instead.
    print(f"Utterance IDs differ between models for {benchmark_dataset}; aligning scores by utterance ID")
    # If a model lists an utterance more than once, its last entry is used.
    lookups = [
        {utt: (spoof, bonafide) for utt, spoof, bonafide in scores}
        for scores in all_scores[1:]
    ]

    fused_scores = []
    for utt, spoof, bonafide in reference:
        try:
            matched = [(spoof, bonafide)] + [lookup[utt] for lookup in lookups]
        except KeyError:
            # Not scored by every model: there is nothing meaningful to average.
            continue
        fused_scores.append((
            utt,
            sum(pair[0] for pair in matched) / n_models,
            sum(pair[1] for pair in matched) / n_models,
        ))

    dropped = len(reference) - len(fused_scores)
    if dropped:
        print(f"Skipped {dropped} utterances of the first model that are not scored by every model for {benchmark_dataset}")
    return fused_scores


def perform_fusion(benchmark_dataset):
    """Perform average score fusion for a benchmark dataset.

    For every model in ``LIST_OF_SCORES`` the score file whose name starts with
    ``benchmark_dataset`` is looked up in ``SCORE_DIR/<model>`` and read with
    ``read_scores``. Models without a usable score file are skipped with a
    message, and the remaining models are averaged.

    Args:
        benchmark_dataset: Name of the benchmark dataset; used as the file-name
            prefix of the score files.

    Returns:
        A list of ``(utterance_id, avg_spoof, avg_bonafide)`` tuples in the order
        of the first model's score file, or ``None`` when no model provides
        scores or the models have no utterance in common. When all models list
        the same utterance IDs in the same order the rows are averaged one by
        one; otherwise rows are matched by utterance ID and utterances that are
        missing from any model are left out.
    """
    all_scores = []

    # Read scores from each model
    for model_name in LIST_OF_SCORES:
        try:
            score_dir = os.path.join(SCORE_DIR, model_name)
            # os.listdir returns names in arbitrary order; sort the matches so the
            # same file is chosen on every run when several share the prefix.
            candidates = sorted(
                name for name in os.listdir(score_dir) if name.startswith(benchmark_dataset)
            )

            if not candidates:
                print(f"No score file found for {benchmark_dataset} in {model_name}")
                continue

            if len(candidates) > 1:
                print(f"Multiple score files match {benchmark_dataset} in {model_name}; using {candidates[0]}")

            score_file = os.path.join(score_dir, candidates[0])

            if os.path.exists(score_file):
                print(f"Processing {score_file}...")
                scores = read_scores(score_file)
                if scores:
                    print(f"Found {len(scores)} scores for {score_file}")
                    all_scores.append(scores)
                else:
                    print(f"No valid scores found in {score_file}")
            else:
                print(f"Score file not found: {score_file}")

        except OSError as e:
            print(f"Error processing {model_name} for {benchmark_dataset}: {str(e)}")
            continue

    if not all_scores:
        print(f"No valid scores found for {benchmark_dataset}")
        return None

    if len(all_scores) < len(LIST_OF_SCORES):
        # Still fuse what is available, but make the reduced model set visible.
        print(f"Warning: only {len(all_scores)} of {len(LIST_OF_SCORES)} models provide scores for {benchmark_dataset}")

    fused_scores = _average_by_utterance(all_scores, benchmark_dataset)
    if not fused_scores:
        print(f"No utterances are shared by all models for {benchmark_dataset}")
        return None

    return fused_scores

# Fused score files go to SCORE_DIR/SCORE_FUSION_OUT; create the folder if needed.
output_dir = os.path.join(SCORE_DIR, SCORE_FUSION_OUT)
os.makedirs(output_dir, exist_ok=True)

# Fuse and save every benchmark dataset in turn.
for dataset in LIST_OF_BENCHMARK_DATASETS:
    print(f"Processing {dataset}...")
    fused_scores = perform_fusion(dataset)

    # Nothing to write when fusion returned None or an empty list.
    if not fused_scores:
        print(f"Failed to generate fused scores for {dataset}")
        continue

    # One line per utterance: "<utt> <spoof> <bonafide>", scores with 6 decimals.
    output_file = os.path.join(output_dir, f"{dataset}_fused.txt")
    with open(output_file, 'w') as f:
        f.writelines(
            f"{utt} {spoof:.6f} {bonafide:.6f}\n"
            for utt, spoof, bonafide in fused_scores
        )
    print(f"Saved fused scores to {output_file}")

print("Score fusion completed!")

Processing ASVspoof2021_DF_eval...
Processing /nvme1/hungdx/Lightning-hydra/logs/results/huggingface_benchrmark_Speech-DF-Arena/ConformerTCM_MDT_2B_LargeCorpus_e18/ASVspoof2021_DF_eval_cnsl_xlsr_2b_conformertcm_layerwise_24_mdt_large_corpus_ConformerTCM_MDT_2B_LargeCorpus_e18.txt...
Found 533928 scores for /nvme1/hungdx/Lightning-hydra/logs/results/huggingface_benchrmark_Speech-DF-Arena/ConformerTCM_MDT_2B_LargeCorpus_e18/ASVspoof2021_DF_eval_cnsl_xlsr_2b_conformertcm_layerwise_24_mdt_large_corpus_ConformerTCM_MDT_2B_LargeCorpus_e18.txt
Processing /nvme1/hungdx/Lightning-hydra/logs/results/huggingface_benchrmark_Speech-DF-Arena/ConformerTCM_MDT_LoRA_LargeCorpus_MoreElevenlabs/ASVspoof2021_DF_eval_cnsl_lora_elevenlabs_xlsr_conformertcm_mdt_more_elevenlabs_ConformerTCM_MDT_LoRA_LargeCorpus_MoreElevenlabs.txt...
Found 533928 scores for /nvme1/hungdx/Lightning-hydra/logs/results/huggingface_benchrmark_Speech-DF-Arena/ConformerTCM_MDT_LoRA_LargeCorpus_MoreElevenlabs/ASVspoof2021_DF_eval_cns