In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score
import os

def compute_metrics(true_labels, pred_labels):
    """
    Score predicted labels against the ground-truth labels.

    Returns a 4-tuple, in this order:
    (accuracy, Cohen's kappa, weighted-average F1, macro-average F1).
    """
    label_pair = (true_labels, pred_labels)
    return (
        accuracy_score(*label_pair),
        cohen_kappa_score(*label_pair),
        f1_score(*label_pair, average='weighted'),
        f1_score(*label_pair, average='macro'),
    )

def get_dataset_name(filename):
    """
    Extract the dataset-pair name from a result filename.

    The name is everything before the first ``ref<digits>`` token of the
    underscore-separated filename. When that token is ``ref1`` and is
    immediately followed by ``query0``, the first and last parts of the
    name are swapped.

    Examples:
    BoneMarrowA_BoneMarrowB_ref0_query1.csv -> BoneMarrowA_BoneMarrowB
    BoneMarrowA_BoneMarrowB_ref1_query0.csv -> BoneMarrowB_BoneMarrowA

    Parameters:
        filename: file name (not a full path), normally ending in ".csv".

    Returns:
        The dataset-pair name, or the filename without its ".csv"
        extension when no ``ref<digits>`` token is present.
    """
    # Strip only a trailing ".csv"; str.replace would also remove a ".csv"
    # that happens to appear in the middle of the name.
    extension = '.csv'
    name = filename[:-len(extension)] if filename.endswith(extension) else filename

    parts = name.split('_')

    # Locate the ref tag. Requiring "ref" + digits keeps dataset names that
    # merely begin with "ref" (e.g. "refseqA") from being mistaken for it.
    ref_idx = next(
        (i for i, part in enumerate(parts)
         if part.startswith('ref') and part[3:].isdigit()),
        None,
    )

    if ref_idx is None:
        return name  # No ref tag found: return the name unchanged

    # Everything before the ref tag is the dataset-pair name
    base_parts = parts[:ref_idx]

    # ref1_query0 means the roles are reversed, so swap first and last parts.
    # The check is made on the located tag, not on the whole filename.
    is_reversed = parts[ref_idx:ref_idx + 2] == ['ref1', 'query0']
    if is_reversed and len(base_parts) >= 2:
        base_parts[0], base_parts[-1] = base_parts[-1], base_parts[0]

    return '_'.join(base_parts)

def process_files(directory, save=False):
    """
    Compute classification metrics for every prediction CSV in a directory.

    A file is picked up when its name ends with ".csv" and contains "query".
    Each file must provide 'true_label' and 'predicted_label' columns; files
    lacking them are skipped with a warning. A file that raises while being
    processed is reported and skipped, so one bad file does not abort the run.

    Parameters:
        directory: folder containing the prediction CSV files.
        save: when True, also write the results to "<folder name>.csv" in
            the current working directory. Defaults to False.

    Returns:
        A DataFrame with one row per processed file and the columns
        Dataset, ACC, Kappa, Weighted_F1, Macro_F1, or None when no file
        was processed successfully.
    """
    required_columns = ['true_label', 'predicted_label']
    results = []

    # os.listdir returns entries in arbitrary order; sort so that the row
    # order of the results is the same on every run and platform.
    csv_files = sorted(
        f for f in os.listdir(directory) if f.endswith('.csv') and 'query' in f
    )

    for file in csv_files:
        try:
            print(f"Processing file {file}")

            # Extract dataset name
            base_name = get_dataset_name(file)

            # Read the whole CSV file
            df = pd.read_csv(os.path.join(directory, file))

            # Check if required columns exist
            if not all(col in df.columns for col in required_columns):
                print(f"Warning: {file} is missing required columns (true_label, predicted_label), skipping this file")
                continue

            # Calculate metrics
            acc, kappa, w_f1, m_f1 = compute_metrics(
                df['true_label'].values,
                df['predicted_label'].values
            )

            # Store results
            results.append({
                'Dataset': base_name,
                'ACC': acc,
                'Kappa': kappa,
                'Weighted_F1': w_f1,
                'Macro_F1': m_f1
            })

        except Exception as e:
            # Deliberate best-effort: report the failure and keep going
            print(f"Error processing file {file}: {e}")
            continue

    if not results:
        print("Warning: No files were successfully processed")
        return None

    results_df = pd.DataFrame(results)

    if save:
        # Name the output file after the processed folder
        folder_name = os.path.basename(os.path.abspath(directory))
        output_filename = f"{folder_name}.csv"
        results_df.to_csv(output_filename, index=False)
        print(f"Results saved to {output_filename}")

    return results_df

In [4]:
# Compute metrics for every prediction file in the results folder; process_files
# picks up files whose name ends in ".csv" and contains "query".
# NOTE(review): the path below is absolute and machine-specific — adjust it
# before re-running this notebook elsewhere.
results = process_files("/mnt/d/Desktop/R/Inter_plat/scDIFF_res") # This folder should include all CSVs like BoneMarrowA_BoneMarrowB_ref0_query1.csv

Processing file MosA1_Cerebellum_ref0_query1.csv
Processing file MosA1_MouseBrain(10x)_ref0_query1.csv
Processing file MosA1_PreFrontalCortex_ref0_query1.csv
Processing file MosA1_WholeBrainA_ref0_query1.csv
Processing file MosA1_WholeBrainB_ref0_query1.csv
Processing file MosM1_Cerebellum_ref0_query1.csv
Processing file MosM1_MouseBrain(10x)_ref0_query1.csv
Processing file MosM1_PreFrontalCortex_ref0_query1.csv
Processing file MosM1_WholeBrainA_ref0_query1.csv
Processing file MosM1_WholeBrainB_ref0_query1.csv
Processing file MosP1_Cerebellum_ref0_query1.csv
Processing file MosP1_MouseBrain(10x)_ref0_query1.csv
Processing file MosP1_PreFrontalCortex_ref0_query1.csv
Processing file MosP1_WholeBrainA_ref0_query1.csv
Processing file MosP1_WholeBrainB_ref0_query1.csv
Processing file MouseBrain(10x)_Cerebellum_ref0_query1.csv
Processing file MouseBrain(10x)_PreFrontalCortex_ref0_query1.csv
Processing file MouseBrain(10x)_WholeBrainA_ref0_query1.csv
Processing file MouseBrain(10x)_WholeBrain

In [5]:
# Display the metrics table returned by process_files
# (this is None when no file was processed successfully).
results

Unnamed: 0,Dataset,ACC,Kappa,Weighted_F1,Macro_F1
0,MosA1_Cerebellum,0.797035,0.714262,0.798417,0.49355
1,MosA1_MouseBrain(10x),0.959367,0.936826,0.960461,0.955876
2,MosA1_PreFrontalCortex,0.792943,0.61305,0.796987,0.548534
3,MosA1_WholeBrainA,0.691244,0.605868,0.671315,0.607023
4,MosA1_WholeBrainB,0.655663,0.575486,0.630792,0.594088
5,MosM1_Cerebellum,0.814139,0.735629,0.800248,0.488531
6,MosM1_MouseBrain(10x),0.974639,0.96031,0.975033,0.978358
7,MosM1_PreFrontalCortex,0.83133,0.662201,0.821282,0.573996
8,MosM1_WholeBrainA,0.677065,0.590501,0.65944,0.603089
9,MosM1_WholeBrainB,0.675014,0.602548,0.653144,0.605078
