# In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind, mannwhitneyu
import os

def compare_histograms_with_stats_and_save(filepath, save_dir='output', bins=30,
                                           include_mannwhitney=False):
    """Save one overlaid EXP-vs-HC histogram per metric, annotated with statistics.

    Reads the CSV at ``filepath``, splits the rows by the ``GROUP`` column into
    ``'EXP'`` and ``'HC'``, and for every metric column writes
    ``<save_dir>/<column>_EXP_vs_HC.png``. Each figure title reports the
    per-group mean and standard deviation and the result of an independent
    two-sample t-test (``scipy.stats.ttest_ind`` with its default settings).

    Args:
        filepath: Path of the CSV file to load with ``pandas.read_csv``.
        save_dir: Directory that receives the PNG files; created if absent.
        bins: Number of histogram bins, shared by both groups.
        include_mannwhitney: When true, also run ``scipy.stats.mannwhitneyu``
            and append the U statistic and its p-value to the title.

    Returns:
        None. The function's output is the set of PNG files.

    Raises:
        KeyError: If ``GROUP`` or any metric column is missing from the CSV.
            Raised before the output directory is created or any file is
            written.
    """
    # Load the dataset
    data = pd.read_csv(filepath)

    # Define the metrics to compare
    columns_to_compare = [
        '2.AGE', '3-2. YR_EDU', 'STAI-X-1', 'STAI-X-2', 'HADS_anxiety', 'HADS_depression',
        'SWLS', 'GAD-7', 'PDSS', 'LSAS_performance', 'LSAS_social_interaction',
        'LSAS', 'MOCI', 'MOCI_corrected', 'BFNE', 'PSWQ', 'Handedness(true)', 'FCV-19S'
    ]

    # Fail fast with the complete list of absent columns instead of dying
    # half-way through the loop with a partially populated output directory.
    # 'MOCI_corrected' is derived below, so it is not expected in the file.
    required = ['GROUP'] + [c for c in columns_to_compare if c != 'MOCI_corrected']
    missing = [c for c in required if c not in data.columns]
    if missing:
        raise KeyError(f'Columns missing from {filepath}: {missing}')

    # Correct the MOCI scores
    # NOTE(review): presumably reverse-scores MOCI on a 0-30 scale -- confirm.
    data['MOCI_corrected'] = 30 - data['MOCI']

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_dir, exist_ok=True)

    exp_data = data[data['GROUP'] == 'EXP']
    hc_data = data[data['GROUP'] == 'HC']

    nan = float('nan')

    for column in columns_to_compare:
        exp_values = exp_data[column].dropna()
        hc_values = hc_data[column].dropna()

        # The tests are undefined when a group has no observations; report NaN
        # rather than depending on how the installed SciPy treats empty input.
        t_stat = p_val = u_stat = u_p_val = nan
        if len(exp_values) > 0 and len(hc_values) > 0:
            t_stat, p_val = ttest_ind(exp_values, hc_values)
            if include_mannwhitney:
                u_stat, u_p_val = mannwhitneyu(exp_values, hc_values)

        title_lines = [
            f'{column} - EXP vs HC',
            f'EXP: mean={exp_values.mean():.2f}, std={exp_values.std():.2f}',
            f'HC: mean={hc_values.mean():.2f}, std={hc_values.std():.2f}',
            f't-statistic={t_stat:.2f}, p-value={p_val:.3e}',
        ]
        if include_mannwhitney:
            title_lines.append(f'U-statistic={u_stat:.2f}, U p-value={u_p_val:.3e}')

        # Give both histograms the same value range so that, with an equal bin
        # count, their bin edges coincide and the overlaid bars are comparable.
        pooled = pd.concat([exp_values, hc_values])
        hist_range = (pooled.min(), pooled.max()) if len(pooled) > 0 else None

        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            # Combined histogram
            ax.hist(exp_values, bins=bins, range=hist_range,
                    color='blue', alpha=0.5, label='EXP')
            ax.hist(hc_values, bins=bins, range=hist_range,
                    color='green', alpha=0.5, label='HC')
            ax.set_title('\n'.join(title_lines))
            ax.set_xlabel(column)
            ax.set_ylabel('Frequency')
            ax.legend()

            # Save the histogram as a PNG file
            fig.savefig(os.path.join(save_dir, f'{column}_EXP_vs_HC.png'))
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close(fig)

# Example invocation against a dataset stored under /mnt/data:
# compare_histograms_with_stats_and_save('/mnt/data/participant_demo_clinical_all.csv')
# Actual run: process the local copy, leaving save_dir at its default.
compare_histograms_with_stats_and_save(
    filepath='./data/participant_demo_clinical_all.csv',
)