In [1]:
import pandas as pd
from scipy import stats
from itertools import combinations
from statsmodels.stats.multitest import multipletests

In [2]:
# Load the results table. The analysis cell further down filters rows on the
# 'Metrics Name' column and reads one column per entry in `groups`.
df = pd.read_csv(
    'Results_TD_CP.csv',
)

# Values of 'Metrics Name' to analyse, one post-hoc family per metric.
# NOTE(review): presumably the DTI scalars FA / MD / AD / RD -- confirm.
metrics = [
    'dti_fa',
    'md',
    'ad',
    'rd',
]

# Column names holding the per-group measurements that are compared pairwise.
groups = [
    'TD_ND',
    'TD_D',
    'CSC_LA',
    'CSC_MA',
    'PV_LA',
    'PV_MA',
]

In [3]:
# Every unordered pair of distinct groups, in the order itertools yields them;
# materialised as a list because it is iterated once per metric.
pairs = [*combinations(groups, 2)]

In [4]:
# Pairwise post-hoc comparison of the groups, run separately for each metric.
# Each pair is compared with a two-sided Mann-Whitney U test; the p-values of
# one metric form one family that is corrected with Benjamini-Hochberg FDR.
# Only pairs that remain significant after correction are printed.
print(f"Post-Hoc Analysis: {len(groups)} Groups with FDR Correction")
print("-" * 50)

for metric in metrics:
    print(f"\n {metric}")
    subset = df[df['Metrics Name'] == metric]

    pair_labels = []
    p_values = []
    skipped_pairs = []

    # Collect raw p-values for every pair that actually has data.
    for group1, group2 in pairs:
        data1 = subset[group1].dropna().values
        data2 = subset[group2].dropna().values
        label = f"{group1} vs {group2}"

        if len(data1) == 0 or len(data2) == 0:
            # No test can be run for this pair. Leave it out of the family
            # rather than adding a placeholder p=1.0: placeholders increase
            # the number of hypotheses and make the BH adjustment of the real
            # tests more conservative than it should be.
            skipped_pairs.append(label)
            continue

        # Mann-Whitney U test (two-sided); only the p-value is needed.
        p_val = stats.mannwhitneyu(data1, data2, alternative='two-sided').pvalue
        p_values.append(p_val)
        pair_labels.append(label)

    if skipped_pairs:
        print(f" Skipped (no data): {', '.join(skipped_pairs)}")

    if not p_values:
        # Nothing was testable for this metric (e.g. the metric name is absent
        # from the table), so there is no family to correct.
        print("No pairs could be tested for this metric.")
        continue

    # FDR correction (Benjamini-Hochberg).
    # alpha=0.05 means we accept a 5% False Discovery Rate.
    reject, pvals_corrected, _, _ = multipletests(p_values, alpha=0.05, method='fdr_bh')

    # Print ONLY the significant pairs. Iterate over the collected results
    # (not over `pairs`) so labels, raw and corrected p-values stay aligned
    # even when some pairs were skipped.
    found_sig = False
    for label, is_significant, p_adj, p_raw in zip(pair_labels, reject, pvals_corrected, p_values):
        if is_significant:
            print(f" Significant, {label:<20} | FDR p={p_adj:.2e} (Raw p={p_raw:.2e})")
            found_sig = True

    if not found_sig:
        print("No pairs met the FDR significance threshold.")

Post-Hoc Analysis: 6 Groups with FDR Correction
--------------------------------------------------

 dti_fa
 Significant, TD_ND vs CSC_MA      | FDR p=4.46e-03 (Raw p=2.97e-04)
 Significant, TD_ND vs PV_MA       | FDR p=5.20e-03 (Raw p=9.33e-04)
 Significant, TD_D vs CSC_MA       | FDR p=5.20e-03 (Raw p=1.04e-03)
 Significant, TD_D vs PV_MA        | FDR p=1.99e-02 (Raw p=5.31e-03)

 md
 Significant, TD_ND vs PV_LA       | FDR p=2.89e-02 (Raw p=7.69e-03)
 Significant, TD_ND vs PV_MA       | FDR p=5.77e-04 (Raw p=3.84e-05)
 Significant, TD_D vs PV_MA        | FDR p=2.19e-02 (Raw p=2.92e-03)
 Significant, CSC_LA vs PV_MA      | FDR p=2.53e-02 (Raw p=5.05e-03)

 ad
 Significant, TD_ND vs PV_LA       | FDR p=4.16e-03 (Raw p=5.55e-04)
 Significant, TD_ND vs PV_MA       | FDR p=1.92e-05 (Raw p=1.28e-06)
 Significant, TD_D vs PV_MA        | FDR p=7.54e-03 (Raw p=1.51e-03)

 rd
 Significant, TD_ND vs CSC_MA      | FDR p=8.67e-04 (Raw p=1.73e-04)
 Significant, TD_ND vs PV_LA       | FDR p=2.71e-