In [37]:
# Assess whether there is a statistical difference between groups.
# Change the folder below to match the seed configuration being analysed.
# NOTE(review): this is an absolute, machine-specific path — it must be edited
# before the notebook can run anywhere else.
%cd /home/gabridele/Desktop/irbio_folder/spreading_dynamics_clinical/derivatives/output_AFM_30
!pwd

/home/gabridele/Desktop/irbio_folder/spreading_dynamics_clinical/derivatives/output_AFM_30
/home/gabridele/Desktop/irbio_folder/spreading_dynamics_clinical/derivatives/output_AFM_30


In [29]:
import scipy.stats as stats 
import numpy as np
import pandas as pd
import os, re
import glob
from cliffs_delta import cliffs_delta

In [30]:
def get_spearman_correlation(file_path):
    """Read the Spearman correlation stored on the second line of a result file.

    The second line is expected to contain ``spearman_corr: <number>``, where
    the number may be signed, decimal, or in scientific notation
    (e.g. ``-0.12``, ``3e-05``).

    Parameters
    ----------
    file_path : str
        Path to the text file to parse.

    Returns
    -------
    float or None
        The parsed correlation, or ``None`` (after printing a notice) when the
        file has no second line or that line holds no parsable value.
    """
    with open(file_path, 'r') as file:
        file.readline()  # first line is not used
        # readline() returns '' at EOF, so short files fall through to the
        # "not found" branch instead of raising IndexError.
        spearman_line = file.readline().strip()

    # Accept an optional exponent so values like 1.5e-05 are not truncated to 1.5.
    match = re.search(
        r"spearman_corr:\s*([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)",
        spearman_line,
    )
    if match:
        return float(match.group(1))

    print(f"Spearman correlation not found in file {file_path}")
    return None

In [38]:
# Compare each patient group's Spearman correlations against CTRL using the
# non-parametric Mann-Whitney U test and Cliff's delta effect size.
def compute_var_correlations(directory, return_bartlett=False):
    """Group per-subject Spearman correlations and compare patients with controls.

    Every ``*.txt`` file in ``directory`` (except
    ``mean_spearman_correlations.txt``) is parsed with
    ``get_spearman_correlation``. The subject ID is taken from the
    ``sub-<digits>_`` part of the file name and its first digit selects the
    group: ``1`` -> CTRL, ``5`` -> SCZ, ``6`` -> BPLR, ``7`` -> ADHD. Files
    without a subject ID, without a parsable correlation, or with another
    leading digit are skipped.

    Parameters
    ----------
    directory : str
        Folder containing the per-subject text files.
    return_bartlett : bool, optional
        When True, the result of ``scipy.stats.bartlett`` across the four
        groups (CTRL, SCZ, BPLR, ADHD) is appended to the returned tuple.
        Defaults to False, which keeps the original six-element return value.

    Returns
    -------
    tuple
        ``(d_scz, d_bplr, d_adhd, u_scz, u_bplr, u_adhd)`` where each ``d_*``
        is the ``cliffs_delta`` result and each ``u_*`` the
        ``scipy.stats.mannwhitneyu`` result for CTRL versus that group. With
        ``return_bartlett=True`` a seventh element holds the Bartlett result.

    Raises
    ------
    ValueError
        If any of the four groups ends up with no data.
    """
    # First digit of the subject ID -> group label.
    prefix_to_group = {'1': 'CTRL', '5': 'SCZ', '6': 'BPLR', '7': 'ADHD'}
    groups = {name: [] for name in prefix_to_group.values()}

    for file_path in glob.glob(os.path.join(directory, "*.txt")):
        file_name = os.path.basename(file_path)

        # Skip the mean_spearman_correlations.txt file
        if file_name == "mean_spearman_correlations.txt":
            continue

        # Unrelated text files in the folder must not abort the whole analysis.
        id_match = re.search(r"sub-(\d+)_", file_name)
        if id_match is None:
            print(f"Subject ID not found in file name {file_name}")
            continue

        correlation = get_spearman_correlation(file_path)

        # Skip if correlation is None
        if correlation is None:
            continue

        group_name = prefix_to_group.get(id_match.group(1)[0])
        if group_name is not None:
            groups[group_name].append(correlation)

    # Every comparison below needs data on both sides, so fail early with a
    # clear message instead of an opaque error from the statistics calls.
    for group_name, values in groups.items():
        if len(values) == 0:
            raise ValueError(f"No data found for the {group_name} group.")

    u_scz = stats.mannwhitneyu(groups['CTRL'], groups['SCZ'])
    u_bplr = stats.mannwhitneyu(groups['CTRL'], groups['BPLR'])
    u_adhd = stats.mannwhitneyu(groups['CTRL'], groups['ADHD'])

    d_scz = cliffs_delta(groups['CTRL'], groups['SCZ'])
    d_bplr = cliffs_delta(groups['CTRL'], groups['BPLR'])
    d_adhd = cliffs_delta(groups['CTRL'], groups['ADHD'])

    results = (d_scz, d_bplr, d_adhd, u_scz, u_bplr, u_adhd)

    if return_bartlett:
        # Homogeneity-of-variance test across all four groups.
        bartlett_result = stats.bartlett(
            groups['CTRL'], groups['SCZ'], groups['BPLR'], groups['ADHD']
        )
        return results + (bartlett_result,)

    return results

In [39]:
# directory with the per-subject text files (the current working directory)
directory = os.getcwd()

# compute Cliff's delta effect sizes and Mann-Whitney U results (CTRL vs each other group)
d_scz, d_bplr, d_adhd, u_scz, u_bplr, u_adhd = compute_var_correlations(directory)

In [40]:
# Mann-Whitney U results, one per line: CTRL vs SCZ, BPLR, ADHD
print(u_scz, u_bplr, u_adhd, sep="\n")

MannwhitneyuResult(statistic=906.0, pvalue=0.18941467328837136)
MannwhitneyuResult(statistic=1518.0, pvalue=0.5948537141132926)
MannwhitneyuResult(statistic=1180.0, pvalue=0.6089491058069196)


In [42]:
# Cliff's delta results, one per line: CTRL vs SCZ, BPLR, ADHD
print(d_scz, d_bplr, d_adhd, sep="\n")

(0.1984126984126984, 'small')
(0.06302521008403361, 'negligible')
(-0.06349206349206349, 'negligible')


In [74]:
# NOTE(review): no visible cell assigns a notebook-level `vars`, so the
# Bartlett result shown below appears to come from leftover kernel state.
# On a fresh kernel this would print the built-in `vars` function instead —
# confirm where this value was defined before relying on it.
print(vars)

BartlettResult(statistic=1.3640943759122648, pvalue=0.7139723633202233)
