In [None]:
import glob
import subprocess
from os.path import exists
import pandas as pd

# Group-level FEAT output directory (.gfeat) that holds one 'cope*.feat' folder per contrast.
# The value ends with '/', and the glob pattern below is appended to it by plain concatenation.
group_stats_path = '/Volumes/MyPassport/Hailey_data_preproc/fMRI_data/group_analyses/group_stats_3May22_N23_motionRegressors_Z3.1_p0.001_FLAME1+2_noTempDerivs_modelv3.gfeat/'

# Every 'cope*.feat' entry directly inside the group directory.
# glob does not guarantee any particular ordering of the matches.
cope_dir_pattern = group_stats_path + 'cope*.feat'
all_zstat_dir = glob.glob(cope_dir_pattern)

In [None]:
def format_cluster_output(output, zstat_num, stat_type):
    """Parse the tab-separated table printed by the `cluster` command into a DataFrame.

    Parameters
    ----------
    output : bytes
        Raw UTF-8 output of the cluster command: a header line holding the
        column names, followed by one tab-separated line per cluster.
    zstat_num : int
        Number corresponding to the cope image; stored in a 'zstat' column.
    stat_type : str
        "activation" or "deactivation", depending on the type of statistical
        image; stored in a 'stat_type' column.

    Returns
    -------
    pandas.DataFrame
        One row per cluster. The columns are the header fields (cell values are
        kept as strings, exactly as printed) followed by 'zstat' and
        'stat_type'. Empty output yields a frame with only those two columns
        and no rows.
    """
    # Drop blank lines wherever they occur instead of assuming there is exactly
    # one trailing blank line: a positional slice would silently lose the last
    # cluster when the output has no final newline.
    lines = [line for line in output.decode('utf-8').splitlines() if line.strip()]

    if lines:
        header, *rows = [line.split('\t') for line in lines]
        table = pd.DataFrame(rows, columns=header)
    else:
        table = pd.DataFrame()

    table['zstat'] = zstat_num
    table['stat_type'] = stat_type
    return table

In [None]:
# Column order of the combined table: identifying columns first, then the
# fields reported by `cluster`.
cluster_columns = ['zstat','stat_type','Cluster Index', 'Voxels', 'MAX', 'MAX X (vox)', 'MAX Y (vox)',
       'MAX Z (vox)', 'COG X (vox)', 'COG Y (vox)', 'COG Z (vox)']

# Per-image tables are collected in a list and concatenated once at the end;
# growing a DataFrame with concat inside the loop re-copies all previous rows
# on every iteration.
cluster_tables = []
for path in all_zstat_dir:
    # Directory names look like 'cope<N>.feat'. Slice off the fixed prefix and
    # suffix: str.strip() removes *sets of characters*, not substrings, so it
    # only works here by accident.
    feat_name = path.split('/')[-1]
    zstat_num = int(feat_name[len('cope'):-len('.feat')])

    im1 = path + '/stats/zstat1.nii.gz'
    im2 = path + '/stats/zstat2.nii.gz'

    # Both images are required; a cope missing either one is skipped entirely.
    if not (exists(im1) and exists(im2)):
        print('Skipping', path, '- zstat1/zstat2 image not found')
        continue

    # zstat1 is labelled 'activation' and zstat2 'deactivation'.
    # NOTE(review): `-t` looks like a threshold on the image values themselves
    # (z-scores here), so 0.0001 would keep nearly every positive voxel -
    # confirm this is intended rather than a p-value.
    for image_path, label in ((im1, 'activation'), (im2, 'deactivation')):
        output = subprocess.check_output(['cluster', '-i', image_path, '-t', '0.0001'])
        cluster_tables.append(format_cluster_output(output, zstat_num, label))

if cluster_tables:
    all_cluster_stats = pd.concat(cluster_tables, ignore_index=True)
    # Put the expected columns first (in the declared order) and keep any
    # additional columns reported by `cluster` after them.
    extra_columns = [col for col in all_cluster_stats.columns if col not in cluster_columns]
    all_cluster_stats = all_cluster_stats.reindex(columns=cluster_columns + extra_columns)
else:
    # Nothing was processed: still provide an empty table with the expected columns.
    all_cluster_stats = pd.DataFrame(columns=cluster_columns)

In [None]:
# Constrain to only the top 10 biggest clusters from each zstat image.
# "Biggest" is decided by voxel count within each (zstat, stat_type) image.
# Filtering on `Cluster Index < 11` does not do this: the index is only a
# label, and FSL's `cluster` appears to give the *highest* index to the largest
# cluster, so a low-index filter would keep the smallest ones
# (NOTE(review): confirm against the FSL documentation).
top_n_clusters = 10

# Kept from the original cell: 'Cluster Index' is converted to int in place so
# that it sorts numerically rather than as text.
all_cluster_stats['Cluster Index'] = all_cluster_stats['Cluster Index'].astype(int)

# Rank clusters by size inside each image (1 = largest). method='first' breaks
# ties by row order so exactly `top_n_clusters` rows are kept per image.
size_rank = (
    all_cluster_stats
    .assign(_voxels=pd.to_numeric(all_cluster_stats['Voxels']))
    .groupby(['zstat', 'stat_type'])['_voxels']
    .rank(method='first', ascending=False)
)

top_clusters = all_cluster_stats.loc[size_rank <= top_n_clusters, :]
top_clusters = top_clusters.sort_values(by=['zstat','Cluster Index','stat_type'])
# reset_index() (without drop) keeps the previous row labels in an 'index' column.
top_clusters = top_clusters.reset_index()
top_clusters

In [None]:
# Write the selected clusters into the group .gfeat directory, one CSV per
# statistic type (rows are split on the 'stat_type' column).
is_activation = top_clusters['stat_type'] == 'activation'
activation_rows = top_clusters[is_activation]
activation_rows.to_csv(group_stats_path+'/activation-cluster-stats-top10-thresh0.0001.csv')

is_deactivation = top_clusters['stat_type'] == 'deactivation'
deactivation_rows = top_clusters[is_deactivation]
deactivation_rows.to_csv(group_stats_path+'/deactivation-cluster-stats-top10-thresh0.0001.csv')