# Get Anatomical Regions (by %) for significant group map clusters identified with FSL's cluster command


The purpose of this notebook is to take significant clusters from group analysis outputs (whole brain analyses), and describe the anatomical parcellation that makes up the clusters, by percent. 

See relevant lab wiki here: https://github.mit.edu/Saxelab/fmri_analysis/wiki/thresholding-by-significance-and-cluster-size

For example, say you have significant clusters from a whole-brain group analysis. For each cluster, this script reports that X% of the cluster is in anatomical region A, Y% of the cluster is in anatomical region B, and so on.


You are expected to have a standard anatomical parcellation in the same template space as your data. The Desikan-Killiany atlas is recommended; it is available in our template space (MNI152NLin6Asym) here:

https://github.com/neurodata/neuroparc 

> atlases/label/Human/Desikan_space-MNI152NLin6_res-2x2x2.nii.gz 
> atlases/label/Human/Anatomical-labels-csv/Desikan.csv 



In [None]:
import os
import os.path as op
import shutil
import glob
from nilearn import image
from nilearn import masking
import numpy as np
import scipy.stats
import pandas as pd
import datetime
import subprocess

In [None]:
# Absolute path of the project root.
proj_dir = '/om2/group/saxelab/NES_fMRI/study2'

# TIER directory inside the project.
tier_dir = op.join(proj_dir, 'TIER')

# Base directory that the cluster-map paths and the output CSVs are joined onto.
# It is the TIER directory itself: the entries in `files` already carry the
# 'analysis_data/group_analysis/...' prefix.
group_analysis_dir = tier_dir


In [None]:
# Cluster-index images to describe. These are outputs of FSL's cluster command:
# each voxel holds the index of the cluster it belongs to (0 = no cluster).
# Paths are relative to group_analysis_dir and differ only in the contrast name.
_contrasts = ('spWM_stimuli-gt-fixation', 'DOTS_soc-gt-phys', 'DOTS_phys-gt-soc')
files = [
    f'analysis_data/group_analysis/randomise_{contrast}/thresholded_tstat1_cluster_indices.nii.gz'
    for contrast in _contrasts
]

# NOTE: the atlas is expected to live in a folder named after the atlas
# (e.g. data/atlases/Desikan), holding both the template nii file and the
# labels CSV named below.
atlas_folder = 'Desikan'
atlas_template = 'Desikan_space-MNI152NLin6_res-2x2x2.nii.gz'
atlas_labels = 'Desikan.csv'



In [None]:

# Describe every significant cluster in terms of the atlas regions it overlaps.
#
# Reads:  proj_dir, group_analysis_dir, files, atlas_folder, atlas_template,
#         atlas_labels (set in the cells above).
# Builds: df_all, one row per (test, cluster, atlas region) with columns
#         'test', 'cluster_idx', 'value' (percent of the cluster), 'label', 'str'.

# Locate the atlas image and its label table under <proj_dir>/data/atlases/<atlas_folder>.
atlas_path = op.join(proj_dir, 'data/atlases', atlas_folder)
matches_atlas = glob.glob(op.join(atlas_path, atlas_template))
matches_csv = glob.glob(op.join(atlas_path, atlas_labels))
if not matches_atlas or not matches_csv:
    # Fail with a readable message instead of an IndexError on matches[0].
    raise FileNotFoundError(
        'Could not find ' + atlas_template + ' and/or ' + atlas_labels + ' in ' + atlas_path
    )

atlas_path = matches_atlas[0]
atlas_labels_path = matches_csv[0]

aicha_img = image.load_img(atlas_path)
aicha = aicha_img.get_fdata()
# The labels CSV is read without a header row: column 0 = region index, column 1 = region name.
aicha_labels = pd.read_csv(atlas_labels_path, header=None)
aicha_labels.columns = ['index', 'label']

# Region index -> label, built once instead of filtering the table for every region.
# On duplicate indices the first row wins.
label_by_index = aicha_labels.drop_duplicates('index').set_index('index')['label'].to_dict()

print('____________________ATLAS: ' + atlas_folder + '_____________________________')

# Rows are collected in a plain list and turned into a DataFrame once at the end
# (DataFrame.append was removed in pandas 2.0, and df_all did not exist before this cell).
cluster_rows = []

for clustermap_path in files:

    # Name of the test: third path component is 'randomise_<test name>'.
    verbose_name = clustermap_path.split('/')[2]
    test_name = verbose_name.split('randomise_')[1]

    # Load the cluster-index image and its voxel data.
    fullimg_path = op.join(group_analysis_dir, clustermap_path)
    clustermap_img = image.load_img(fullimg_path)
    clustermap_data = clustermap_img.get_fdata()

    # Unique cluster indices, in descending order.
    clustermap_values = np.unique(clustermap_data)[::-1]

    print('\n\n\n TEST: ', test_name, '\n')

    for cluster_index in clustermap_values:

        # 0 marks empty / non-cluster voxels.
        if not cluster_index > 0:
            continue

        # Binary (0/1) image of the current cluster.
        binary_clustermask = (clustermap_data == cluster_index).astype(int)
        binary_clustermask_img = image.new_img_like(clustermap_img, binary_clustermask)

        # Atlas restricted to the cluster: atlas value inside the cluster, 0 elsewhere.
        clustermasked_atlas = image.math_img(
            "img1 * img2", img1=aicha_img, img2=binary_clustermask_img
        )
        clustermasked_atlas_data = clustermasked_atlas.get_fdata()

        # Denominator for the percentages: cluster voxels that carry an atlas label.
        # Cluster voxels where the atlas is 0 are therefore not counted.
        n_nonzero_voxels_in_cluster = np.count_nonzero(clustermasked_atlas_data > 0)
        if n_nonzero_voxels_in_cluster < 1:
            continue

        # Atlas regions that are "in" this cluster.
        masked_atlas_values = np.unique(clustermasked_atlas_data)

        for region_index in masked_atlas_values:

            # 0 is background, not a region.
            if not region_index > 0:
                continue

            # Percent of the (labelled) cluster voxels that fall in this region.
            nvoxels_in_region = np.count_nonzero(clustermasked_atlas_data == region_index)
            proportion_nvoxels_in_region = round(
                100 * nvoxels_in_region / n_nonzero_voxels_in_cluster, 2
            )

            # Skip regions whose share rounds down to 0.00%.
            if not proportion_nvoxels_in_region > 0:
                continue

            try:
                label = label_by_index[region_index]
            except KeyError:
                raise KeyError(
                    'Atlas value ' + str(region_index) + ' has no label in ' + atlas_labels_path
                ) from None

            cluster_rows.append({
                'test': test_name,
                'cluster_idx': cluster_index,
                'value': proportion_nvoxels_in_region,
                'label': label,
                'str': str(proportion_nvoxels_in_region) + '% of cluster is in: ' + label,
            })

# Explicit columns keep the table well-formed even when no cluster overlapped the atlas.
df_all = pd.DataFrame(cluster_rows, columns=['test', 'cluster_idx', 'value', 'label', 'str'])


In [None]:
# Order the rows by test name (ascending), then by cluster index and by
# percentage (both descending); the index is renumbered from 0.
sort_columns = ['test', 'cluster_idx', 'value']
sort_ascending = [True, False, False]
df_all = df_all.sort_values(
    by=sort_columns,
    axis=0,
    ascending=sort_ascending,
    na_position='first',
    ignore_index=True,
)

In [None]:
# Write the per-region table to a CSV inside group_analysis_dir.
fname_out = op.join(group_analysis_dir, 'RANDOMISE_CLUSTERS_PER_TEST.csv')
# `header` takes a bool or a list of column aliases. The previous value
# 'column_names' only worked because a non-empty string is truthy; True
# states the intent (write the column names) and produces the same file.
df_all.to_csv(fname_out, index=False, header=True)

In [None]:
# Re-read the table from the CSV at fname_out into df_all.
df_all = pd.read_csv(filepath_or_buffer=fname_out)

In [None]:
# Collapse df_all into one row per (test, cluster): all regions of a cluster are
# joined into a single string such as '60.0% region a; 40.0% region b'.
# The result is written to RANDOMISE_CLUSTERS_PER_TEST_formattedStrings.csv.

# Rows are collected in a list and converted once
# (DataFrame.append was removed in pandas 2.0).
table_rows = []

for test in pd.unique(df_all['test']).tolist():
    data_test = df_all[df_all['test'] == test]

    for cluster_idx in pd.unique(data_test['cluster_idx']).tolist():
        data_cluster = data_test[data_test['cluster_idx'] == cluster_idx]

        # One '<percent>% <region name>' entry per region, in the row order of df_all.
        # Underscores in the atlas labels are replaced by spaces.
        region_strs = [
            str(value) + '% ' + label.replace('_', ' ')
            for value, label in zip(data_cluster['value'], data_cluster['label'])
        ]

        # '; ' goes between entries only. Previously the separator depended on the
        # cluster index: cluster 1 got no separators at all and every other
        # cluster ended with a trailing '; '.
        table_rows.append({
            'test': test,
            'str': '; '.join(region_strs),
            'cluster_idx': cluster_idx,
        })

# Explicit columns keep the CSV header stable even when df_all is empty.
table_format_df = pd.DataFrame(table_rows, columns=['test', 'str', 'cluster_idx'])

fname_out = op.join(group_analysis_dir, 'RANDOMISE_CLUSTERS_PER_TEST_formattedStrings.csv')
# header=True writes the column names (the documented form of the old truthy string).
table_format_df.to_csv(fname_out, index=False, header=True)

