# Cluster Analysis
To change which data set is analyzed, change the data loaded into `clfmaps` and `prepostmaps`, and update the target/source file paths used by each intermediate step, up to the final thresholded TFCE image.

## Setup

### Dependencies

In [None]:
import warnings
warnings.simplefilter('ignore')

import matplotlib.pyplot as plt
from nilearn import plotting
from nilearn import image
import os
import numpy as np
from brainiak.searchlight.searchlight import Searchlight
from brainiak.searchlight.searchlight import Cube
from scipy import stats
from scipy.stats.stats import pearsonr
from scipy.stats.stats import ttest_1samp
import nibabel as nib
import subprocess
import statistics
from tqdm import tqdm


# Directory holding the per-subject searchlight maps read below.
SAVE_PATH = '/jukebox/ntb/projects/sketchloop02/data/searchlight_output'
# Reference volume; its non-zero voxels are used below as the mask for the prepost maps.
standard_reference_path = '/jukebox/ntb/projects/sketchloop02/searchlight_outputs/MNI152_T1_2mm_brain.nii.gz'

## subject list
sub_list = ['0110171', '0110172', '0111171', '0112171', '0112172', '0112173',
            '0113171', '0115174', '0117171', '0118171', '0118172', '0119171',
            '0119172', '0119173', '0119174', '0120171', '0120172', '0120173',
            '0123171', '0123173', '0124171', '0125171', '0125172', '1121161',
            '1130161', '1202161', '1203161', '1206161', '1206162', '1206163',
            '1207162']

# ROI names substituted into `maskpath` when drawing contours on the glass brains.
rois = ['V1',
 'V2',
 'LOC',
 'IT',
 'fusiform',
 'parahippo',
 'PRC',
 'ento',
 'hipp',
 'mOFC']
maskpath = os.path.join(SAVE_PATH, '1207162_neurosketch_{}_standard.nii.gz')
# Searchlight radius handed to brainiak's Searchlight in the helper functions.
sl_rad = 1

## collect target selectivity maps
# NOTE(review): get_data() is deprecated in recent nibabel releases; confirm the
# pinned nibabel version before switching to get_fdata().
clfmaps = [image.load_img(os.path.join(SAVE_PATH, s + '_standard_acc_searchlight.nii.gz')).get_data()
           for s in sub_list]
clfaffine = image.load_img(os.path.join(SAVE_PATH, sub_list[0] + '_standard_acc_searchlight.nii.gz')).affine

## collect prepost rsa maps
# The reference mask is the same for every subject, so it is loaded once
# instead of once per loop iteration.
mask = image.load_img(standard_reference_path).get_data().astype(bool)
prepostmaps = []
# `sub` is deliberately kept as the loop variable: a later cell reads its
# final value when building an affine path.
for sub in sub_list:
    prepostmap = image.load_img(os.path.join(SAVE_PATH, sub + '_standard_prepostclf_34_searchlight.nii.gz')).get_data()
    # voxels outside the reference mask are set to NaN
    prepostmap[~mask] = np.nan
    prepostmaps.append(prepostmap)
prepostaffine = image.load_img(os.path.join(SAVE_PATH, sub_list[0] + '_standard_prepostclf_34_searchlight.nii.gz')).affine

# arrange data to be distributed to searchlight:
# one 4-D array per subject, channel 0 = prepost map, channel 1 = clf map
subject_data = [np.stack((prepost, clf), axis=3) for prepost, clf in zip(prepostmaps, clfmaps)]
# subject ordering shuffled in place by generate_null_distribution
permutation = np.arange(len(clfmaps))

### Helper Functions

In [None]:
def generate_tmap(integratingfunction, targetpath, affinepath):
    "generating t-statistic map using the given integrating function and paths to target and affine"
    # generate the t-statistic map using the function
    sl = Searchlight(sl_rad=sl_rad, shape=Cube)
    sl.distribute(subject_data, np.ones((91, 109, 91)))
    sl.broadcast(None)
    result = np.array(sl.run_searchlight(integratingfunction)).astype(np.float32)
    
    # save result
    affine = image.load_img(affinepath).affine
    nib.save(nib.Nifti1Image(result.astype(np.float32), affine), targetpath)
    
    # remove NaNs from result
    !module load fsl; fslmaths "$targetpath" -nan "$targetpath"

def cluster_summary(inputpath, target_tfce_path, percentile):
    "Summarize cluster information before any permutation-sampling-derived thresholding"
    
    ## take their mean
    result = image.load_img(inputpath).get_data()
    threshold = np.percentile(result.flatten(), percentile)
    print('Voxel Intensity Threshold For Non-TFCE Results:', threshold)
    #plt.hist(result.flatten())
    #plt.axvline(x=threshold, color='r')
    #plt.title("Distribution of Voxel Intensities/T-Statistics\nWith {}th Percentile Marked".format(percentile))
    #plt.xlabel('Voxel Intensity/T-Statistic')
    #plt.ylabel('Frequency')
    #plt.show()

    # plot result
    print('T-Statistic Maps With Threshold')
    localizer_tmap_filename = inputpath
    plotting.plot_glass_brain(localizer_tmap_filename, threshold=threshold,
                            colorbar=True, plot_abs=False, title='Thresholded Z-Stat Glass Brain')
    plotting.plot_stat_map(localizer_tmap_filename, cut_coords=(2, -21, 11), threshold=threshold, title='Thresholded Stat')
    plt.show()
    
    #view = plotting.view_img_on_surf(localizer_tmap_filename, threshold=threshold, black_bg=False, cmap='black_pink')
    #return view

    # compare with cluster analysis output
    print('Thresholded Cluster Analysis Figures')
    inputimage = inputpath
    !module load fsl; fslmaths "$inputimage" -nan "$inputimage"
    result = !module load fsl; cluster -i "$inputimage" -t "$threshold" -o cluster_index --osize=cluster_size

    #localizer_tmap_filename = 'cluster_index.nii.gz'
    #plotting.plot_glass_brain(localizer_tmap_filename, plot_abs=False, colorbar=True, title='Clusters, Ranked by Size, Ascending')
    #plt.show()

    localizer_tmap_filename = 'cluster_size.nii.gz'
    plotting.plot_glass_brain(localizer_tmap_filename, plot_abs=False, colorbar=True, title='Clusters, Scored by Size')
    plt.show()

    # compare with tfce output
    print('Raw TFCE Output')
    localizer_tmap_filename = target_tfce_path
    !module load fsl; fslmaths "$inputimage" -tfce 2 0.5 6 "$localizer_tmap_filename"
    plotting.plot_glass_brain(localizer_tmap_filename, plot_abs=False, colorbar=True, title='Raw TFCE Glass Brain')
    #plotting.plot_stat_map(localizer_tmap_filename, cut_coords=(2, -21, 11), title='TFCE Stat Brain')
    plt.show()

    # consider distribution of tfce values
    print('Distribution of TFCE Scores')
    result = image.load_img(target_tfce_path).get_data()
    print('25th percentile:', np.percentile(result.flatten(), 25))
    print('50th percentile:', np.percentile(result.flatten(), 50))
    print('75th percentile:', np.percentile(result.flatten(), 75))
    print('99th percentile:', np.percentile(result.flatten(), 99))
    
def generate_null_distribution(integratingfunction, samplesize, tag):
    global permutation
    
    nullpath = '{}_maxes.txt'.format(tag)
    # set of max cluster sizes to build
    sizes = []
    try:
        sofar = open(nullpath).read()
    except FileNotFoundError:
        sofar = ''

    sl = Searchlight(sl_rad=sl_rad, shape=Cube)
    sl.distribute(subject_data, np.ones((91, 109, 91)))
    sl.broadcast(None)
    
    # loop samplesize times to sample null distribution
    for i in tqdm(range(samplesize-sofar.count('\n'))):
        np.random.shuffle(permutation) # not relevant to sign-flipping approaches
    
        # create permuted map
        permutationpath =  '{}_perm.nii.gz'.format(tag)
        output = np.array(sl.run_searchlight(integratingfunction)).astype(np.float32)
        nib.save(nib.Nifti1Image(output, clfaffine), permutationpath)

        # perform tfce on the map
        !module load fsl; fslmaths "$permutationpath" -nan "$permutationpath" # remove NaNs first!
        !module load fsl; fslmaths "$permutationpath" -tfce 2 0.5 6 "$permutationpath"

        # extract max from tfce score map
        size = np.max(image.load_img(permutationpath).get_data())

        # store 
        sizes.append(size)
        f = open(nullpath, 'a')
        f.write("{}\n".format(sizes[-1]))
        f.close()

def plot_arbitrary_perm(tag):
    """Show a glass-brain plot of the '<tag>_perm.nii.gz' image currently on disk."""
    print('TFCE of Arbitrary Permutation From Analysis')
    perm_image_path = '{}_perm.nii.gz'.format(tag)
    plotting.plot_glass_brain(
        perm_image_path,
        plot_abs=False,
        colorbar=True,
        title='Raw TFCE Glass Brain',
    )
    plt.show()
    
def report_null_distribution(tag, percentile):
    """Plot the sampled null maxima for `tag` and return their `percentile`-th percentile.

    tag -- prefix of the '<tag>_maxes.txt' file written by generate_null_distribution
    percentile -- percentile (0-100) of the stored maxima used as the TFCE threshold

    Returns the threshold. Raises FileNotFoundError when no score file exists for `tag`.
    """
    # Read one score per non-blank line. Unlike split('\n')[:-1], this keeps the
    # last value when the file lacks a trailing newline and tolerates blank lines.
    nullpath = '{}_maxes.txt'.format(tag)
    with open(nullpath) as f:
        maximums = np.array([float(line) for line in f if line.strip()])
    sizethreshold = np.percentile(maximums, percentile)

    # plotting
    plt.hist(maximums)
    plt.axvline(x=sizethreshold, color='r')
    plt.title("Distribution of Biggest TFCE Scores\nWith {}th Percentile Marked".format(percentile))
    plt.xlabel('Maximum TFCE Score')
    plt.ylabel('Frequency')
    plt.show()
    print('TFCE Score Threshold:', sizethreshold)
    return sizethreshold

## Mean Prepost Clf Maps
Run the clustering analysis pipeline on the mean prepost clf map using an arbitrary cluster threshold.

### Create T-Statistic Map of Mean Prepost Clf

In [None]:
# Image supplying the affine, and the output path for this section's map.
affinepath = os.path.join(SAVE_PATH, sub_list[0] + '_standard_acc_searchlight.nii.gz')
targetpath = os.path.join(SAVE_PATH, 'mean_acc_searchlight.nii.gz')

def meanprepost(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel: one-sample t-statistic (against 0) across subjects.

    Reads channel 0 at index [1, 1, 1] of every subject's cube (the cube centre
    when the radius is 1). `mask`, `sl_rad` and `bcast_var` are not used.
    """
    center_values = np.array(subject_data)[:, 1, 1, 1, 0]
    return ttest_1samp(center_values, 0).statistic

# Build the group map with the meanprepost kernel and write it to `targetpath`.
generate_tmap(integratingfunction=meanprepost, targetpath=targetpath, affinepath=affinepath)

### Plot The PrePost Clf Map and Choose an Intensity Threshold From the Value Distribution

In [None]:
# Map to summarize, where to write its TFCE image, and the intensity percentile to mark.
inputpath = os.path.join(SAVE_PATH, 'mean_acc_searchlight.nii.gz')
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_acc_searchlight.nii.gz')
percentile = 99

cluster_summary(inputpath=inputpath, target_tfce_path=target_tfce_path, percentile=percentile)

### Sampling Max Cluster Sizes from Null Distribution

In [None]:
# Prefix of the score/scratch files, and how many null samples to collect.
tag = 'tfceacc' # tfcemeannegclf
samplesize = 100

# searchlight kernel used for each null sample
def permute_meanclf(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel for the null: the across-subject t-statistic with a random sign.

    Reads channel 0 at index [1, 1, 1] of every subject's cube, computes the
    one-sample t-statistic against 0, and returns it or its negation with equal
    probability. `mask`, `sl_rad` and `bcast_var` are not used.

    NOTE(review): the sign is drawn independently on every call (i.e. per
    searchlight), not once per subject or per map - confirm this is the intended null.
    """
    clf_vector = np.array(subject_data)[:, 1, 1, 1, 0]
    tstat = ttest_1samp(clf_vector, 0).statistic
    # No size argument: np.random.choice then returns a scalar rather than a
    # length-1 array, matching what the non-permuted kernels return.
    return np.random.choice([tstat, -tstat])

# Sample (or resume sampling) the null maxima for this tag.
generate_null_distribution(integratingfunction=permute_meanclf, samplesize=samplesize, tag=tag)

### Arbitrary Image From Permutations

In [None]:
# Show the permuted TFCE image currently on disk for this tag.
tag = 'tfceacc'
plot_arbitrary_perm(tag=tag)

### Plot Null Distribution and TFCE Output Thresholded on Percentile Thereof

In [None]:
# TFCE threshold = requested percentile of the sampled null maxima
print('Sampling Threshold from Null Distribution Results')
tag = 'tfceacc'
percentile = 95
tfcethreshold = report_null_distribution(tag=tag, percentile=percentile)
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_acc_searchlight.nii.gz')

# the thresholded plots are produced in the following cells
#print('Thresholded TFCE Output')

In [None]:
# One thresholded TFCE glass brain per ROI, with that ROI's mask drawn as contours.
for roi in rois:
    print(roi)
    roi_mask_file = maskpath.format(roi)
    view = plotting.plot_glass_brain(
        target_tfce_path,
        threshold=tfcethreshold,
        colorbar=True,
        plot_abs=False,
        title='Thresholded TFCE Glass Brain',
    )
    view.add_contours(roi_mask_file)
    plt.show()

In [None]:
# Surface view of the thresholded TFCE map (shown as the cell's output).
plotting.view_img_on_surf(
    target_tfce_path,
    black_bg=True,
    threshold=tfcethreshold,
)  # , cmap='black_pink'

## Mean Target Selectivity Maps
Run the clustering analysis pipeline on the mean target selectivity map using an arbitrary cluster threshold.

### Create T-Statistic Map of Mean Target Selectivity

In [None]:
# Image supplying the affine, and the output path for this section's map.
affinepath = os.path.join(SAVE_PATH, sub_list[0] + '_standard_acc_searchlight.nii.gz')
targetpath = os.path.join(SAVE_PATH, 'mean_acc_searchlight.nii.gz')

def meanclf(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel: negated one-sample t-statistic (against 0) across subjects.

    Reads channel 1 at index [1, 1, 1] of every subject's cube (the cube centre
    when the radius is 1). `mask`, `sl_rad` and `bcast_var` are not used.
    """
    center_values = np.array(subject_data)[:, 1, 1, 1, 1]
    group_t = ttest_1samp(center_values, 0).statistic
    return -group_t

# Build the group map with the meanclf kernel and write it to `targetpath`.
generate_tmap(integratingfunction=meanclf, targetpath=targetpath, affinepath=affinepath)

### Plot The Mean Target Selectivity Map and Choose an Intensity Threshold From the Value Distribution

In [None]:
# Map to summarize, where to write its TFCE image, and the intensity percentile to mark.
inputpath = os.path.join(SAVE_PATH, 'mean_acc_searchlight.nii.gz')
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_mean_acc_searchlight.nii.gz')
percentile = 99

cluster_summary(inputpath=inputpath, target_tfce_path=target_tfce_path, percentile=percentile)

### Sampling Max Cluster Sizes from Null Distribution

In [None]:
# Prefix of the score/scratch files, and how many null samples to collect.
tag = 'tfcemeanacc' # tfcemeannegclf
samplesize = 100

# searchlight kernel used for each null sample
def permute_meanclf(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel for the null: the across-subject t-statistic with a random sign.

    Reads channel 1 at index [1, 1, 1] of every subject's cube, computes the
    one-sample t-statistic against 0, and returns it or its negation with equal
    probability. `mask`, `sl_rad` and `bcast_var` are not used.

    NOTE(review): the sign is drawn independently on every call (i.e. per
    searchlight), not once per subject or per map - confirm this is the intended null.
    """
    clf_vector = np.array(subject_data)[:, 1, 1, 1, 1]
    tstat = ttest_1samp(clf_vector, 0).statistic
    # No size argument: np.random.choice then returns a scalar rather than a
    # length-1 array, matching what the non-permuted kernels return.
    return np.random.choice([tstat, -tstat])

# Sample (or resume sampling) the null maxima for this tag.
generate_null_distribution(integratingfunction=permute_meanclf, samplesize=samplesize, tag=tag)

### Arbitrary Image From Permutations

In [None]:
# Show the permuted TFCE image currently on disk for this tag.
tag = 'tfcemeanacc'
plot_arbitrary_perm(tag=tag)

### Plot Null Distribution and TFCE Output Thresholded on Percentile Thereof

In [None]:
# find associated tfce threshold
print('Sampling Threshold from Null Distribution Results')
percentile = 95
tag = 'tfcemeanacc'
tfcethreshold = report_null_distribution(tag, percentile)
# Point at the TFCE image that cluster_summary wrote for this analysis
# ('tfce_mean_acc_...'), so the threshold and the plotted map come from the
# same data; the previous 'tfce_mean_clf_...' name is not produced in this section.
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_mean_acc_searchlight.nii.gz')

# plot result with threshold
#print('Thresholded TFCE Output')

In [None]:
# One thresholded TFCE glass brain per ROI, with that ROI's mask drawn as contours.
for roi in rois:
    print(roi)
    roi_mask_file = maskpath.format(roi)
    view = plotting.plot_glass_brain(
        target_tfce_path,
        threshold=tfcethreshold,
        colorbar=True,
        plot_abs=False,
        title='Thresholded TFCE Glass Brain',
    )
    view.add_contours(roi_mask_file)
    plt.show()

In [None]:
# Surface view of the thresholded TFCE map (shown as the cell's output).
plotting.view_img_on_surf(
    target_tfce_path,
    cmap='black_pink',
    black_bg=False,
    threshold=tfcethreshold,
)

## Mean Prepost Differentiation Maps
Run the clustering analysis pipeline on the mean pre-post differentiation map using an arbitrary cluster threshold.

### Create T-Statistic Map of Mean Pre-Post Differentiation

In [None]:
# Output path for this section's map, and the image supplying its affine.
targetpath = os.path.join(SAVE_PATH, 'negmean_prepost_searchlight.nii.gz')
# sub_list[-1] is the value the leftover loop variable `sub` holds after the
# loading loop in the setup cell; indexing the list directly keeps this cell
# from depending on that leaked variable.
affinepath = os.path.join(SAVE_PATH, sub_list[-1]+'_neurosketch_standard_prepost_searchlight.nii.gz')

def meanprepost(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel: negated one-sample t-statistic (against 0) across subjects.

    Reads channel 0 at index [1, 1, 1] of every subject's cube (the cube centre
    when the radius is 1). `mask`, `sl_rad` and `bcast_var` are not used.
    """
    center_values = np.array(subject_data)[:, 1, 1, 1, 0]
    group_t = ttest_1samp(center_values, 0).statistic
    return -group_t

# Build the group map with the meanprepost kernel and write it to `targetpath`.
generate_tmap(integratingfunction=meanprepost, targetpath=targetpath, affinepath=affinepath)

### Plot The Mean Pre-Post Differentiation Map and Choose an Intensity Threshold From the Value Distribution

In [None]:
# Map to summarize, where to write its TFCE image, and the intensity percentile to mark.
inputpath = os.path.join(SAVE_PATH, 'negmean_prepost_searchlight.nii.gz')
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_negprepost_searchlight.nii.gz')
percentile = 99

cluster_summary(inputpath=inputpath, target_tfce_path=target_tfce_path, percentile=percentile)

### Sampling Max Cluster Sizes from Null Distribution

In [None]:
# Prefix of the score/scratch files, and how many null samples to collect.
tag = 'tfcemeannegprepost'
samplesize = 100

# searchlight kernel used for each null sample
def permute_meanprepost(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel for the null: the across-subject t-statistic with a random sign.

    Reads channel 0 at index [1, 1, 1] of every subject's cube, computes the
    one-sample t-statistic against 0, and returns it or its negation with equal
    probability. `mask`, `sl_rad` and `bcast_var` are not used.

    NOTE(review): the sign is drawn independently on every call (i.e. per
    searchlight), not once per subject or per map - confirm this is the intended null.
    """
    prepost_vector = np.array(subject_data)[:, 1, 1, 1, 0]
    tstat = ttest_1samp(prepost_vector, 0).statistic
    # No size argument: np.random.choice then returns a scalar rather than a
    # length-1 array, matching what the non-permuted kernels return.
    return np.random.choice([tstat, -tstat])

# Sample (or resume sampling) the null maxima for this tag.
generate_null_distribution(integratingfunction=permute_meanprepost, samplesize=samplesize, tag=tag)

### Arbitrary Image From Permutations

In [None]:
# Show the permuted TFCE image currently on disk for this tag.
tag = 'tfcemeannegprepost'
plot_arbitrary_perm(tag=tag)

### Plot Null Distribution and TFCE Output Thresholded on Percentile Thereof

In [None]:
# find associated tfce threshold
percentile = 95
# Same tag the null distribution for this analysis was sampled under
# ('tfcemeannegprepost'); 'tfcemeanprepost' is never written in this notebook.
tag = 'tfcemeannegprepost'
tfcethreshold = report_null_distribution(tag, percentile)

# plot result with threshold
print('Thresholded TFCE Output')
# The TFCE image cluster_summary wrote for this analysis, so the threshold and
# the plotted map come from the same data.
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_negprepost_searchlight.nii.gz')
plotting.plot_glass_brain(target_tfce_path, plot_abs=False, colorbar=True, threshold=tfcethreshold,
                          title='Thresholded TFCE Glass Brain')
plt.show()

In [None]:
# One thresholded TFCE glass brain per ROI, with that ROI's mask drawn as contours.
for roi in rois:
    print(roi)
    roi_mask_file = maskpath.format(roi)
    view = plotting.plot_glass_brain(
        target_tfce_path,
        threshold=tfcethreshold,
        colorbar=True,
        plot_abs=False,
        title='Thresholded TFCE Glass Brain',
    )
    view.add_contours(roi_mask_file)
    plt.show()

In [None]:
# Surface view of the thresholded TFCE map (shown as the cell's output).
plotting.view_img_on_surf(
    target_tfce_path,
    cmap='black_purple',
    black_bg=False,
    threshold=tfcethreshold,
)

## Correlation of Prepost Differentiation and Target Selectivity Maps
Run the clustering analysis pipeline on the correlation map using an arbitrary cluster threshold.

### Create Z-Score Map of the Correlation Between Pre-Post Differentiation and Target Selectivity

In [None]:
# Image supplying the affine, and the output path for the correlation map.
affinepath = os.path.join(SAVE_PATH, sub_list[0] + '_standard_clf_searchlight.nii.gz')
targetpath = os.path.join(SAVE_PATH, 'clfcor_searchlight.nii.gz')

def corr(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel: across-subject Pearson correlation expressed as a signed z-score.

    Correlates channel 1 with channel 0, both read at index [1, 1, 1] of every
    subject's cube. The returned z-score carries the sign of the correlation.
    `mask`, `sl_rad` and `bcast_var` are not used.
    """
    stacked = np.array(subject_data)
    prepost_vector = stacked[:, 1, 1, 1, 0]
    clf_vector = stacked[:, 1, 1, 1, 1]

    correlation, pvalue = pearsonr(clf_vector, prepost_vector)

    # the p-value is halved before norm.ppf, so the raw z is never positive
    zscore = stats.norm.ppf(pvalue / 2)

    # keep z when its sign already agrees with the correlation, otherwise negate it
    return zscore if correlation * zscore > 0 else -zscore

# Build the correlation z-map with the corr kernel and write it to `targetpath`.
generate_tmap(integratingfunction=corr, targetpath=targetpath, affinepath=affinepath)

### Plot The Correlation Map and Choose an Intensity Threshold From the Value Distribution

In [None]:
# Map to summarize, where to write its TFCE image, and the intensity percentile to mark.
inputpath = os.path.join(SAVE_PATH, 'clfcor_searchlight.nii.gz')
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_clfcor_searchlight.nii.gz')
percentile = 99

cluster_summary(inputpath=inputpath, target_tfce_path=target_tfce_path, percentile=percentile)

### Sampling Max Cluster Sizes from Null Distribution

In [None]:
# Prefix of the score/scratch files, and how many null samples to collect.
tag = 'tfceclfcor'
samplesize = 100

def permute_corr(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel for the null: the correlation z-score with subjects shuffled.

    Same computation as the non-permuted correlation kernel, except that the
    channel-0 vector is reordered by the global `permutation` before being
    correlated with channel 1. `mask`, `sl_rad` and `bcast_var` are not used.
    """
    global permutation

    stacked = np.array(subject_data)
    clf_vector = stacked[:, 1, 1, 1, 1]
    prepost_vector = stacked[:, 1, 1, 1, 0]

    # break the subject pairing between the two measures
    correlation, pvalue = pearsonr(clf_vector, prepost_vector[permutation])

    # the p-value is halved before norm.ppf, so the raw z is never positive
    zscore = stats.norm.ppf(pvalue / 2)

    # keep z when its sign already agrees with the correlation, otherwise negate it
    if correlation * zscore > 0:
        return zscore
    return -zscore

generate_null_distribution(integratingfunction=permute_corr, samplesize=samplesize, tag=tag)

### Arbitrary Image From Permutations

In [None]:
# Use the tag this section's null distribution was sampled under ('tfceclfcor');
# 'tfcecor' is never written in this notebook, so its '_perm.nii.gz' image would
# be missing or left over from some other run.
tag = 'tfceclfcor'
plot_arbitrary_perm(tag)

### Plot Null Distribution and TFCE Output Thresholded on Percentile Thereof

In [None]:
# TFCE threshold = requested percentile of the sampled null maxima
print('Sampling Threshold from Null Distribution Results')
tag = 'tfceclfcor'
percentile = 95
tfcethreshold = report_null_distribution(tag=tag, percentile=percentile)

# glass brain of the TFCE image with that value as the display threshold
print('Thresholded TFCE Output')
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_clfcor_searchlight.nii.gz')
plotting.plot_glass_brain(
    target_tfce_path,
    threshold=tfcethreshold,
    colorbar=True,
    plot_abs=False,
    title='Thresholded TFCE Glass Brain',
)
plt.show()

## NEGATIVE Correlation of Prepost Differentiation and Target Selectivity Maps
Run the clustering analysis pipeline on the sign-inverted (negative) correlation map using an arbitrary cluster threshold.

### Create T-Statistic Map of Negative Correlation

In [None]:
# Image supplying the affine, and the output path for the negative-correlation map.
affinepath = os.path.join(SAVE_PATH, sub_list[0] + '_standard_clf_searchlight.nii.gz')
targetpath = os.path.join(SAVE_PATH, 'negcor_searchlight.nii.gz')

def corr(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel: across-subject Pearson correlation as a sign-inverted z-score.

    Correlates channel 1 with channel 0, both read at index [1, 1, 1] of every
    subject's cube. The returned z-score has the opposite sign to the correlation,
    so negative correlations give positive values. `mask`, `sl_rad` and
    `bcast_var` are not used.
    """
    stacked = np.array(subject_data)
    prepost_vector = stacked[:, 1, 1, 1, 0]
    clf_vector = stacked[:, 1, 1, 1, 1]

    correlation, pvalue = pearsonr(clf_vector, prepost_vector)

    # the p-value is halved before norm.ppf, so the raw z is never positive
    zscore = stats.norm.ppf(pvalue / 2)

    # keep z when its sign is opposite to the correlation, otherwise negate it
    return zscore if correlation * zscore < 0 else -zscore

# Build the negative-correlation z-map with the corr kernel and write it to `targetpath`.
generate_tmap(integratingfunction=corr, targetpath=targetpath, affinepath=affinepath)

### Plot The Correlation Map and Choose an Intensity Threshold From the Value Distribution

In [None]:
# Map to summarize, where to write its TFCE image, and the intensity percentile to mark.
inputpath = os.path.join(SAVE_PATH, 'negcor_searchlight.nii.gz')
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_negcor_searchlight.nii.gz')
percentile = 99

cluster_summary(inputpath=inputpath, target_tfce_path=target_tfce_path, percentile=percentile)

### Sampling Max Cluster Sizes from Null Distribution

In [None]:
# Prefix of the score/scratch files, and how many null samples to collect.
tag = 'tfcenegcor'
samplesize = 1000

def permute_corr(subject_data, mask, sl_rad, bcast_var):
    """Searchlight kernel for the null: the sign-inverted correlation z-score with subjects shuffled.

    Same computation as the non-permuted negative-correlation kernel, except that
    the channel-0 vector is reordered by the global `permutation` before being
    correlated with channel 1. `mask`, `sl_rad` and `bcast_var` are not used.
    """
    global permutation

    stacked = np.array(subject_data)
    clf_vector = stacked[:, 1, 1, 1, 1]
    prepost_vector = stacked[:, 1, 1, 1, 0]

    # break the subject pairing between the two measures
    correlation, pvalue = pearsonr(clf_vector, prepost_vector[permutation])

    # the p-value is halved before norm.ppf, so the raw z is never positive
    zscore = stats.norm.ppf(pvalue / 2)

    # keep z when its sign is opposite to the correlation, otherwise negate it
    if correlation * zscore < 0:
        return zscore
    return -zscore

generate_null_distribution(integratingfunction=permute_corr, samplesize=samplesize, tag=tag)

### Arbitrary Image From Permutations

In [None]:
# Show the permuted TFCE image currently on disk for this tag.
tag = 'tfcenegcor'
plot_arbitrary_perm(tag=tag)

### Plot Null Distribution and TFCE Output Thresholded on Percentile Thereof

In [None]:
# TFCE threshold = requested percentile of the sampled null maxima
print('Sampling Threshold from Null Distribution Results')
tag = 'tfcenegcor'
percentile = 95
tfcethreshold = report_null_distribution(tag=tag, percentile=percentile)

# glass brain of the TFCE image with that value as the display threshold
print('Thresholded TFCE Output')
target_tfce_path = os.path.join(SAVE_PATH, 'tfce_negcor_searchlight.nii.gz')
plotting.plot_glass_brain(
    target_tfce_path,
    threshold=tfcethreshold,
    colorbar=True,
    plot_abs=False,
    title='Thresholded TFCE Glass Brain',
)
plt.show()

## Identify Where Significant Target Selectivity and Prepost Integration Clustering Overlap

In [None]:
# `percentile` is whatever value the most recently run cell left behind.
# NOTE(review): the selectivity inputs use the 'clf' names ('tfce_mean_clf_...',
# 'tfcemeanclf') while the target-selectivity section writes 'tfce_mean_acc_...'
# under tag 'tfcemeanacc' - confirm which files are intended here.

# target selectivity: TFCE map and its null-derived threshold
selectivitypath = os.path.join(SAVE_PATH, 'tfce_mean_clf_searchlight.nii.gz')
selectivitythreshold = report_null_distribution(tag='tfcemeanclf', percentile=percentile)
selectivitymap = image.load_img(selectivitypath).get_data()

# prepost integration: TFCE map and its null-derived threshold
integrationpath = os.path.join(SAVE_PATH, 'tfce_negprepost_searchlight.nii.gz')
integrationthreshold = report_null_distribution(tag='tfcemeannegprepost', percentile=percentile)
integrationmap = image.load_img(integrationpath).get_data()

# voxels exceeding the threshold in both maps; the count is the cell's output
selective_voxels = selectivitymap > selectivitythreshold
integrating_voxels = integrationmap > integrationthreshold
overlapmap = np.logical_and(selective_voxels, integrating_voxels)
np.sum(overlapmap)

In [None]:
# Zero the integration TFCE scores everywhere outside the overlap, then save.
integrationmap[np.logical_not(overlapmap)] = 0
# Take the affine from the image the data was loaded from, rather than from
# whichever `affinepath` an earlier, unrelated section happened to leave behind.
affine = image.load_img(integrationpath).affine
nib.save(nib.Nifti1Image(integrationmap.astype(np.float32), affine), os.path.join(SAVE_PATH, 'overlap_negprepost_searchlight.nii.gz'))

In [None]:
# Surface view of the saved overlap map (shown as the cell's output).
plotting.view_img_on_surf(
    os.path.join(SAVE_PATH, 'overlap_negprepost_searchlight.nii.gz'),
    cmap='black_pink',
    black_bg=False,
    threshold=integrationthreshold,
)