In [1]:
import pandas as pd
import numpy as np
import os
from mne.viz import circular_layout, plot_connectivity_circle
from matplotlib import colors as mcolors
from matplotlib import pyplot as plt

### find_minimally_required_sample_sizes: The function to determine the minimally required sample sizes and produce the brain measures that can reach 75% replication probability before sampling to half of the full sample

### The input parameters are as below:
1. mypath is the folder which includes the regional replicability

2. ts indicates the significance thresholds: p<0.05, p<0.01, FDR-corrected p<0.05, Bonferroni-corrected p<0.05. For example, ts = ['0.05', '0.01','fdr','bonferroni']

3. mytype indicates the imaging type: "CSA", "CT", or "FC"

In [2]:
def find_minimally_required_sample_sizes(mypath,ts,mytype):
    """Write the first row position at which each measure reaches 75% replication.

    For every sub-folder ``f`` of ``mypath`` and every threshold label ``t`` in
    ``ts`` the file ``<mypath>/<f>/<mytype>_reliability_<t>.csv`` is read and
    ``<mypath>/<f>/MIN_size_<mytype>_<t>.csv`` is written.

    The output has one row per column of the reliability table.  Its first
    column holds the 1-based position of the first row whose value is >= 0.75,
    or 0 when the column never reaches 0.75 (NaN entries never count as
    reaching it).  The second column labels the row: ``FCs`` (1..n) when
    ``mytype == 'FC'``, otherwise ``regions`` taken from the last column of
    ``raw_data/Area_items.csv``.

    Parameters
    ----------
    mypath : str
        Folder containing one sub-folder per dataset.  Entries that are not
        folders are skipped.
    ts : iterable of str
        Threshold labels used as file-name suffixes.
    mytype : str
        File-name prefix of the reliability tables ("CSA", "CT" or "FC").
    """
    # 75% replication probability
    RP = 0.75
    # DK cortical regions
    regions = pd.read_csv('raw_data/Area_items.csv').iloc[:,-1].values.tolist()

    for f in os.listdir(mypath):
        folder = os.path.join(mypath, f)
        # Stray files (e.g. .DS_Store) next to the dataset folders are not datasets.
        if not os.path.isdir(folder):
            continue
        for t in ts:
            reliability = pd.read_csv(os.path.join(folder, mytype+'_reliability_'+t+'.csv'))

            # reached[r, c] is True where column c is at or above RP in row r.
            reached = reliability.values >= RP
            # argmax returns the first True per column; it also returns 0 for an
            # all-False column, hence the explicit any() mask below.
            first_hit = reached.argmax(axis=0) + 1
            min_sizes = np.where(reached.any(axis=0), first_hit, 0)

            odata = pd.DataFrame(data=min_sizes)
            if mytype == 'FC':
                # Label connections 1..n; n follows the table instead of a fixed 210.
                odata['FCs'] = np.arange(len(min_sizes)) + 1
            else:
                odata['regions'] = regions
            odata.to_csv(os.path.join(folder, 'MIN_size_'+ mytype+'_'+t+'.csv'),index=False)

In [8]:
# Settings used when running "find_minimally_required_sample_sizes" and the
# plotting cells below. Keep the trailing "/" on every folder: paths are
# extended from these strings.
# Threshold labels; each one is used as a file-name suffix.
ts = ['0.05','0.01','fdr','bonferroni']
# Alternative input folder: "regional_replicability_ttest/split_0.5/"
input_path = "regional_replicability/"
# Alternative FC figure folder: "figures_ttest/split_0.5/FC_figures/"
FC_figure_path = "figures/FC_figures/"
# Alternative brain figure folder: "figures_ttest/split_0.5/brain_figures/"
brain_figure_path = "figures/brain_figures/"

In [None]:
# Build the MIN_size_* tables for each imaging type in turn.
for imaging_type in ('CSA', 'CT', 'FC'):
    find_minimally_required_sample_sizes(input_path, ts, imaging_type)

In [17]:
from nilearn import surface
from nilearn import datasets
from nilearn import plotting

In [18]:
# function to plot brain figures
def plot_brain_figures(data,mytype,output_path,tmax,sigt = None):
    """Paint one value per cortical region onto the fsaverage5 surface and save PNGs.

    Four figures are written (left/right hemisphere, lateral/medial view), named
    ``<output_path><mytype>_<hemi>_<view>.png``.  When ``sigt`` is given, four
    more are written with ``threshold=sigt`` and the suffix ``_sig``.

    Parameters
    ----------
    data : indexable of float
        One value per region, indexed in the order of the last column of
        ``raw_data/Area_items.csv``.
    mytype : str
        Prefix of the output file names.
    output_path : str
        String the file names are appended to; a folder must therefore end
        with a path separator.
    tmax : float
        Forwarded to ``plot_surf_stat_map`` as ``vmax``.
    sigt : float, optional
        Forwarded as ``threshold`` for the ``_sig`` figures.
    """
    lh_atlas = surface.load_surf_data('raw_data/atlas/lh.aparc.annot')
    rh_atlas = surface.load_surf_data('raw_data/atlas/rh.aparc.annot')

    # Look up the annotation value of every region, in Area_items order.
    regions = pd.read_csv('raw_data/Area_items.csv').iloc[:,-1].values.tolist()
    mapping_matrix = pd.read_csv('raw_data/atlas/value_to_region_table.csv').set_index('regions')
    mapping_matrix = mapping_matrix.loc[regions]
    region_ids = mapping_matrix['value'].values

    # Per-vertex maps; vertices not belonging to any listed region stay 0.
    l_mask = np.zeros(lh_atlas.shape)
    r_mask = np.zeros(rh_atlas.shape)

    for i, (region, region_id) in enumerate(zip(regions, region_ids)):
        # Characters 4-5 of the region name select the hemisphere; anything
        # other than 'lh' is painted on the right hemisphere.
        if region[4:6] == 'lh':
            l_mask[lh_atlas == region_id] = data[i]
        else:
            r_mask[rh_atlas == region_id] = data[i]

    fsaverage = datasets.fetch_surf_fsaverage(mesh='fsaverage5')

    hemi_maps = {'left': l_mask, 'right': r_mask}
    # (hemisphere, view) pairs in the order the files are written.
    panels = [('left', 'lateral'), ('left', 'medial'),
              ('right', 'medial'), ('right', 'lateral')]

    def _save_panels(suffix, **extra):
        # Write one PNG per panel; `extra` carries the optional threshold.
        for hemi, view in panels:
            plotting.plot_surf_stat_map(fsaverage['pial_' + hemi], stat_map=hemi_maps[hemi],
                                        hemi=hemi, view=view, cmap='coolwarm', colorbar=True,
                                        bg_map=fsaverage['sulc_' + hemi], bg_on_data=True,vmax=tmax,
                                        darkness=.3,
                                        output_file=output_path + mytype + '_' + hemi + '_' + view + suffix + '.png',
                                        **extra)

    _save_panels('')
    if sigt is not None:
        _save_panels('_sig', threshold=sigt)

## visualize_brain_regions: Main function to plot brain figures

### The input parameters are as below:
1. mypath is the folder which includes the regional replicability

2. ts indicates the significance thresholds: p<0.05, p<0.01, FDR-corrected p<0.05, Bonferroni-corrected p<0.05. For example, ts = ['0.05', '0.01','fdr','bonferroni']

3. mytype indicates the imaging type: "CSA" or "CT"

4. output_path is the folder where the brain figures are saved.

In [19]:
def visualize_brain_regions(mypath,ts,mytype,output_path):
    """Plot, per dataset folder and threshold, how early each region reaches 75% replication.

    For every threshold ``t`` in ``ts`` and every sub-folder ``f`` of ``mypath``
    this reads ``<mytype>_reliability_<t>.csv`` (only its row count is used) and
    ``MIN_size_<mytype>_<t>.csv`` from ``<mypath>/<f>/``, converts the minimum
    sizes to scores and calls ``plot_brain_figures`` with ``vmax=2`` and
    ``threshold=1``.  Figures go to ``<output_path>/<f>/``, which is created
    if needed.

    Score per region:
      * 0 when the minimum size is 0 (region never reached the target);
      * otherwise ``1 + gain / max_gain`` with ``gain = 1 - min_size / n_rows``,
        so values lie in [1, 2] and the earliest region gets 2.

    Parameters
    ----------
    mypath : str
        Folder containing one sub-folder per dataset; other entries are skipped.
    ts : iterable of str
        Threshold labels used as file-name suffixes.
    mytype : str
        File-name prefix, "CSA" or "CT".
    output_path : str
        Root folder for the figures.
    """
    files = os.listdir(mypath)
    for t in ts:
        print(t)
        for f in files:
            in_dir = os.path.join(mypath, f)
            # Stray files next to the dataset folders are not datasets.
            if not os.path.isdir(in_dir):
                continue
            print(f)
            out_dir = os.path.join(output_path, f)
            # makedirs also creates a missing parent, unlike os.mkdir.
            os.makedirs(out_dir, exist_ok=True)

            reliability = pd.read_csv(os.path.join(in_dir, mytype+'_reliability_'+t+'.csv'))
            max_subjs = reliability.shape[0]
            Minsize = pd.read_csv(os.path.join(in_dir, 'MIN_size_'+mytype+'_'+t+'.csv'))
            min_sizes = Minsize.iloc[:,0].values

            gain = 1 - min_sizes/max_subjs
            # A minimum size of 0 means "never reached"; push it below zero so
            # it is excluded from the scaling and ends up as score 0.
            gain[min_sizes == 0] = -0.1
            tmax = np.max(gain)
            # tmax is 0 when the best region only reaches the target in the
            # last row; dividing by it would give NaN, so fall back to 1.
            scale = tmax if tmax > 0 else 1.0
            scores = np.where(gain >= 0, gain/scale + 1, 0.0)
            # plot_brain_figures appends file names to this string, so keep the
            # trailing separator.
            plot_brain_figures(scores,'minsize_' + mytype + '_' + t, os.path.join(out_dir, ''), 2, 1)

In [2]:
# Run "visualize_brain_regions" for both structural imaging types.
ts = ['0.05','0.01','fdr','bonferroni']
for structural_type in ('CSA', 'CT'):
    visualize_brain_regions(input_path, ts, structural_type, brain_figure_path)

In [3]:
# function to plot the functional connectivity
def plot_FC_circular_connection(data,output_path):
    """Draw the connections whose value is >= 1 on a 21-node circle and save the figure.

    Parameters
    ----------
    data : indexable of float
        One value per connection; the first 210 entries are read.  Entry ``j``
        belongs to the node pair stored in columns 1 and 2 (1-based node ids)
        of row ``j`` of ``raw_data/edge_list_d25.txt``.
    output_path : str
        File the figure is saved to (300 dpi).

    The figure is closed after saving so that repeated calls do not accumulate
    open matplotlib figures.
    """
    n_nodes = 21
    n_edges = 210  # 21 * 20 / 2, i.e. one entry per unordered node pair

    label_names = list(range(1, n_nodes + 1))

    FC_info = pd.read_csv('raw_data/edge_list_d25.txt',sep=' ')
    # Order of the nodes around the circle and where gaps between groups go.
    node_order =[2,4,8,19,1,9,13,14,5,6,7,20,10,11,12,17,21,3,15,18,16]
    boundaries = [0,4,8,12,15,17]
    node_angles = circular_layout(label_names, node_order, start_pos=90, group_boundaries=boundaries)

    # Symmetric node-by-node matrix; connections below 1 are left at 0.
    FC_matrix = np.zeros((n_nodes,n_nodes))
    for j in range(n_edges):
        if data[j] >= 1:
            x = FC_info.iloc[j,1] - 1
            y = FC_info.iloc[j,2] - 1
            FC_matrix[x,y] = data[j]
            FC_matrix[y,x] = data[j]

    fig = plt.figure(figsize=(10,10))
    try:
        plot_connectivity_circle(FC_matrix, label_names, vmin = 1, vmax = 2, facecolor = 'white', textcolor = 'black',
                                 node_angles=node_angles, colormap='Reds', fig=fig, colorbar_size=0.5,
                                 fontsize_colorbar=14,fontsize_names=14,linewidth=3)

        fig.savefig(output_path,dpi=300,bbox_inches = 'tight')
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)

    return

## visualize_FC: Main function to plot functional connectivity

### The input parameters are as below:
1. mypath is the folder which includes the regional replicability

2. ts indicates the significance thresholds: p<0.05, p<0.01, FDR-corrected p<0.05, Bonferroni-corrected p<0.05. For example, ts = ['0.05', '0.01','fdr','bonferroni']

3. output_path is the folder where the FC figures are saved.

In [4]:
def visualize_FC(mypath,ts,output_path):
    """Plot, per dataset folder and threshold, how early each connection reaches 75% replication.

    For every threshold ``t`` in ``ts`` and every sub-folder ``f`` of ``mypath``
    this reads ``FC_reliability_<t>.csv`` (only its row count is used) and
    ``MIN_size_FC_<t>.csv`` from ``<mypath>/<f>/``, converts the minimum sizes
    to scores and passes them to ``plot_FC_circular_connection``, which saves
    ``<output_path>/<f>/minsize_<t>.png``.  The output folder is created if
    needed.

    Score per connection:
      * 0 when the minimum size is 0 (connection never reached the target);
      * otherwise ``1 + gain / max_gain`` with ``gain = 1 - min_size / n_rows``,
        so values lie in [1, 2] and the earliest connection gets 2.

    Parameters
    ----------
    mypath : str
        Folder containing one sub-folder per dataset; other entries are skipped.
    ts : iterable of str
        Threshold labels used as file-name suffixes.
    output_path : str
        Root folder for the figures.
    """
    files = os.listdir(mypath)
    for t in ts:
        print(t)
        for f in files:
            in_dir = os.path.join(mypath, f)
            # Stray files next to the dataset folders are not datasets.
            if not os.path.isdir(in_dir):
                continue
            reliability = pd.read_csv(os.path.join(in_dir, 'FC_reliability_'+t+'.csv'))
            max_subjs = reliability.shape[0]

            Minsize_FC = pd.read_csv(os.path.join(in_dir, 'MIN_size_FC_'+t+'.csv'))
            min_sizes = Minsize_FC.iloc[:,0].values

            gain = 1 - min_sizes/max_subjs
            # A minimum size of 0 means "never reached"; push it below zero so
            # it is excluded from the scaling and ends up as score 0.
            gain[min_sizes == 0] = -0.1
            tmax = np.max(gain)
            # tmax is 0 when the best connection only reaches the target in the
            # last row; dividing by it would give NaN, so fall back to 1.
            scale = tmax if tmax > 0 else 1.0
            scores = np.where(gain >= 0, gain/scale + 1, 0.0)

            out_dir = os.path.join(output_path, f)
            # makedirs also creates a missing parent, unlike os.mkdir.
            os.makedirs(out_dir, exist_ok=True)
            plot_FC_circular_connection(scores, os.path.join(out_dir, 'minsize_'+t+'.png'))

In [1]:
# Draw the circular connectivity figure for every threshold.
ts = ['0.05','0.01','fdr','bonferroni']
visualize_FC(mypath=input_path, ts=ts, output_path=FC_figure_path)

In [24]:
# plot the minimally required sample size for 75% replication probability
def plot_lines_figure(data,title,output_file):
    """Plot a replication-probability curve and mark where it first reaches 0.75.

    Entry ``i`` of ``data`` is drawn at sample size ``(i + 1) * 100``.  The
    first entry >= 0.75 is marked with a dot, dashed guide lines and a
    ``(sample size, value)`` label; when no entry reaches 0.75 only the curve
    is drawn.  The figure is saved to ``output_file`` at 300 dpi and then
    closed, so batches of calls do not accumulate open figures.

    Parameters
    ----------
    data : numpy.ndarray
        1-D array of replication probabilities.
    title : str
        Figure title.
    output_file : str
        Path of the image to write.
    """
    sampling_times = data.shape[0]
    x = np.arange(0,sampling_times*100,100) + 100

    fig, ax = plt.subplots(figsize=(16,8))
    try:
        # No 'k' format string: it contradicted color='coral' and only
        # produced a matplotlib warning.
        ax.plot(x,data,color='coral',linewidth=4)

        hits = np.where(data >= 0.75)[0]
        if hits.size:
            ind = hits[0]
            y1 = np.round(data[ind],2)
            x1 = (ind + 1)*100
            ax.scatter(x1, y1, marker='o', color='black',s=30)
            # Near the right edge the label is placed to the left of the point
            # so it stays inside the axes.
            if sampling_times - ind < 20:
                ax.text(x1-1500, y1+0.05, '('+str(x1)+','+str(y1)+')', fontsize=30)
            else:
                ax.text(x1+300, y1-0.05, '('+str(x1)+','+str(y1)+')', fontsize=30)

            ax.hlines(y=y1,xmin=-500,xmax=x1,linestyles='dashed',linewidth=3,color ='gray')
            ax.vlines(x=x1,ymin=-0.05,ymax=y1,linestyles='dashed',linewidth=3,color ='gray')

        ax.set_ylabel('replication probability',fontsize=30)
        ax.set_title(title,fontsize=30)

        ax.set_ylim(-0.05,1.1)
        ax.set_xlim(-500,sampling_times*100+100)
        ax.set_xlabel('sample size',fontsize=30)
        for side in ('top', 'bottom', 'left', 'right'):
            ax.spines[side].set_linewidth(2)
        ax.tick_params(direction='out', length=10, width=4,labelsize=30)
        fig.savefig(output_file,dpi=300,bbox_inches = 'tight')
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(fig)
    return

## visualize_variation_replicability: Main function to visualize the improvement of replicability 

### The input parameters are as below:
1. mypath is the folder which includes the regional replicability

2. ts indicates the significance thresholds: p<0.05, p<0.01, FDR-corrected p<0.05, Bonferroni-corrected p<0.05. For example, ts = ['0.05', '0.01','fdr','bonferroni']

3. mytype indicates the imaging type: "CSA","CT" or "FC"

4. output_path is the folder where figures are saved.

In [25]:
def visualize_variation_replicability(mypath,ts,mytype,output_path):
    """Plot the replication curve of the measure with the smallest minimum size.

    For every threshold ``t`` in ``ts`` and every sub-folder ``f`` of ``mypath``
    (the entry named ``'BM'`` and non-folder entries are skipped) this reads
    ``MIN_size_<mytype>_<t>.csv``, picks the measure with the smallest non-zero
    minimum size (first one on ties), reads that measure's column from
    ``<mytype>_reliability_<t>.csv`` and hands it to ``plot_lines_figure``,
    saving to ``<output_path>/<f>/best_minsize_<mytype>_<t>.png``.  The output
    folder is created if needed.  Folders in which every minimum size is 0 are
    reported and skipped.

    One summary line is printed per plotted folder: folder name, largest and
    smallest minimum size (each multiplied by 100), the reliability value at
    the smallest minimum size, and the measure (1-based index for "FC",
    region name otherwise).

    Parameters
    ----------
    mypath : str
        Folder containing one sub-folder per dataset.
    ts : iterable of str
        Threshold labels used as file-name suffixes.
    mytype : str
        File-name prefix: "CSA", "CT" or "FC".
    output_path : str
        Root folder for the figures.
    """
    files = os.listdir(mypath)
    FC_info = pd.read_csv('raw_data/edge_list_d25.txt',sep=' ')
    regions = pd.read_csv('raw_data/Area_items.csv').iloc[:,-1].values.tolist()
    for t in ts:
        for f in files:
            if f == 'BM':
                continue
            in_dir = os.path.join(mypath, f)
            # Stray files next to the dataset folders are not datasets.
            if not os.path.isdir(in_dir):
                continue
            min_sizes = pd.read_csv(os.path.join(in_dir, 'MIN_size_'+mytype+'_'+t+'.csv')).iloc[:,0].values
            if np.sum(min_sizes) == 0:
                print(f,'does not exit')
                continue
            # 0 means "never reached": mask those entries with +inf instead of a
            # fixed sentinel, so genuine sizes above the sentinel still win.
            candidates = np.where(min_sizes > 0, min_sizes, np.inf)
            min_pos = int(np.argmin(candidates))
            best_size = int(min_sizes[min_pos])

            reliability = pd.read_csv(os.path.join(in_dir, mytype+'_reliability_'+t+'.csv'))
            min_values = reliability.iloc[:,min_pos].values
            real_values = reliability.iloc[best_size-1,min_pos]
            if mytype == 'FC':
                my_title = mytype + ': ' + str(FC_info.iloc[min_pos,1])+ ' - ' + str(FC_info.iloc[min_pos,2])
                print(f,np.max(min_sizes)*100,best_size*100,real_values,min_pos+1)
            else:
                region = regions[min_pos]
                # Drop the first four characters of the region name for the title.
                my_title = mytype + ': ' + region[4:]
                print(f,np.max(min_sizes)*100,best_size*100,real_values,region)

            out_dir = os.path.join(output_path, f)
            # Do not rely on another cell having created the folder.
            os.makedirs(out_dir, exist_ok=True)
            plot_lines_figure(min_values,my_title,os.path.join(out_dir, 'best_minsize_'+mytype+'_'+t+'.png'))

In [3]:
# Plot the best replication curve per imaging type, each into its figure folder.
ts = ['0.05','0.01','fdr','bonferroni']
for imaging_type, figure_root in (('FC', FC_figure_path),
                                  ('CT', brain_figure_path),
                                  ('CSA', brain_figure_path)):
    visualize_variation_replicability(input_path, ts, imaging_type, figure_root)