In [1]:
import pandas as pd
import numpy as np
import os
import random
from scipy.stats import pearsonr,spearmanr
from scipy import stats
from scipy.signal import savgol_filter

In [2]:
# fdr multiple testing correction
from statsmodels.stats import multitest

def fdr_correction(P):
    """Return Benjamini-Hochberg FDR-corrected p-values with the shape of ``P``.

    ``P`` is flattened, so all of its entries are corrected together as a
    single family of tests; the corrected values are then reshaped back.
    """
    original_shape = P.shape
    # multipletests returns a tuple; index 1 holds the corrected p-values.
    results = multitest.multipletests(P.flatten(), alpha=0.05, method='fdr_bh')
    corrected = results[1]
    return corrected.reshape(original_shape)

  import pandas.util.testing as tm


In [3]:
# the function of calculating regional replicability
def _significant(pvals, t, alpha):
    """Return a boolean array marking the p-values in ``pvals`` that pass threshold ``t``."""
    if t == 'fdr':
        # Correct each (i, j) slice separately, treating axis 2 as one family of tests.
        corrected = np.empty(pvals.shape, dtype=float)
        for i in range(pvals.shape[0]):
            for j in range(pvals.shape[1]):
                corrected[i, j] = fdr_correction(pvals[i, j])
        return corrected < alpha
    if t == 'bonferroni':
        return pvals < alpha / pvals.shape[2]
    if t is None:
        raise TypeError("t must be 'fdr', 'bonferroni' or a numeric p-value threshold, not None")
    return pvals < float(t)


def estimate_reliability(data1, data2, t=None, alpha=0.05, window_length=7, polyorder=2):
    """Estimate how often an effect replicates between two sets of results.

    For every index ``(i, j, k)`` the effect counts as replicated when the
    statistics in ``data1`` and ``data2`` share the same non-zero sign and both
    p-values pass the threshold selected by ``t``. The replicated indicator is
    averaged over axis 1, then each column of the resulting ``(n0, n2)`` table
    is smoothed along axis 0 with a Savitzky-Golay filter and rounded to three
    decimals.

    Parameters
    ----------
    data1, data2 : array-like, shape (n0, n1, n2, >=2)
        ``[..., 0]`` is the test statistic and ``[..., 1]`` its p-value.
        NOTE(review): the axes presumably are (sampling level, bootstrap
        iteration, region) -- confirm against the code that writes the npy files.
    t : {'fdr', 'bonferroni'} or number-like
        ``'fdr'`` compares Benjamini-Hochberg corrected p-values with ``alpha``;
        ``'bonferroni'`` compares raw p-values with ``alpha / n2``; anything
        else is converted with ``float`` and used as a raw p-value threshold.
    alpha : float, optional
        Family-wise / FDR level used by the ``'fdr'`` and ``'bonferroni'`` modes.
    window_length, polyorder : int, optional
        Settings forwarded to ``scipy.signal.savgol_filter``.

    Returns
    -------
    numpy.ndarray, shape (n0, n2)
        Smoothed fraction of axis-1 entries in which the effect replicated.

    Raises
    ------
    TypeError
        If ``t`` is None.
    ValueError
        If the inputs are not 4-D arrays of identical shape, if ``t`` cannot be
        parsed as a float, or if ``savgol_filter`` rejects the window settings
        (e.g. ``window_length`` larger than ``n0``).
    """
    data1 = np.asarray(data1)
    data2 = np.asarray(data2)
    if data1.ndim != 4 or data1.shape != data2.shape:
        raise ValueError(
            "data1 and data2 must be 4-D arrays of identical shape, got %s and %s"
            % (data1.shape, data2.shape)
        )
    n0, n1, n2 = data1.shape[:3]

    # A product > 0 means both statistics are non-zero and point the same way.
    same_sign = data1[..., 0] * data2[..., 0] > 0
    replicated = (
        same_sign
        & _significant(data1[..., 1], t, alpha)
        & _significant(data2[..., 1], t, alpha)
    )

    # Fraction of axis-1 entries that replicated, for each (i, k).
    replicability = replicated.sum(axis=1) / n1

    smoothed = np.empty_like(replicability)
    for k in range(n2):
        smoothed[:, k] = np.round(
            savgol_filter(replicability[:, k], window_length, polyorder), 3
        )
    return smoothed

## statistical_analysis: main function to estimate regional replicability
### input parameters are as below:
1. file_path is the folder whose sub-folders each contain the npy files (the bootstrapped correlations)

2. npy_file1 is the npy file from subsample1

3. npy_file2 is the npy file from subsample2

4. *t* indicates the significance threshold: uncorrected p<0.05, uncorrected p<0.01, FDR-corrected p<0.05, or Bonferroni-corrected p<0.05. For example, *t* = '0.05', '0.01', 'fdr', or 'bonferroni'

5. mytype indicates the imaging type: "CSA","CT", or "FC"

6. output_path is the folder to save the regional replicability

In [4]:
def statistical_analysis(file_path,npy_file1,npy_file2,t,mytype,output_path):
    """Compute and save the replicability table for every sub-folder of ``file_path``.

    For each sub-directory ``f`` of ``file_path`` the two npy files are loaded,
    passed to ``estimate_reliability`` with threshold ``t``, and the result is
    written to ``<output_path>/<f>/<mytype>_reliability_<t>.csv`` (no index
    column). Missing output folders are created.

    Parameters
    ----------
    file_path : str
        Folder whose sub-directories each hold ``npy_file1`` and ``npy_file2``.
    npy_file1, npy_file2 : str
        File names of the two arrays inside each sub-directory.
    t : str or number
        Significance threshold forwarded to ``estimate_reliability``; also
        embedded in the output file name.
    mytype : str
        Label used as the prefix of the output file name.
    output_path : str
        Root folder for the CSV output; a trailing separator is optional.

    Raises
    ------
    OSError
        If ``file_path`` cannot be listed or an expected npy file is missing.
    """
    # Sorted so the processing order does not depend on the file system.
    for f in sorted(os.listdir(file_path)):
        sub_dir = os.path.join(file_path, f)
        if not os.path.isdir(sub_dir):
            # Stray files next to the result folders cannot contain npy inputs.
            continue

        random_data1 = np.load(os.path.join(sub_dir, npy_file1))
        random_data2 = np.load(os.path.join(sub_dir, npy_file2))
        reliability = estimate_reliability(random_data1, random_data2, t=t)

        # os.path.join keeps this correct whether or not output_path ends in a separator.
        out_dir = os.path.join(output_path, f)
        os.makedirs(out_dir, exist_ok=True)
        file_name = os.path.join(out_dir, mytype + '_reliability_' + str(t) + '.csv')
        pd.DataFrame(data=reliability).to_csv(file_name, index=False)

In [7]:
# Run "statistical_analysis" for every imaging type at every significance threshold.
ts = ['0.05', '0.01','fdr','bonferroni']
# Alternative input/output folders kept for reference:
# input_path = "bootstrap_ttest/split_0.5/"
# output_path = "regional_replicability_ttest/split_0.5/"
input_path = "boostrap_correlations/"
output_path = "regional_replicability/"

# (imaging type, npy file of subsample 1, npy file of subsample 2)
imaging_inputs = (
    ("CSA", "random_data_CSA1.npy", "random_data_CSA2.npy"),
    ("CT", "random_data_CT1.npy", "random_data_CT2.npy"),
    ("FC", "random_data_FC1.npy", "random_data_FC2.npy"),
)

for t in ts:
    print("The significance level is: ", t)
    for imaging_type, first_npy, second_npy in imaging_inputs:
        statistical_analysis(input_path, first_npy, second_npy,
                             t, imaging_type, output_path)

The significance level is:  0.05
The significance level is:  0.01
The significance level is:  fdr
The significance level is:  bonferroni
