In [None]:
import pandas as pd
import numpy as np
import os
import random
from scipy.stats import pearsonr,spearmanr

In [None]:
# calculate the correlations between neuroimaging measures and the variable
def _spearman_by_column(data, target, n_cols):
    """Spearman-correlate each of the first ``n_cols`` columns of ``data`` with ``target``.

    Returns an ``(n_cols, 2)`` array: ``[:, 0]`` holds the correlation
    coefficient and ``[:, 1]`` the p-value returned by ``spearmanr``.
    """
    stats = np.zeros((n_cols, 2))
    for col in range(n_cols):
        rho, p_value = spearmanr(data[:, col], target)
        stats[col, 0] = rho
        stats[col, 1] = p_value
    return stats


def calculate_correlations(CSA_data, CT_data, FC_data, variable,
                           n_repeats=100, sample_step=100, seed=None):
    """Split-half resampling of column-wise Spearman correlations.

    For every sample size ``sample_step * (i + 1)`` with
    ``i = 0 .. n_rows // (2 * sample_step) - 1`` the function draws, ``n_repeats``
    times, two disjoint random sets of rows of that size and correlates each
    column of the three data matrices with ``variable`` inside each set.

    Parameters
    ----------
    CSA_data, CT_data, FC_data : 2-D numpy arrays
        Row ``r`` of every array (and element ``r`` of ``variable``) is indexed
        together, so they are expected to describe the same observation.
        The number of rows and columns is taken from ``CSA_data``; only the
        first ``CSA_data.shape[1]`` columns of ``CT_data`` are correlated.
    variable : 1-D numpy array
        Values correlated against every column.
    n_repeats : int, default 100
        Number of random split-half draws per sample size.
    sample_step : int, default 100
        Increment of the sample size between consecutive steps.
    seed : int or None, default None
        If given, sampling uses a private ``random.Random(seed)`` so the result
        is reproducible. If None, the global ``random`` module state is used
        (the historical behaviour).

    Returns
    -------
    tuple of six numpy arrays
        ``(CSA1, CT1, FC1, CSA2, CT2, FC2)``; the ``*1`` arrays come from the
        first row set and the ``*2`` arrays from the second, disjoint one.
        Each has shape ``(n_sizes, n_repeats, n_columns, 2)`` where the last
        axis is ``[correlation, p-value]``. With fewer than ``2 * sample_step``
        rows ``n_sizes`` is 0 and the arrays are empty.

    Raises
    ------
    ValueError
        If ``sample_step`` is smaller than 1 or ``n_repeats`` is negative.
    """
    if sample_step < 1:
        raise ValueError("sample_step must be a positive integer")
    if n_repeats < 0:
        raise ValueError("n_repeats must be non-negative")

    n_rows = CSA_data.shape[0]
    n_morph_cols = CSA_data.shape[1]
    n_fc_cols = FC_data.shape[1]
    # Largest step count for which two disjoint samples still fit in the data.
    n_sizes = n_rows // (2 * sample_step)

    # Both the module and a Random instance expose .sample(); using the module
    # when no seed is given keeps the original dependence on the global state.
    rng = random if seed is None else random.Random(seed)

    random_data_CSA1 = np.zeros((n_sizes, n_repeats, n_morph_cols, 2))
    random_data_CT1 = np.zeros((n_sizes, n_repeats, n_morph_cols, 2))
    random_data_FC1 = np.zeros((n_sizes, n_repeats, n_fc_cols, 2))

    random_data_CSA2 = np.zeros((n_sizes, n_repeats, n_morph_cols, 2))
    random_data_CT2 = np.zeros((n_sizes, n_repeats, n_morph_cols, 2))
    random_data_FC2 = np.zeros((n_sizes, n_repeats, n_fc_cols, 2))

    # Loop-invariant: the candidate row indices never change.
    all_rows = list(range(n_rows))
    all_rows_set = set(all_rows)

    for i in range(n_sizes):
        print(i)  # progress indicator: index of the current sample size
        sample_size = sample_step * (i + 1)
        for j in range(n_repeats):
            rows1 = rng.sample(all_rows, sample_size)
            # The second sample is drawn only from rows not used by the first.
            remaining_rows = list(all_rows_set - set(rows1))
            rows2 = rng.sample(remaining_rows, sample_size)

            target1 = variable[rows1]
            target2 = variable[rows2]

            random_data_CSA1[i, j] = _spearman_by_column(CSA_data[rows1, :], target1, n_morph_cols)
            random_data_CSA2[i, j] = _spearman_by_column(CSA_data[rows2, :], target2, n_morph_cols)

            random_data_CT1[i, j] = _spearman_by_column(CT_data[rows1, :], target1, n_morph_cols)
            random_data_CT2[i, j] = _spearman_by_column(CT_data[rows2, :], target2, n_morph_cols)

            random_data_FC1[i, j] = _spearman_by_column(FC_data[rows1, :], target1, n_fc_cols)
            random_data_FC2[i, j] = _spearman_by_column(FC_data[rows2, :], target2, n_fc_cols)

    return random_data_CSA1,random_data_CT1,random_data_FC1,random_data_CSA2,random_data_CT2,random_data_FC2

In [None]:
# Run the split-half correlation analysis (sample sizes from 100 up to half of the full sample).
t = 'Numeric_memory'
data_path = '/data/sliu/sampling_ukbb_analysis/data/'
data_path2 = os.path.join(data_path, t)

# Names of the input tables stored in the folder of variable `t`.
CSA_file_name = 'CSA_with_controlling_for_total_brain.csv'
CT_file_name = 'CT_with_controlling_for_total_brain.csv'
FC_file_name = 'FC_data.csv'
variable_name = 'variable_with_controlling_for_total_brain.csv'

# Full path of every input table.
CSA_file_path, CT_file_path, FC_file_path, variable_file_path = (
    os.path.join(data_path2, file_name)
    for file_name in (CSA_file_name, CT_file_name, FC_file_name, variable_name)
)

# Imaging tables: every column except the last one is kept.
CSA_data = pd.read_csv(CSA_file_path).iloc[:, :-1].values
CT_data = pd.read_csv(CT_file_path).iloc[:, :-1].values
FC_data = pd.read_csv(FC_file_path).iloc[:, :-1].values
# Variable table: only the first column is used.
variable_data = pd.read_csv(variable_file_path).iloc[:, 0].values

print("Calculating correlations...")
(
    random_data_CSA1,
    random_data_CT1,
    random_data_FC1,
    random_data_CSA2,
    random_data_CT2,
    random_data_FC2,
) = calculate_correlations(CSA_data, CT_data, FC_data, variable_data)

In [None]:
# save the results of correlation analysis
# np.save does not create missing folders; create the output directory first
# so the results of the long-running analysis are not lost to a FileNotFoundError.
# The path is relative to the current working directory.
results_dir = 'new_results/'+t+'/random/'
os.makedirs(results_dir, exist_ok=True)

CSA_file_name1 = results_dir+'random_data_CSA1.npy'
CSA_file_name2 = results_dir+'random_data_CSA2.npy'

CT_file_name1 = results_dir+'random_data_CT1.npy'
CT_file_name2 = results_dir+'random_data_CT2.npy'

FC_file_name1 = results_dir+'random_data_FC1.npy'
FC_file_name2 = results_dir+'random_data_FC2.npy'

np.save(CSA_file_name1,random_data_CSA1)
np.save(CSA_file_name2,random_data_CSA2)

np.save(CT_file_name1,random_data_CT1)
np.save(CT_file_name2,random_data_CT2)

np.save(FC_file_name1,random_data_FC1)
np.save(FC_file_name2,random_data_FC2)