In [1]:
# Author: Davide Aloi - PhD student - University of Birmingham
# Description: this script loads the results from wp_all_8_current_density_metrics (three
# .csv files) and correlates the current density metrics with the DCM values.
# The analysis method used is canonical correlation analysis (CCA).
# TODO: add details on CCA.
# NOTE: work in progress (25-01-2022).
# Imports
import warnings
warnings.filterwarnings("ignore")
import os
import numpy as np
import pandas as pd
import scipy.stats

In [2]:
## Parameters and variables
# Project folder (root of the GitHub repository)
main_folder = 'C:\\Users\\davide\\Documents\\GitHub\\wp1_2_roast\\'
# Folder with the DCM results
dcm_results_folder = 'C:\\Users\\davide\\Documents\\GitHub\\wp1_2_roast\\all_dcm_results\\'
# Folder with the current density results
results_folder = 'D:\\roast-chapter3\\wp_all_results\\'

# Names of the datasets to analyse (one set of result files per name)
# db_names = ['wp2a', 'wp1b'] # waiting for wp1a results from Davinia
db_names = ['wp2a']

In [14]:
from sklearn.cross_decomposition import CCA

# Runs a canonical correlation analysis (CCA) between the current density
# metrics and the DCM connectivity values of every dataset in db_names.
#
# The outputs of each dataset are stored in cca_results[db] so that they are not
# overwritten by the next dataset when db_names holds more than one name.
# After the loop, cca / X_c / Y_c (and the other loop variables) still refer to
# the LAST dataset processed, exactly as before.
cca_results = {}

for db_id, db in enumerate(db_names):

    # Loading DCM results (nb: the unthresholded ones, for the correlation analysis)
    DCM = np.load(os.path.join(dcm_results_folder, db + '_dcm_unthresholded.npy'))
    # Sham DCM results are loaded here but not used by the CCA below
    DCM_sham = np.load(os.path.join(dcm_results_folder, db + '_dcm_sham_unthresholded.npy'))
    # Loading current density results
    # NOTE(review): every column of this CSV enters the CCA as a feature; this assumes
    # the file contains only numeric current density metrics (no exported index or
    # subject-ID column) and that its rows are in the same order as the first axis of
    # DCM - TODO confirm.
    cd_metrics = pd.read_csv(os.path.join(results_folder, db + '_current_density_results.csv'))

    # Simple R correlation (example)
    #x = cd_metrics['m1_medians'].values # Medians current density in M1
    #y = DCM[:, 0, 0] # NB: DCM index related to m1 <-> m1 (of all subjects) = [0,0]
    #r,p  = scipy.stats.pearsonr(x, y)    # Pearson's r
    #print ('R: ' + str(round(r,3)) + ', p: ' +  str(round(p,3)))

    # Extracting DCM values I am interested in (m1<->m1, th<->th, m1->th, th->m1)
    m1m1 = DCM[:, 0, 0]
    thth = DCM[:, 1, 1]
    m1th = DCM[:, 1, 0]
    thm1 = DCM[:, 0, 1]
    # One column per connection, one row per entry along the first axis of DCM
    dcm_df = pd.DataFrame({'m1m1': m1m1, 'thth': thth, 'm1th': m1th, 'thm1': thm1})

    # We first standardise our variables by subtracting the mean and dividing by the
    # standard deviation (column-wise z-scores)
    cd_df_s = (cd_metrics - cd_metrics.mean()) / cd_metrics.std()
    dcm_df_s = (dcm_df - dcm_df.mean()) / dcm_df.std()

    # Damian: Basically your CCA analysis will give you pairs of canonical variates (one of
    # each pair that describes the current measures, and one that describes the DCM). For each
    # significant pair, you then correlate the CV from one side with each of the individual
    # measures from the other side.
    # This will basically tell you that e.g., Current density overall is most strongly related to
    # DCM connectivity in pathways X and Y, and that DCM connectivity overall is most strongly
    # related to current density in region A and B.

    cca = CCA(n_components = 2)
    cca.fit(cd_df_s, dcm_df_s)
    X_c, Y_c = cca.transform(cd_df_s, dcm_df_s) # X = current measures, Y = DCM measures

    # Keep this dataset's fitted model and canonical variates retrievable by name
    cca_results[db] = {'cca': cca, 'X_c': X_c, 'Y_c': Y_c}


In [26]:
# Display the canonical variates of the current measures (the X side of the CCA).
# The CCA was fitted with n_components = 2, hence the two columns; rows follow the
# row order of the current density table. X_c holds the values of the last dataset
# processed by the loop over db_names.
X_c

array([[ 0.62387259, -0.81923435],
       [ 0.25689935, -0.10783622],
       [ 0.01079119, -0.54340172],
       [ 0.4759515 , -0.27450392],
       [-0.1927584 ,  0.63289417],
       [ 0.37981979, -1.25618762],
       [-0.66263521,  0.69415681],
       [-0.62758379, -0.12698738],
       [-1.15972746, -0.24444444],
       [-0.01204531,  0.24319472],
       [-0.51987833, -0.17585071],
       [-0.64966614,  0.65427355],
       [-0.2287235 , -1.45522025],
       [ 0.2413379 , -0.13711494],
       [-0.01997095,  0.5911504 ],
       [-0.19435387,  0.60016822],
       [ 0.92602281, -0.09285578],
       [-0.36446197, -1.30240238],
       [ 1.16073224,  0.18270058],
       [-0.1906962 ,  1.34232807],
       [ 0.74707375,  1.59517317]])

In [None]:
for db_id, db in enumerate(db_names):
    