# Notebook to load and prepare the brain datasets

In [1]:
import pandas as pd
from glob import glob
from itertools import combinations

Set the paths to the main folders where the correlation matrices, high-order interaction (HOI) metrics and clinical features of the volunteers are stored.

In [2]:
# Root folders of the input files. Keep the trailing '/': a glob pattern is
# concatenated onto these strings later in the notebook.
corr_dir = "path/to/correlation/files/"
hoi_dir = "path/to/hoi/files/"

Load the brain datasets and build a dataframe with the volunteers' information, such as Subject ID, fMRI resting-state run number (REST), Gender, Age, etc.

In [3]:
# Search for all files in the directory including subdirectories
def _as_recursive_pattern(root, file_pattern):
    """Return ``root`` extended with the recursive glob suffix ``**/<file_pattern>``.

    ``root`` is returned unchanged when it already ends with that suffix, so
    re-running this cell does not stack the suffix a second time (which would
    make the later globs match nothing). A missing trailing path separator is
    added first; without it '**' would be glued to the last folder name and
    glob would no longer treat it as the recursive wildcard.
    """
    suffix = '**/' + file_pattern
    if root.endswith(suffix):
        return root
    # An empty root means "relative to the current directory": leave it as is.
    if root and not root.endswith(('/', '\\')):
        root += '/'
    return root + suffix


hoi_dir = _as_recursive_pattern(hoi_dir, '*.csv')
corr_dir = _as_recursive_pattern(
    corr_dir,
    '*_Atlas_MSMAll_hp2000_clean_Schaefer2018_100Parcels_7Networks_Tian_Subcortex_S1_3T_connmatrix.txt',
)

def _index_files_by_run(files, id_gap, path_column, pattern):
    """Build a dataframe with one row per file: subject ID, REST run and file path.

    Both values are read from fixed offsets around the "_REST" marker in the
    path: the subject ID is the 6 characters that end ``id_gap`` characters
    before the marker, and the run number is the single character right after
    it. The marker is located in every path separately, so paths of different
    lengths are parsed correctly.

    Parameters
    ----------
    files : list of str
        File paths to index.
    id_gap : int
        Number of characters between the end of the subject ID and "_REST".
    path_column : str
        Name of the output column that stores the file paths.
    pattern : str
        Glob pattern that produced ``files``; only used in the error message.

    Raises
    ------
    FileNotFoundError
        If ``files`` is empty.
    ValueError
        If a path does not follow the expected naming scheme.
    """
    if not files:
        raise FileNotFoundError(f"No files matched the pattern: {pattern}")
    subjects, runs = [], []
    for path in files:
        try:
            marker = path.index("_REST")
            subjects.append(int(path[marker - id_gap - 6:marker - id_gap]))
            runs.append(int(path[marker + 5]))
        except (ValueError, IndexError) as err:
            raise ValueError(f"Cannot parse subject ID / REST run from: {path}") from err
    return pd.DataFrame({'Subject': subjects, 'REST': runs, path_column: files})


# Get all files of the individual's correlation matrices in the directory and store in a dataframe
# with the corresponding subject, REST and file path.
# In these file names 6 characters separate the subject ID from the "_REST" marker.
corr_files = glob(corr_dir, recursive=True)
df_corr = _index_files_by_run(corr_files, 6, 'corr_dir', corr_dir)

# Get all files of HOIs in the directory and store in a dataframe with the corresponding subject,
# REST and file path.
# In these file names 5 characters separate the subject ID from the "_REST" marker.
hoi_files = glob(hoi_dir, recursive=True)
df_hoi = _index_files_by_run(hoi_files, 5, 'hoi_dir', hoi_dir)

# Pair every HOI file with the correlation matrix of the same subject and REST run.
# The right join keeps all HOI rows; a run without a correlation matrix gets NaN in `corr_dir`.
df_corr_hoi = df_corr.merge(df_hoi, how='right', on=['Subject', 'REST'])

# Load the table that maps each subject ID to the individual's gender
subject_gender_dir = "./hcp_subject_genders_info.xlsx"
df_gender = pd.read_excel(subject_gender_dir, engine='openpyxl', usecols=['Subject', 'Gender'])

# Attach the individuals' information to the correlation/HOI table and discard every row
# that has a missing value. The result holds the individuals selected for the experiments
# of this study.
df_gender_corr_hoi = df_gender.merge(df_corr_hoi, on=["Subject"], how="inner").dropna()

# Plain Python lists with the correlation-matrix and HOI file paths of the selected
# individuals, in the row order of the merged dataframe
files_corr = df_gender_corr_hoi["corr_dir"].tolist()
files_hoi = df_gender_corr_hoi["hoi_dir"].tolist()

# Number of regions of interest (ROIs).
# NOTE(review): presumably the 100 Schaefer parcels plus the Tian S1 subcortical regions
# named in the connectivity-matrix file pattern - confirm.
n_rois = 116

# All unordered triples (i, j, k) with i < j < k of ROI indices, in lexicographic order
roi_indices = range(n_rois)
triangles = [*combinations(roi_indices, 3)]

Load, compute and store the mean individual correlation matrix, and the z-scored HOI hyperedges using the Interaction Information ($II$) and Total Correlation ($TC$) metrics.

In [4]:
# Run the setup script inside this notebook's namespace (-i), so it can use the
# variables defined in the cells above. Per the output recorded below, it saves the
# averaged correlation matrix, converts the raw hyperedges to z-scored .npy files and
# saves the averaged z-scored hyperedges; the recorded run took about 21 minutes.
%run -i "hgsp_brain_dataset_setup.py"

  0%|          | 8/1978 [00:00<01:04, 30.64it/s]

Computing averaged correlation matrix...


100%|██████████| 1978/1978 [00:55<00:00, 35.58it/s]


Done! Averaged correlation matrix saved to ./Schaefer_100Parcels_Atlas/corr_avr_matrix.txt
Converting raw hyperedges to z-scored .npy files for further usage...


100%|██████████| 1978/1978 [19:51<00:00,  1.66it/s]
  0%|          | 6/1978 [00:00<00:36, 53.46it/s]

Done!
Computing mean individual z-scored hyperedges...


100%|██████████| 1978/1978 [00:32<00:00, 60.67it/s]

Done! Averaged z-scored hyperedges saved to ./Schaefer_100Parcels_Atlas/hoi_zscored_avr_hyperedges.npy



