In [1]:
import os

import networkx as nx
import numpy as np
import pandas as pd

from graspy.utils import import_graph, pass_to_ranks
from graspy.embed import OmnibusEmbed, ClassicalMDS
from graspy.cluster import GaussianCluster
from graspy.plot import heatmap, pairplot

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
def import_edgelist(path, nodetype=int, delimiter=None):
    """
    Load a weighted edgelist file and convert it to an adjacency matrix.

    The file is parsed with ``networkx.read_weighted_edgelist`` and the
    resulting graph is handed to ``graspy.utils.import_graph``. Per the
    original note on this function, nodes are ordered by sorted node value.

    Parameters
    ----------
    path : str
        Location of the edgelist file to read.
    nodetype : type, optional (default=int)
        Forwarded to ``read_weighted_edgelist``; node labels read from the
        file are converted to this type.
    delimiter : str, optional
        Forwarded to ``read_weighted_edgelist`` as the field separator.

    Returns
    -------
    graph : array-like, shape (n_vertices, n_vertices)
        Adjacency matrix of the graph created from edgelist.
    """
    edge_graph = nx.read_weighted_edgelist(
        path,
        nodetype=nodetype,
        delimiter=delimiter,
    )
    adjacency = import_graph(edge_graph)
    return adjacency

## Parse the files in the fMRI and dMRI folders and find subjects with exactly one fMRI and one dMRI file

In [3]:
# Directories whose entries are the per-scan fMRI and dMRI (DWI) files.
fmri_path = '../../data/HBN/fmri/desikan/'
fmris = os.listdir(fmri_path)

dmri_path = '../../data/HBN/dwi/desikan/'
dmris = os.listdir(dmri_path)

# Group file names by their subject token (the text before the first '_').
# Grouping on the exact token, instead of testing `subject in filename`,
# keeps one subject ID from matching inside another file's name, and it
# replaces the repeated scans of both listings with one pass over each.
fmris_by_subject = {}
for fname in fmris:
    fmris_by_subject.setdefault(fname.split('_')[0], []).append(fname)

dmris_by_subject = {}
for fname in dmris:
    dmris_by_subject.setdefault(fname.split('_')[0], []).append(fname)

subjects = [s.split('_')[0] for s in fmris]
subjects_unique = sorted(set(subjects))

# Keep only subjects with exactly one fMRI file and exactly one dMRI file.
# A subject with no dMRI file at all has no entry in dmris_by_subject,
# hence the .get(..., []) fallback.
subjects_corrected = [
    subject
    for subject in subjects_unique
    if len(fmris_by_subject[subject]) == 1
    and len(dmris_by_subject.get(subject, [])) == 1
]

# Parallel lists: position i in both belongs to subjects_corrected[i].
fmris_corrected = [fmris_by_subject[subject][0] for subject in subjects_corrected]
dmris_corrected = [dmris_by_subject[subject][0] for subject in subjects_corrected]

# Cell output: number of matched fMRI and dMRI files.
len(fmris_corrected), len(dmris_corrected)

(293, 293)

In [4]:
# Sanity check: the two file lists must be the same length and pair up
# subject-for-subject. Iterating over the lists themselves (rather than a
# hard-coded count) keeps the check complete when the dataset changes.
assert len(fmris_corrected) == len(dmris_corrected), (
    "fMRI and dMRI file lists differ in length"
)

for fmri_name, dmri_name in zip(fmris_corrected, dmris_corrected):
    f = fmri_name.split('_')
    d = dmri_name.split('_')

    # Only the first '_'-separated token (the subject) is compared; the
    # second token is not required to match.
    assert f[0] == d[0], "subject mismatch: {} vs {}".format(fmri_name, dmri_name)

## Remove subjects with empty DWI or fMRI scans (detected as fMRI and DWI graphs whose shapes differ)

In [5]:
# Rebuild `fmris` / `dmris` (rebinding the raw directory listings) so they
# hold only the file pairs whose two adjacency matrices have the same shape.
# NOTE(review): presumably an empty or incomplete scan yields a graph with
# fewer vertices and is caught by the shape mismatch — confirm.
fmris = []
dmris = []

for fmri, dmri in zip(fmris_corrected, dmris_corrected):
    # Each file is read once here purely to compare shapes; the graphs
    # themselves are not retained.
    fmri_graph = import_edgelist(os.path.join(fmri_path, fmri))
    dmri_graph = import_edgelist(os.path.join(dmri_path, dmri))

    if fmri_graph.shape == dmri_graph.shape:
        fmris.append(fmri)
        dmris.append(dmri)

## Remove subjects without any demographic information

In [6]:
# Reduce every retained fMRI file name to its bare subject ID: take the
# token before the first '_', then the part after the first '-' in it.
subjects = [fname.split('_')[0].split('-')[1] for fname in fmris]

# Cell output: number of subjects that survived the filtering above.
len(subjects)

291

In [9]:
# Read the basic-demographics CSV into a DataFrame.
demographics_csv = '../../data/HBN_phenotypic_data/9994_Basic_Demos_20180927.csv'
df = pd.read_csv(demographics_csv)

In [10]:
# Narrow the table to the subject identifier plus the Sex and Age columns,
# then preview the first rows.
demo_columns = ['Patient_ID', 'Sex', 'Age']
df = df[demo_columns]
df.head()

Unnamed: 0,Patient_ID,Sex,Age
0,BASIC1_003,BASIC1_004,BASIC1_005
1,NDARYM832PX3,1,7.048254
2,NDARNJ687DMC,1,6.348163
3,NDARRM363BXZ,0,10.052589
4,NDARUW586LLL,1,12.319415


In [11]:
# Write out only the rows whose Patient_ID is one of the retained subjects;
# the DataFrame index is left out of the file.
is_retained = df['Patient_ID'].isin(subjects)
df.loc[is_retained].to_csv('./subject_information.csv', index=False)