In [31]:
import numpy as np
import networkx as nx
from os import path as op
import os
from glob import glob

from collections import OrderedDict

In [34]:
def load_graphs(output_dir,subjects,parcellation_scheme,weight):

    print('Load graph for {} parcellation scheme '.format(parcellation_scheme))
    
    if parcellation_scheme == 'Lausanne2018':
        bids_atlas_label = 'L2018'
    elif parcellation_scheme == 'NativeFreesurfer':
        bids_atlas_label = 'Desikan'
        
    multiscale_graphs = OrderedDict()

    if parcellation_scheme == 'NativeFreesurfer':
        
        singlescale_graphs = OrderedDict()
        
        for subj in subjects:
            print('> Process subject {}'.format(subj))
            subj_dir = os.path.join(output_dir,subj)
            subj_session_dirs = glob(op.join(subj_dir, "ses-*"))
            subj_sessions = ['ses-{}'.format(subj_session_dir.split("-")[-1]) for subj_session_dir in subj_session_dirs]

            if len(subj_sessions) > 0: #Session structure
                
                for subj_session in subj_sessions:
                    print('  Process session {} ()'.format(subj_session))
                    conn_derivatives_dir = op.join(output_dir, subj, subj_session, 'connectivity')
                    #Extract the connectivity graph 
                    connmat_fname = op.join(conn_derivatives_dir,'{}_{}_label-{}_conndata-snetwork_connectivity.gpickle'.format(subj,subj_session,bids_atlas_label))
                    print('    - Load graph {}'.format(connmat_fname))
                    G = nx.read_gpickle(connmat_fname)
                    
                    for u,v,d in G.edges(data=True):
                        weight_val = d[weight]
                        G.remove_edge(u,v)
                        G.add_edge(u,v)
                        G[u][v][weight] = weight_val
                        
                    fname = os.path.basename(connmat_fname)
                    singlescale_graphs[fname] = G
                    #Extract the connectivity matrix for a specific metric
                    #connmat = nx.to_numpy_matrix(conngraph,weight=weight,dtype=np.float32)
                singlescale_graphs[fname] = conngraph
                    
            else:
                conn_derivatives_dir = op.join(output_dir, subj,'connectivity')
                #Extract the connectivity graph 
                conngraph_fname = op.join(conn_derivatives_dir,'{}_label-{}_conndata-snetwork_connectivity.gpickle'.format(subj,bids_atlas_label))
                print('    - Load graph {}'.format(connmat_fname))
                G = nx.read_gpickle(connmat_fname)
                    
                for u,v,d in G.edges(data=True):
                    weight_val = d[weight]
                    G.remove_edge(u,v)
                    G.add_edge(u,v)
                    G[u][v][weight] = weight_val

                fname = os.path.basename(connmat_fname)
                singlescale_graphs[fname] = G
                #Extract the connectivity matrix for a specific metric
                #connmat = nx.to_numpy_matrix(conngraph,weight=weight,dtype=np.float32)
        multiscale_graphs['scale1'] = singlescale_graphs
                
    else:
        # For each parcellation scale
        for scale in np.arange(1,6):
            singlescale_graphs = OrderedDict()
            for subj in subjects:
                print('> Process subject {}'.format(subj))
                subj_dir = os.path.join(output_dir,subj)
                subj_session_dirs = glob(op.join(subj_dir, "ses-*"))
                subj_sessions = ['ses-{}'.format(subj_session_dir.split("-")[-1]) for subj_session_dir in subj_session_dirs]
                if len(subj_sessions) > 0: #Session structure
                    for subj_session in subj_sessions:
                        print('  Process session {}'.format(subj_session))
                        conn_derivatives_dir = op.join(output_dir, subj, subj_session, 'connectivity')
                        #Extract the connectivity graph 
                        #self.subject+'_label-'+bids_atlas_label+'_desc-scale5_conndata-snetwork_connectivity'
                        connmat_fname = op.join(conn_derivatives_dir,'{}_{}_label-{}_desc-scale{}_conndata-snetwork_connectivity.gpickle'.format(subj,subj_session,bids_atlas_label,scale))
                        print('    - Load graph {}'.format(connmat_fname))
                        G = nx.read_gpickle(connmat_fname)
                    
                        for u,v,d in G.edges(data=True):
                            weight_val = d[weight]
                            G.remove_edge(u,v)
                            G.add_edge(u,v)
                            G[u][v][weight] = weight_val

                        fname = os.path.basename(connmat_fname)
                        singlescale_graphs[fname] = G
                        #Extract the connectivity matrix for a specific metric
                        #connmat = nx.to_numpy_matrix(conngraph,weight=weight,dtype=np.float32)
                else:
                    conn_derivatives_dir = op.join(output_dir, subj,'connectivity')
                    #Extract the connectivity graph 
                    connmat_fname = op.join(conn_derivatives_dir,'{}_label-{}_desc-scale{}_conndata-snetwork_connectivity.gpickle'.format(subj,bids_atlas_label,scale))
                    print('    - Load graph {}'.format(connmat_fname))
                    G = nx.read_gpickle(connmat_fname)
                    
                    for u,v,d in G.edges(data=True):
                        weight_val = d[weight]
                        G.remove_edge(u,v)
                        G.add_edge(u,v)
                        G[u][v][weight] = weight_val
                        
                    fname = os.path.basename(connmat_fname)
                    singlescale_graphs[fname] = G
                    #Extract the connectivity matrix for a specific metric
                    #connmat = nx.to_numpy_matrix(conngraph,weight=weight,dtype=np.float32)
            
            multiscale_graphs['scale{}'.format(scale)] = singlescale_graphs

    return multiscale_graphs

In [35]:
multiscale_graphs = load_graphs(output_dir='/Users/sebastientourbier/Desktop/DS-test/derivatives/cmp',
            parcellation_scheme='Lausanne2018',
            subjects=['sub-A001','sub-A002'],
            weight='fiber_density')

Load graph for Lausanne2018 parcellation scheme 
> Process subject sub-A001
  Process session ses-20150203160809
    - Load graph /Users/sebastientourbier/Desktop/DS-test/derivatives/cmp/sub-A001/ses-20150203160809/connectivity/sub-A001_ses-20150203160809_label-L2018_desc-scale1_conndata-snetwork_connectivity.gpickle
  Process session ses-20150203160808
    - Load graph /Users/sebastientourbier/Desktop/DS-test/derivatives/cmp/sub-A001/ses-20150203160808/connectivity/sub-A001_ses-20150203160808_label-L2018_desc-scale1_conndata-snetwork_connectivity.gpickle
> Process subject sub-A002
    - Load graph /Users/sebastientourbier/Desktop/DS-test/derivatives/cmp/sub-A002/connectivity/sub-A002_label-L2018_desc-scale1_conndata-snetwork_connectivity.gpickle
> Process subject sub-A001
  Process session ses-20150203160809
    - Load graph /Users/sebastientourbier/Desktop/DS-test/derivatives/cmp/sub-A001/ses-20150203160809/connectivity/sub-A001_ses-20150203160809_label-L2018_desc-scale2_conndata-sne

Check ordered dictionary of graphs for each scale

In [39]:
for scale, graphs in multiscale_graphs.items():
    print('{} / list: {}'.format(scale,graphs))

scale1 / list: OrderedDict([('sub-A001_ses-20150203160809_label-L2018_desc-scale1_conndata-snetwork_connectivity.gpickle', <networkx.classes.graph.Graph object at 0xa134097d0>), ('sub-A001_ses-20150203160808_label-L2018_desc-scale1_conndata-snetwork_connectivity.gpickle', <networkx.classes.graph.Graph object at 0xa13409b50>), ('sub-A002_label-L2018_desc-scale1_conndata-snetwork_connectivity.gpickle', <networkx.classes.graph.Graph object at 0x103e98fd0>)])
scale2 / list: OrderedDict([('sub-A001_ses-20150203160809_label-L2018_desc-scale2_conndata-snetwork_connectivity.gpickle', <networkx.classes.graph.Graph object at 0xa13409e50>), ('sub-A001_ses-20150203160808_label-L2018_desc-scale2_conndata-snetwork_connectivity.gpickle', <networkx.classes.graph.Graph object at 0xa13409c50>), ('sub-A002_label-L2018_desc-scale2_conndata-snetwork_connectivity.gpickle', <networkx.classes.graph.Graph object at 0xa134095d0>)])
scale3 / list: OrderedDict([('sub-A001_ses-20150203160809_label-L2018_desc-scale

In [None]:
def scan_statistic(mygs, i):
    """
    Computes scan statistic-i on a set of graphs
    Required Parameters:
        mygs:
            - Dictionary of graphs
        i:
            - which scan statistic to compute
    """
    ss = OrderedDict()
    for key in list(mygs.keys()):
        g = mygs[key]
        tmp = np.array(())
        for n in g.nodes():
            sg = nx.ego_graph(g, n, radius=i)
            
            tmp = np.append(
                tmp,
                np.sum([sg.get_edge_data(e[0], e[1])[sg.get_edge_data(e[0], e[1]).keys()[0]] for e in sg.edges()]),
            )
        ss[key] = tmp
    return ss

def write(output_dir, metric, data, parcellation_scheme, scale=''):
    """
    Write computed derivative to disk in a pickle file
    Required parameters:
        output_dir:
            - Path to derivative save location
        metric:
            - The value that was calculated
        data:
            - The results of this calculation
        parcellation scheme:
            - Parcellation scheme used (L2018, L2008, Desikan)
        scale:
            - scale (only used if parcellation scheme is L2008 or L2018)
    """
    import pickle
    if parcellation_scheme == 'Desikan':
        with open(op.join(output_dir, "{}_{}.pkl".format(parcellation_scheme,metric)), "wb") as of:
            pickle.dump({metric: data}, of)
    else:
        with open(op.join(output_dir, "{}_{}_{}.pkl".format(parcellation_scheme,scale,metric)), "wb") as of:
            pickle.dump({metric: data}, of)
        

def show_means(data):
    print(
        (
            "Subject Means: "
            + ", ".join(["%.2f" % np.mean(data[key]) for key in list(data.keys())])
        )
    )

def compute_network_metrics(output_dir, multiscale_graphs, parcellation_scheme,weight):
    for scale, graphs in multiscale_graphs.items():
        print('> Process {}'.format(scale))     
        nodes = nx.number_of_nodes(list(graphs.values())[0])
        #  Number of non-zero edges (i.e. binary edge count)
        print("Computing: NNZ")
        nnz = OrderedDict((subj, len(nx.edges(graphs[subj]))) for subj in graphs)
        print(("Sample Mean: %.2f" % np.mean(list(nnz.values()))))
        write(output_dir, "number_non_zeros", nnz, parcellation_scheme, scale)
        
        # Scan Statistic-1
        print("Computing: Max Local Statistic Sequence")
        temp_ss1 = scan_statistic(graphs, 1)
        ss1 = temp_ss1
        write(output_dir, "locality_statistic", ss1, parcellation_scheme, scale)
        show_means(temp_ss1)
        
        #   Clustering Coefficients
        print("Computing: Clustering Coefficient Sequence")
        temp_cc = OrderedDict(
            (subj, list(nx.clustering(graphs[subj], weight=weight).values())) for subj in graphs
        )
        ccoefs = temp_cc
        write(output_dir, "clustering_coefficients", ccoefs, parcellation_scheme, scale)
        show_means(temp_cc)
        
        #  Degree sequence
        print("Computing: Degree Sequence")
        test = OrderedDict()
        total_deg = OrderedDict(
            (subj, np.array(list(dict(nx.degree(graphs[subj], weight=weight)).values())))
            for subj in graphs
        )
        ipso_deg = OrderedDict()
        contra_deg = OrderedDict()
        for subj in graphs:  # TODO GK: remove forloop and use comprehension maybe?
            g = graphs[subj]
            N = len(list(g.nodes()))
            LLnodes = list(g.nodes())[0 : N / 2]  # TODO GK: don't assume hemispheres
            LL = g.subgraph(LLnodes)
            LLdegs = [LL.degree(weight=weight)[n] for n in LLnodes]

            RRnodes = list(g.nodes())[N / 2 : N]  # TODO GK: don't assume hemispheres
            RR = g.subgraph(RRnodes)
            RRdegs = [RR.degree(weight=weight)[n] for n in RRnodes]

            LRnodes = g.nodes()
            ipso_list = LLdegs + RRdegs
            degs = [g.degree(weight=weight)[n] for n in LRnodes]
            contra_deg[subj] = [a_i - b_i for a_i, b_i in zip(degs, ipso_list)]
            ipso_deg[subj] = ipso_list
        
        # import pdb; pdb.set_trace()

        deg = {"total_deg": total_deg, "ipso_deg": ipso_deg, "contra_deg": contra_deg}
        write(output_dir, "degree_distribution", deg, parcellation_scheme, scale)
        show_means(total_deg)
        
        #  Edge Weights
        print("Computing: Edge Weight Sequence")
        temp_ew = OrderedDict(
            (
                s,
                [
                    graphs[s].get_edge_data(e[0], e[1])[graphs[s].get_edge_data(e[0], e[1]).keys()[0]]
                    for e in graphs[s].edges()
                ],
            )
            for s in graphs
        )
        ew = temp_ew
        write(output_dir, "edges_weight", ew, parcellation_scheme, scale)
        show_means(temp_ew)

        # Eigen Values
        print("Computing: Eigen Value Sequence")
        laplac = OrderedDict(
            (subj, nx.normalized_laplacian_matrix(graphs[subj])) for subj in graphs
        )
        eigs = OrderedDict(
            (subj, np.sort(np.linalg.eigvals(laplac[subj].A))[::-1]) for subj in graphs
        )
        write(output_dir, "eigen_sequence", eigs, parcellation_scheme, scale)
        print(
            (
                "Subject Maxes: "
                + ", ".join(["%.2f" % np.max(eigs[key]) for key in list(eigs.keys())])
            )
        )

        # Betweenness Centrality
        print("Computing: Betweenness Centrality Sequence")
        nxbc = nx.algorithms.betweenness_centrality
        temp_bc = OrderedDict(
            (subj, list(nxbc(graphs[subj], weight=weight).values())) for subj in graphs
        )
        centrality = temp_bc
        write(output_dir, "betweenness_centrality", centrality, parcellation_scheme, scale)
        show_means(temp_bc)

        # Mean connectome
        print("Computing: Mean Connectome")
        nxnp = nx.to_numpy_matrix
        adj = OrderedDict(
            (subj, nxnp(graph, nodelist=sorted(graph.nodes())))
            for subj, graph in graphs.items()
        )
        mat = np.zeros(list(adj.values())[0].shape)
        for subj in adj:
            mat += adj[subj]
        mat = mat / len(list(adj.keys()))
        write(output_dir, "mean_connectome", mat, parcellation_scheme, scale)
            
    return mat
# Need to create group folder
mat = compute_network_metrics('/Users/sebastientourbier/Desktop/DS-test/derivatives/cmp/group', multiscale_graphs, 'L2018','fiber_density')  

> Process scale1
Computing: NNZ
Sample Mean: 1183.00
Computing: Max Local Statistic Sequence
Subject Means: 0.14, 0.14, 0.14
Computing: Clustering Coefficient Sequence
Subject Means: 0.01, 0.01, 0.01
Computing: Degree Sequence
Subject Means: 0.01, 0.01, 0.01
Computing: Edge Weight Sequence
Subject Means: 0.00, 0.00, 0.00
Computing: Eigen Value Sequence
Subject Maxes: 1.21, 1.21, 1.21
Computing: Betweenness Centrality Sequence
Subject Means: 0.04, 0.04, 0.04
Computing: Mean Connectome
> Process scale2
Computing: NNZ
Sample Mean: 2129.00
Computing: Max Local Statistic Sequence
Subject Means: 0.16, 0.16, 0.16
Computing: Clustering Coefficient Sequence
Subject Means: 0.01, 0.01, 0.01
Computing: Degree Sequence
Subject Means: 0.01, 0.01, 0.01
Computing: Edge Weight Sequence
Subject Means: 0.00, 0.00, 0.00
Computing: Eigen Value Sequence
Subject Maxes: 1.21, 1.21, 1.21
Computing: Betweenness Centrality Sequence
Subject Means: 0.02, 0.02, 0.02
Computing: Mean Connectome
> Process scale3
Compu