In [2]:
import numpy as np
import os
import scipy.io as sio

from scipy.optimize import linear_sum_assignment
from scipy.spatial import distance


In [14]:
# First, run the code in extract_precision_matrices.ipynb to obtain precision matrices: FCprec_concat_allsub.mat
# which is a .mat file with all subjects' precision FC matrices (C) in the format k*n*n where k = num subjects and n = number of ROIs

# Run unpack_precision_mats to split FCprec_concat_allsub.mat into single subject .mat files

cwd='/Users/emilyolafson/GIT/stroke-graph-matching/data/'
unpack_precision_mats(cwd)

# After unpack_precision_mats is run, format precision data:
# - Set diagonals of precision FC to 0.
# - create a .mat file that contains precision FCs in a cell array for input to graph matching. In this case,
# subjects in rows & longitudinal time points is in columns.

# MATLAB code for formatting this dataset: format_precision.m

# then run code below:

{'C': array([[ 5.83946277e-01, -1.82261558e-03, -1.72499417e-03, ...,
         1.52989333e-04,  9.74876573e-05,  4.69743582e-06],
       [-1.82261558e-03,  5.83694482e-01, -1.49624760e-03, ...,
        -2.71981816e-04,  4.40401566e-05,  1.48237634e-05],
       [-1.72499417e-03, -1.49624760e-03,  5.84020376e-01, ...,
        -4.33680565e-06,  4.98653653e-05,  2.27910545e-05],
       ...,
       [ 1.52989333e-04, -2.71981816e-04, -4.33680565e-06, ...,
         5.83504014e-01, -5.80117221e-05,  2.28329226e-05],
       [ 9.74876573e-05,  4.40401566e-05,  4.98653653e-05, ...,
        -5.80117221e-05,  5.83431158e-01, -7.40518634e-06],
       [ 4.69743582e-06,  1.48237634e-05,  2.27910545e-05, ...,
         2.28329226e-05, -7.40518634e-06,  5.83431101e-01]])}
{'C': array([[ 5.84479523e-01, -1.71654747e-03, -2.53551920e-03, ...,
        -5.40746153e-04,  6.10418489e-05,  9.90785629e-06],
       [-1.71654747e-03,  5.83825259e-01, -2.13059981e-03, ...,
         5.34478074e-04,  3.12614467e-05, 

{'C': array([[ 5.84640802e-01, -2.60444770e-03, -5.66716876e-04, ...,
         4.62252784e-04,  9.93534155e-05, -3.52556985e-05],
       [-2.60444770e-03,  5.84175181e-01, -2.11585268e-03, ...,
         5.68578022e-05, -5.28391363e-05, -1.62030163e-06],
       [-5.66716876e-04, -2.11585268e-03,  5.84561380e-01, ...,
         3.48192181e-04,  2.07653403e-05,  2.86200499e-05],
       ...,
       [ 4.62252784e-04,  5.68578022e-05,  3.48192181e-04, ...,
         5.83540790e-01, -9.36127790e-05, -4.91596078e-05],
       [ 9.93534155e-05, -5.28391363e-05,  2.07653403e-05, ...,
        -9.36127790e-05,  5.83433825e-01, -1.21828851e-05],
       [-3.52556985e-05, -1.62030163e-06,  2.86200499e-05, ...,
        -4.91596078e-05, -1.21828851e-05,  5.83433558e-01]])}
{'C': array([[ 5.84657889e-01, -5.28452668e-03, -3.26641291e-03, ...,
         4.18390274e-04, -3.09256720e-05, -3.57589963e-05],
       [-5.28452668e-03,  5.85545601e-01, -5.03451258e-03, ...,
         6.93109687e-04, -7.54636020e-05, 

{'C': array([[ 5.83726893e-01, -1.11065941e-03,  5.20578353e-04, ...,
        -5.49593479e-04, -7.29288329e-06, -5.33070575e-05],
       [-1.11065941e-03,  5.83686367e-01,  4.15445054e-04, ...,
        -3.66526058e-04, -5.71236979e-07, -8.77588193e-05],
       [ 5.20578353e-04,  4.15445054e-04,  5.84782905e-01, ...,
         1.29423202e-03,  4.30669493e-05,  1.01268159e-04],
       ...,
       [-5.49593479e-04, -3.66526058e-04,  1.29423202e-03, ...,
         5.83663675e-01, -4.67103884e-05, -1.80239156e-04],
       [-7.29288329e-06, -5.71236979e-07,  4.30669493e-05, ...,
        -4.67103884e-05,  5.83431648e-01, -2.76032306e-05],
       [-5.33070575e-05, -8.77588193e-05,  1.01268159e-04, ...,
        -1.80239156e-04, -2.76032306e-05,  5.83433586e-01]])}
{'C': array([[ 5.83771904e-01, -1.08786045e-03,  3.11230288e-03, ...,
        -5.66599331e-04,  2.94445115e-05,  7.47129511e-05],
       [-1.08786045e-03,  5.83639007e-01,  1.33904332e-03, ...,
        -2.08946243e-04, -9.34081340e-06, 

In [35]:
cwd = os.getcwd()
data_dir = '/Users/emilyolafson/GIT/stroke-graph-matching/data'
fc_dir =data_dir + '/precision/stroke/'
fc=sio.loadmat(fc_dir + '/C_precision.mat')
fc=fc['C_precision']
dx='stroke'
intervals=[[0, 1], [1, 2], [2, 3], [3, 4]] #time point comparisons. 0=1, 1=2, etc.
alpha=0
alphacounter=0
betacounter=0
session_swaps=list()
for i in range(0, 4):
    run_graph_matching_reg(intervals[i], fc)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46


In [30]:
print(fc.shape)


(47, 5)


In [34]:
def unpack_precision_mats(cwd):
    """Turn concatenated precision FC matrix into multiple single subject/single session matrices.
    Save each single suject to /precision/ folder."""
    data_dir = str(cwd) 
    subj_all = np.genfromtxt(data_dir + 'precision/subjects.txt', dtype = 'str') #list of subjects in same order as precision matrices are saved.    
    fc_dir = data_dir + 'precision/'
    output_prec = sio.loadmat(fc_dir + 'FCprec_concat_allsub.mat')
    prec = output_prec['C'] 
    c = 0
    for sub in subj_all:
        
        mdic = {"C": prec[c]}
        print(mdic)
        sio.savemat(fc_dir + sub, mdic)
        c=c+1
        
def remap_cost_regularized(interval, fc):
    """Cost function: the cost of remapping each node in matrix A to every other node in matrix B. 
    Output is fed into graph matching algorithm. Regularization: (costmatrix - alpha*I) 
    where alpha is a regularization parameter controlling the degree to which a remapping
    to itself is preferred over remapping to any other node."""
    cwd = os.getcwd()
    data_dir = '/Users/emilyolafson/GIT/stroke-graph-matching/data'

    costmat_all=list()

    nROIs=fc[0][0].shape[0]
    
    #calculate cost between all ROIs
    for i in range(0,fc.shape[0]): #subject 
        print(i)
        if (dx == 'stroke' and i==5 and interval == [3,4]): #sub6 has no 4,5
            continue 
        if (dx == 'stroke' and i==11 and (interval == [3,4] or interval == [2,3])): #sub12 has no 4,5 OR 3,4
            continue
        if (dx == 'stroke' and i==19 and (interval == [3,4] or interval == [2,3] or interval == [1,2])): #sub20 has no 4,5 OR 3,4 OR 2,3
            continue
            
        costmat=np.zeros((nROIs,nROIs))

        for x in range(0,nROIs): #x = time point 1.
            a=fc[i,interval[0]][x]

            for y in range(0,nROIs): #y = time point 2.
                b=fc[i,interval[1]][y]
                costmat[x,y]=distance.euclidean(a,b)
                
        costmat_all.append(costmat)

    return [costmat_all, nROIs]

def graph_matching_regularized(costmat_all, interval, nROIs):
    """Runs graph matching w/ the Hungarian algorithm and saves outputs:
    
        cols_SXSY.txt - each row is a different subject. Values in each column represent the node
        in the latter time point that the node in the prior time point was mapped to.
        
        roichanges_SXSY.txt - each row is a different subject. Values in each column represent 
        whether the node was remapped to a DIFFERENT node than itself in the subsequent time point 
        (elements are 0 or 1, 1 if the node was remapped to a different node and 0 if the node was mapped to itself.
        Used to calculate remap frequency for each region, when averaged vertically (across subjects)."""
    
    results_dir = '/Users/emilyolafson/GIT/stroke-graph-matching/project/results/stroke/precision/'
    nROIs=fc[0][0].shape[0]
    nsubs=len(costmat_all) #test
    
    rows=np.zeros((nsubs,nROIs))
    cols=np.zeros((nsubs,nROIs))
    roichanges=np.zeros((nsubs,nROIs))
    truecols=range(0,nROIs)

    for i in range(0, nsubs):
        cost=costmat_all[i]
        rowind, colind=linear_sum_assignment(cost*10000) #graph matching algorithm. Need to scale the cost matrix so no value is < 1 to avoid rounding.
        rows[i]=rowind
        cols[i]=colind

    np.savetxt(results_dir+'/cols_'+ 'S'+str(interval[0]+1)+'S'+ str(interval[1]+1)+'_alpha' + str(alphacounter)+'_beta' + str(betacounter) + '.txt', cols)

    for j in range(0,nsubs):
        for i in range(0,nROIs):
            if cols[j][i]!=truecols[i]:
                roichanges[j][i]=1 #indices that are switched

    allchanges=np.mean(roichanges,0)
    np.savetxt(results_dir +'/roichanges_' + 'S'+str(interval[0]+1)+'S'+ str(interval[1]+1) +'_alpha' + str(alphacounter)+'_beta' + str(betacounter) +'.txt', allchanges)
    return [allchanges]

def run_graph_matching_reg(interval, fc):
    """Calculate the cost of remapping across all subjects, and perform graph matching.
    Saves outputs to /results/jupyter/.../stroke and /controls."""
    [costmat_all, nROIs]=remap_cost_regularized(interval, fc)
    nswaps=graph_matching_regularized(costmat_all, interval, nROIs)
    return nswaps
