# Convert labels to neighbors
Turn a nifti file with ROIs to an adjecency matrix.

*Background:* the statistical analysis requires an adjacency matrix (or graph) for the samples. One simple approach is to mark all sames from the same ROI as adjacent as well as samples from adjacent ROIs. This is what the first part of the script achieves (but wasn't used for the final analysis).

Developed by André Altmann  https://github.com/andrealtmann/AIBS_FTD/


In [23]:
import os, sys
import numpy as np
import pandas as pd
import nibabel as nib
import scipy.spatial
import matplotlib
from matplotlib.pyplot import scatter

In [24]:
nifti_file = "../img_data/GIF_labels_mod.nii.gz"
selected_samples_file="../data/selected_samples_lh_for_FTD_AIBS.csv"

In [25]:

img_data  = os.path.join('./' , nifti_file)
label_map = nib.load(img_data)
label_dat = label_map.get_data()

FileNotFoundError: No such file or no access: './../img_data/GIF_labels_mod.nii.gz'

In [4]:
unique_labels = list(np.unique(label_dat).astype(int).astype(str))

NameError: name 'label_dat' is not defined

In [7]:
#remove 0
unique_labels.remove('0')

In [8]:
N=len(unique_labels)

In [9]:
A = pd.DataFrame(np.zeros([N,N], int))
A.columns=unique_labels
A.index=unique_labels

In [10]:
dims = label_dat.shape

In [11]:
#very brute force appraoch...
for x in range(1,dims[0]):
    sys.stderr.write(str(x))
    for y in range(1, dims[1]):
        for z in range(1, dims[2]):
            me = int(label_dat[x,y,z])
            if me > 0:
                for x2 in range(-1,0):
                    for y2 in range(-1,0):
                        for z2 in range(-1,0):
                            before = int(label_dat[x+x2,y+y2,z+z2])
                            if before > 0 and before != me:
                                #add to adjecency matrix
                                A.loc[str(me),str(before)] = 1
                                A.loc[str(before),str(me)] = 1


123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181

In [12]:
A.to_csv("../data/GIF_label_adj.csv")

# Load geodesic distance between samples
Pre-Computed using freesurfer

*Background:* A better approach is to build a graph on actual distances between samples, in the case of the brain geodesic distances are preferable over Euclidean distances. In order to obtain the geodesic distances used here, the AIBS samples were first mapped to the closest vertex in the FSA5 template. Next, the <code>mris_pmake</code> function of FreeSurfer was used to compute the geodesic distances from that vertex to all other vertices.

In [5]:
#map wellID to FSA5 vertex ID.
well2vid = pd.read_csv("../data/FTD_AIBS_sample2vid.csv", index_col=0)

In [7]:
#distance from each sample to all other FSA5 vertices
geo_dist = pd.read_csv("../data/fsa5_geo_FTDAIBS.csv.gz", header=None)
geo_dist.set_index(well2vid.index, inplace=True)

In [8]:
#geodesic distances between all pairs of samples
sample_geo_dist = geo_dist.loc[:,well2vid.vid]
sample_geo_dist.columns = well2vid.index

## Build adjecency matrix for AIBS samples

In [9]:
sample_info = pd.read_csv(selected_samples_file, index_col=0)

In [10]:
sample_info.shape

(1248, 19)

In [11]:
sample_geo_dist2 = sample_geo_dist.loc[sample_info.well_id,sample_info.well_id]

In [12]:
nSample = sample_info.shape[0]
sA = np.zeros([nSample,nSample], int)

## Let's make a subject-specific K-NN graph
This builds a K-NN (Nearest Neighbors) graph for samples only originating from the same subject (still stored only in one matrix, though). 

In [13]:
k=10


In [14]:
sAknn_subj = np.zeros([nSample,nSample], int)
for i in range(nSample):
    mylab    = sample_info.iloc[i,:].loc["probe2label"]
    mysubj   = sample_info.iloc[i,:].loc["sampleID"]
    
    same_subj = sample_info.loc[:,"sampleID"] == mysubj
    coord = sample_info.iloc[i,:].loc["corrected_mni_x":"corrected_mni_z"]
    sdists   = sample_geo_dist2.iloc[i,:]
    sdists.index = same_subj.index    
    cmax = max(sdists)
    sdists[~same_subj] = 10.0 * cmax
    cpy = np.copy(sdists)
    dc = getKnnCut(cpy,k)
    close_samples = (sdists <= dc)
    sAknn_subj[i,close_samples] = 1
    sAknn_subj[close_samples,i] = 1

NameError: name 'getKnnCut' is not defined

In [15]:
min(pd.DataFrame(sAknn_subj).apply(sum, axis=0))

0

In [16]:
sAknn_subjdf=pd.DataFrame(sAknn_subj)
sAknn_subjdf.index=sample_info.well_id
sAknn_subjdf.columns=sample_info.well_id

In [17]:
sAknn_subjdf.to_csv("../data/sample_adjecency_knn10_geo_subj.csv")


## Let's make a K-nn graph
This builds a K-NN (Nearest Neighbors) graph for the entire dataset. Ignoring that samples come from different subjects.


In [18]:
#set K
k=10

In [19]:
def getKnnCut(mydist,k):
    mydist.sort()
    dc = mydist[k]
    return(dc)

In [20]:
sAknn = np.zeros([nSample,nSample], int)
for i in range(nSample):
    mylab    = sample_info.iloc[i,:].loc["probe2label"]
    coord = sample_info.iloc[i,:].loc["corrected_mni_x":"corrected_mni_z"]
    sdists   = sample_geo_dist2.iloc[i,:]
    sdists.index = same_subj.index    

    cpy = np.copy(sdists)
    dc = getKnnCut(cpy,k)
    close_samples = (sdists <= dc)
    sAknn[i,close_samples] = 1
    sAknn[close_samples,i] = 1

In [21]:
sAknndf=pd.DataFrame(sAknn)
sAknndf.index=sample_info.well_id
sAknndf.columns=sample_info.well_id

In [22]:
sAknndf.to_csv("../data/sample_adjecency_knn10_geo.csv")