In [None]:
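# For this notebook to run, change the hard-coded file paths in the following cells
# (cells 3 and 8 in the original numbering): the cell that reads the ROI list
# (desikan.txt) and the cell that loads the input spreadsheets (.xls files).
# The atlas path near the end of the notebook must be changed as well.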

In [35]:
# import necessary libraries
import os
from glob import glob
import numpy as np
import pandas
from copy import deepcopy

import statsmodels.formula.api as smf
from statsmodels.sandbox.stats.multicomp import multipletests as fwe
from statsmodels.sandbox.stats.multicomp import fdrcorrection0 as fdr

In [4]:
# get ROIs
# The file is read without a header row; its single column is named "rois".
rois=pandas.read_table('/Users/jakevogel/Downloads/desikan.txt',header=None)
rois.columns=["rois"]
# Walk the label column directly rather than looking every row up through
# the `.ix` indexer, which has been removed from pandas.
for i,label in enumerate(rois['rois']):
    print(i,label)

0 lh-unknown
1 lh-bankssts
2 lh-caudalanteriorcingulate
3 lh-caudalmiddlefrontal
4 lh-corpuscallosum
5 lh-cuneus
6 lh-entorhinal
7 lh-fusiform
8 lh-inferiorparietal
9 lh-inferiortemporal
10 lh-isthmuscingulate
11 lh-lateraloccipital
12 lh-lateralorbitofrontal
13 lh-lingual
14 lh-medialorbitofrontal
15 lh-middletemporal
16 lh-parahippocampal
17 lh-paracentral
18 lh-parsopercularis
19 lh-parsorbitalis
20 lh-parstriangularis
21 lh-pericalcarine
22 lh-postcentral
23 lh-posteriorcingulate
24 lh-precentral
25 lh-precuneus
26 lh-rostralanteriorcingulate
27 lh-rostralmiddlefrontal
28 lh-superiorfrontal
29 lh-superiorparietal
30 lh-superiortemporal
31 lh-supramarginal
32 lh-frontalpole
33 lh-temporalpole
34 lh-transversetemporal
35 rh-unknown
36 rh-bankssts
37 rh-caudalanteriorcingulate
38 rh-caudalmiddlefrontal
39 rh-corpuscallosum
40 rh-cuneus
41 rh-entorhinal
42 rh-fusiform
43 rh-inferiorparietal
44 rh-inferiortemporal
45 rh-isthmuscingulate
46 rh-lateraloccipital
47 rh-lateralorbitofrontal


In [5]:
# separate into lobes
# Each lobe name maps to a list of integer ROI ids.
lobes = dict(
    frontal=[2, 3, 12, 14, 17, 18, 19, 20, 24, 26, 27, 28, 32, 37, 38, 47, 49, 52, 53, 54, 55, 59, 61, 62, 63, 67],
    parietal=[8, 10, 22, 23, 25, 29, 31, 43, 45, 57, 58, 60, 64, 66],
    temporal=[1, 6, 7, 9, 15, 16, 30, 33, 34, 36, 41, 42, 44, 50, 51, 65, 68, 69],
    occipital=[5, 11, 13, 21, 40, 46, 48, 56],
)
print(lobes)

    

{'temporal': [1, 6, 7, 9, 15, 16, 30, 33, 34, 36, 41, 42, 44, 50, 51, 65, 68, 69], 'frontal': [2, 3, 12, 14, 17, 18, 19, 20, 24, 26, 27, 28, 32, 37, 38, 47, 49, 52, 53, 54, 55, 59, 61, 62, 63, 67], 'occipital': [5, 11, 13, 21, 40, 46, 48, 56], 'parietal': [8, 10, 22, 23, 25, 29, 31, 43, 45, 57, 58, 60, 64, 66]}


In [6]:
# get frontal ROIs
# Labels are selected with `.loc` (the `.ix` indexer has been removed from pandas)
# and kept in the order in which they appear in `rois`.
frontal_ids = set(lobes['frontal'])
frois = [rois.loc[x,'rois'] for x in rois.index if x in frontal_ids]

print(frois)

['lh-caudalanteriorcingulate', 'lh-caudalmiddlefrontal', 'lh-lateralorbitofrontal', 'lh-medialorbitofrontal', 'lh-paracentral', 'lh-parsopercularis', 'lh-parsorbitalis', 'lh-parstriangularis', 'lh-precentral', 'lh-rostralanteriorcingulate', 'lh-rostralmiddlefrontal', 'lh-superiorfrontal', 'lh-frontalpole', 'rh-caudalanteriorcingulate', 'rh-caudalmiddlefrontal', 'rh-lateralorbitofrontal', 'rh-medialorbitofrontal', 'rh-paracentral', 'rh-parsopercularis', 'rh-parsorbitalis', 'rh-parstriangularis', 'rh-precentral', 'rh-rostralanteriorcingulate', 'rh-rostralmiddlefrontal', 'rh-superiorfrontal', 'rh-frontalpole']


In [7]:
# define subfunction

def build_statement(dvar,varlist,inter=None): ##dvar = dependent variables, varlist = independent variables
    '''Build an R-style formula string of the form "dvar ~ a + b + a*b".

    dvar is title of the column name in your dataframe corresponding to your dependent variable

    varlist is a list of columns in your spreadsheet that you want to include as covariates.
    The list is copied, never modified, so the function can safely be called repeatedly
    with the same list.

    inter should be a list of tuples. the tuples should each contain two items, namely the two column
    names of the variables you want to interact. Each pair is appended to the formula as "a*b",
    after the plain covariates and in the order given.

    Returns the formula as a string.

    Raises ValueError if varlist is empty, if inter is not a list of pairs, or if inter
    names a column that is not in varlist.'''

    # Work on a copy: appending interaction terms to the caller's list would make
    # the list grow on every call.
    terms = list(varlist)
    if not terms:
        raise ValueError('varlist must contain at least one column name')

    if inter:
        # Only structural problems (not iterable, not indexable, too short) are
        # reported as a malformed `inter`; the membership check below keeps its
        # own, more specific message.
        try:
            pairs = [(tup[0],tup[1]) for tup in inter]
        except (TypeError,IndexError,KeyError):
            raise ValueError('argument inter is not set properly. Should be a list of tuples containing pairs of column names')

        for first,second in pairs:
            if first not in terms or second not in terms:
                raise ValueError('you have columns in inter that are not in varlist')

        terms.extend(['%s*%s'%(first,second) for first,second in pairs])

    return '%s ~ %s'%(dvar,' + '.join(terms))
    
    

In [45]:
# define main function

def get_results(df,ref,vardf,varlist,params=None,inter=None,corr=None,rois=None):
    '''Fit one OLS model per connection column of df and collect t and p values.

    - df is a the first output of the nodewise_reg. Its trailing len(vardf.columns)
    columns are treated as covariates; every column before them is used, in turn,
    as the dependent variable of a model.
    - ref is the second output of nodewise_reg. It is indexed by the column names of df
    and its 'smthng' column holds the entry used to relabel each connection.
    - vardf and varlist are the same as input into nodewise_reg. varlist is not modified.

    - params is a list indicating which model parameters (i.e. in varlist) you want
    output statistics for. It is a list of integers corresponding to the order in which
    the variables are placed in varlist and interaction, respectively. Example:

    varlist = ['tiv','age','dx']
    inter = [('age','dx')]

    to return just age, params should be set to [2]
    to return age, dx, and the interaction, params should be set to [2,3,4]
    to return just the intercept, params should be set to [0]

    If no parameters are passed, will only return (unlabeled) stats for the first item in varlist

    - inter should be a list of tuples. the tuples should each contain two items, namely the two column
    names of the variables you want to interact.

    If inter is None, no interactions will be assessed

    - corr can be set to 'FDR' or 'FWE'. Otherwise, will not perform any p-corrections.
    The corrected p-values are added as columns named 'FDR_<var>' / 'FWE_<var>', where
    <var> is the p-value column name without its trailing '_p' ('FDR_p' / 'FWE_p' when
    only one parameter is reported).

    - rois should be a list of roi labels that corresponds exactly to the order that ROIs were passed in
    nodewise_reg

    Returns (resdf, ndf): resdf holds one row of statistics per connection, and ndf is a
    copy of df whose 'col_*' columns have been renamed the same way as the rows of resdf.
    '''

    if not params:
        params = [1]

    n_vars = len(varlist)
    if len(params) == 1:
        cols = ['t','p']
    else:
        cols = []
        for i in params:
            if i == 0:
                var = 'int'
            elif 0 < i <= n_vars:
                var = varlist[i-1]
            else:
                var = 'inter_%s'%(i-n_vars)
            cols.extend(['%s_t'%(var),'%s_p'%(var)])

    # Count from the front so that a vardf with zero columns keeps every column
    # of df (a slice ending at -0 would select nothing).
    n_cxn = max(len(df.columns) - len(vardf.columns), 0)
    cxns = df.columns[:n_cxn]
    resdf=pandas.DataFrame(index=cxns,columns=cols)

    print('running models')

    for cxn in cxns:
        # A copy is passed so the caller's varlist can never be extended with
        # interaction terms while looping over the connections.
        statement=build_statement(cxn,list(varlist),inter)
        lm=smf.ols(statement,data=df).fit()
        for i,p in enumerate(params):
            # params are integer positions into the fitted parameter vector
            resdf.loc[cxn,cols[2*i]]=lm.tvalues.iloc[p]
            resdf.loc[cxn,cols[2*i+1]]=lm.pvalues.iloc[p]

    if corr in ('FDR','FWE') and len(resdf) > 0:
        correct = fdr if corr == 'FDR' else fwe
        # Snapshot the p-value columns first: corrected columns are added below.
        for pcol in list(resdf.columns[1::2]):
            # Strip only the trailing '_p' so names containing underscores
            # (e.g. 'inter_1', 'inter_2') keep distinct corrected columns.
            var = pcol.rsplit('_',1)[0]
            pvals = np.array(resdf[pcol].tolist())
            # Both correction functions return the corrected p-values second.
            resdf['%s_%s'%(corr,var)] = correct(pvals)[1]

    ### ALSO ADDING THIS, FOR COMPATIBILITY BUT ALSO FOR IMPROVEMENT
    resdf.index = [_relabel_cxn(ref.loc[cxn,'smthng'],rois) for cxn in resdf.index.tolist()]

    # FINALLY TO MAKE THE INPUT DF READABLE AGAIN...
    ndf = df.copy()
    ndf.columns = [
        _relabel_cxn(ref.loc[col,'smthng'],rois) if 'col_' in str(col) else col
        for col in df.columns.tolist()
    ]

    return resdf, ndf


def _relabel_cxn(pair,rois=None):
    '''Turn one entry of ref['smthng'] into a readable "roiA-roiB" label.

    pair is either a tuple/list of two ROI positions or its string form such as "(3, 7)".
    When rois is empty or None the entry is returned unchanged.

    Raises TypeError for any other kind of entry instead of silently skipping it,
    which would leave the new labels misaligned with the rows they describe.'''
    if not rois:
        return pair
    if isinstance(pair,str):
        pieces = pair.replace('(','').replace(')','').replace(',','').split()
        return '%s-%s'%(rois[int(pieces[0])],rois[int(pieces[1])])
    if isinstance(pair,(tuple,list)):
        return '%s-%s'%(rois[pair[0]],rois[pair[1]])
    raise TypeError('cannot build an ROI label from reference entry %r'%(pair,))

In [20]:
# Inspect the row labels of `ref`; these are the 'col_N' keys that get_results looks up.
# NOTE(review): `ref` is only assigned in a later cell (the "load inputs" cell), so this
# cell raises NameError on a fresh top-to-bottom run -- confirm whether it should be moved.
ref.index

Index(['col_160', 'col_129', 'col_128', 'col_127', 'col_126', 'col_125',
       'col_124', 'col_119', 'col_113', 'col_121',
       ...
       'col_78', 'col_79', 'col_76', 'col_77', 'col_74', 'col_75', 'col_72',
       'col_73', 'col_70', 'col_71'],
      dtype='object', length=165)

In [46]:
# load inputs
# Each spreadsheet is read from its sheet named 'Sheet1'.
input_paths = (
    '/Users/jakevogel/Downloads/frocc_df.xls',
    '/Users/jakevogel/Downloads/jnk.xls',
    '/Users/jakevogel/Downloads/dfout (1).xls',
)
vardf,ref,df = [pandas.ExcelFile(path).parse('Sheet1') for path in input_paths]
varlist = ['age']

In [47]:
# test function
# params and inter are left at their defaults (None): statistics for a single
# parameter, no interaction terms.
resdf,ndf = get_results(df,ref,vardf,varlist,corr='FDR',rois=frois)

running models


In [48]:
# now ndf is what you're looking for!
# ndf is the second value returned by get_results: a copy of df whose 'col_*'
# columns have been renamed using ref (and rois, when given).
ndf

Unnamed: 0,lh-caudalanteriorcingulate-lh-caudalmiddlefrontal,lh-caudalanteriorcingulate-lh-medialorbitofrontal,lh-caudalanteriorcingulate-lh-parsopercularis,lh-caudalmiddlefrontal-lh-lateralorbitofrontal,lh-caudalmiddlefrontal-lh-medialorbitofrontal,lh-caudalmiddlefrontal-lh-paracentral,lh-caudalmiddlefrontal-lh-parsopercularis,lh-caudalmiddlefrontal-lh-parstriangularis,lh-caudalmiddlefrontal-lh-precentral,lh-caudalmiddlefrontal-lh-rostralanteriorcingulate,...,rh-parsorbitalis-rh-precentral,rh-parsorbitalis-rh-rostralmiddlefrontal,rh-parsorbitalis-rh-superiorfrontal,rh-parsorbitalis-rh-frontalpole,rh-parstriangularis-rh-precentral,rh-precentral-rh-frontalpole,rh-rostralmiddlefrontal-rh-superiorfrontal,rh-rostralmiddlefrontal-rh-frontalpole,rh-superiorfrontal-rh-frontalpole,age
p0007,1122.0,2598.0,2772.0,6232.0,3700.0,34847.0,16428.0,5305.0,7335.0,2952.0,...,8498.0,739.0,196.0,6171.0,2063.0,691.0,216.0,14911.0,10202.0,16.42
p0009,,,,,,,,,,,...,,,,,,,,,,13.25
p0022,172.0,2038.0,2142.0,1704.0,1715.0,16060.0,16807.0,233.0,3401.0,1403.0,...,7298.0,768.0,141.0,3061.0,3355.0,123.0,488.0,21149.0,15678.0,14.25
p0028,0.0,0.0,0.0,2301.0,178.0,19774.0,14346.0,2972.0,5739.0,4359.0,...,2828.0,660.0,102.0,3474.0,654.0,42.0,1139.0,22854.0,8914.0,15.00
p0032,0.0,50.0,50.0,3134.0,254.0,16923.0,6771.0,633.0,1760.0,1031.0,...,3347.0,0.0,0.0,3104.0,4328.0,0.0,398.0,19787.0,14841.0,11.42
p0033,0.0,0.0,0.0,3159.0,1194.0,15909.0,6546.0,1993.0,6927.0,3076.0,...,6659.0,812.0,144.0,4670.0,1287.0,599.0,198.0,24783.0,14875.0,9.92
p0048,1000.0,2785.0,2872.0,4118.0,3365.0,15197.0,18283.0,1378.0,4832.0,4611.0,...,5340.0,1894.0,0.0,7430.0,1280.0,91.0,2273.0,22864.0,9550.0,14.00
p0068,0.0,0.0,0.0,488.0,6543.0,9596.0,14720.0,1159.0,837.0,0.0,...,7502.0,0.0,99.0,1962.0,6485.0,0.0,145.0,3044.0,5500.0,15.08
p0072,0.0,0.0,0.0,2751.0,1340.0,4820.0,10811.0,2105.0,3911.0,987.0,...,159.0,127.0,0.0,976.0,355.0,0.0,1491.0,12390.0,4746.0,13.83
p0079,0.0,0.0,0.0,44.0,83.0,6090.0,15396.0,711.0,163.0,659.0,...,488.0,268.0,0.0,2094.0,424.0,0.0,6732.0,16139.0,4497.0,6.58


In [None]:
# look at output
# 'FDR_p' holds the corrected p-values that get_results adds when it is run with
# corr='FDR' and a single parameter; sorting puts the smallest values first.
resdf.sort_values('FDR_p')

In [None]:
# test multi-param
# Add the second covariate to df (values are aligned on the row index). Plain column
# assignment replaces the `.ix` indexer, which has been removed from pandas.
df['frocc'] = vardf['frocc']
varlist = ['age','frocc']
# positions 1 and 2 select the two covariates in varlist (0 would be the intercept)
params = [1,2]
resdf,ndf = get_results(df,ref,vardf,varlist,params=params,inter=None,corr='FDR',rois=frois)

In [None]:
# look at output
# With more than one parameter, get_results names the columns '<var>_t' / '<var>_p'.
resdf

In [None]:
# test interactions
# Relies on varlist = ['age','frocc'] as set in the multi-param cell.
inter = [('age','frocc')]
# With two variables in varlist, get_results labels parameter 3 as 'inter_1'.
params = [1,2,3]
resdf,ndf = get_results(df,ref,vardf,varlist,params=params,inter=inter,corr='FDR',rois=frois)

In [None]:
# look at output
# One row per connection; t and p columns for each entry of params.
resdf

In [None]:
# fin

In [None]:
#### OUTPUT 4D RESULTS TO 3D IMAGE ####

# I'm running this after only running cells in this document up through the 10th cell. 
# Cells below that were not run, so if you want to reproduce what I did exactly, don't
# run it past the 10th cell

# Obviously when you do this yourself it won't matter because you'll be putting in your
# own inputs

In [59]:
# Collect every distinct ROI named in the connection labels, in order of first appearance.
u_rois = []
for cxn_label in resdf.index.tolist():
    parts = cxn_label.split('-')
    # the first two pieces name one end of the connection, the last two the other
    endpoints = ('%s_%s'%(parts[0],parts[1]), '%s_%s'%(parts[-2],parts[-1]))
    for endpoint in endpoints:
        if endpoint not in u_rois:
            u_rois.append(endpoint)

In [105]:
# Label numbers for the left-hemisphere ROIs (used below as the roi_map of the projection).
in_rois = {'lh-caudalanteriorcingulate':1,'lh-caudalmiddlefrontal': 2,'lh-medialorbitofrontal':12, 'lh-parsopercularis': 16,
          'lh-lateralorbitofrontal': 10,'lh-paracentral': 15, 'lh-parstriangularis': 18,'lh-precentral': 22,
          'lh-rostralanteriorcingulate': 24,'lh-rostralmiddlefrontal':25, 'lh-superiorfrontal': 26 }

# NOTE(review): the right-hemisphere label is taken to be the left-hemisphere label
# plus 39 -- confirm against the label table of the atlas in use.
RH_LABEL_OFFSET = 39

new_rois = {}
for k,v in in_rois.items():
    hemi,reg = k.split('-')
    new_rois.update({'rh-%s'%(reg): v+RH_LABEL_OFFSET})
    new_rois.update({k:v})

new_rois

{'lh-caudalanteriorcingulate': 1,
 'lh-caudalmiddlefrontal': 2,
 'lh-lateralorbitofrontal': 10,
 'lh-medialorbitofrontal': 12,
 'lh-paracentral': 15,
 'lh-parsopercularis': 16,
 'lh-parstriangularis': 18,
 'lh-precentral': 22,
 'lh-rostralanteriorcingulate': 24,
 'lh-rostralmiddlefrontal': 25,
 'lh-superiorfrontal': 26,
 'rh-caudalanteriorcingulate': 40,
 'rh-caudalmiddlefrontal': 41,
 'rh-lateralorbitofrontal': 49,
 'rh-medialorbitofrontal': 51,
 'rh-paracentral': 54,
 'rh-parsopercularis': 55,
 'rh-parstriangularis': 57,
 'rh-precentral': 61,
 'rh-rostralanteriorcingulate': 63,
 'rh-rostralmiddlefrontal': 64,
 'rh-superiorfrontal': 65}

In [78]:
import nibabel as ni

In [102]:
def project_results_to_3D_atlas(atlas,roi_map,resdf,p_cols=[1],cutoff=0.05):
    """
    this function counts significant results for each ROI and projects it onto an existing atlas,
    writing that atlas to a nifti file

    For every column in p_cols, each ROI in roi_map is given the number of rows of resdf
    whose label contains the ROI name and whose value in that column is below cutoff.
    Atlas voxels carrying the ROI's label number are set to that count, every other voxel
    is set to 0, and the result is written to '3d_results_<column name>'.

    atlas = path to a nifti image of a labeled atlas

    roi_map = a dictionary where roi labels (the same as in resdf.index) are matched to the corresponding
    number labels of the atlas

    resdf = output from get_results

    p_cols = a list containing integers corresponding to which columns of resdf you want project results from
    (integer positions, not column names)

    cutoff = anything below this value will be considered a significant result

    Returns None; one image is written per entry of p_cols.
    """

    # The atlas is only read from, never modified, so it is loaded once for all columns.
    print('loading atlas data')
    img = ni.load(atlas)
    atl = np.asanyarray(img.dataobj)
    aff = img.affine

    cxns = resdf.index.tolist()

    for p_col in p_cols:
        col_name = resdf.columns[p_col]
        print('working on column %s'%(col_name))
        print('extracting connections')
        # Figure out how many significant connections occur at each node.
        # An ROI is matched by substring, so it is counted for every connection
        # label that contains its name; ROIs with no hits get 0.
        pvals = resdf.iloc[:,p_col].tolist()
        roi_vals = {}
        for roi in roi_map.keys():
            roi_vals[roi] = sum(1 for cxn,pval in zip(cxns,pvals) if roi in cxn and pval < cutoff)

        ### Project results to atlas
        # Start from an all-zero volume so that voxels outside roi_map stay 0.
        print('preparing result projection')
        projected = np.zeros_like(atl)

        # Masks are computed on the untouched atlas and written to a separate
        # volume: relabelling in place would overwrite voxels already set to a
        # count that happens to equal another ROI's label number.
        print('projecting results')
        for roi,val in roi_vals.items():
            projected[atl==roi_map[roi]] = val

        # Write to file
        # NOTE(review): the output name carries no file extension -- confirm that
        # the installed nibabel accepts it.
        print('writing output')
        out_name = '3d_results_%s'%(col_name)
        nimg = ni.Nifti1Image(projected,aff)
        nimg.to_filename(out_name)
        print('created new image %s'%(out_name))
        
    

In [106]:
# Project the results onto the atlas, writing one image per selected column.
atlas = '/Users/jakevogel/Dropbox/tmp/tau/dkt_atlas_1mm.nii.gz'
roi_map = new_rois
# integer positions of the resdf columns to project (the recorded run reports
# these as 'p' and 'FDR_p')
p_cols = [1,2]
project_results_to_3D_atlas(atlas,roi_map,resdf,p_cols,cutoff=0.05)
        

working on column p
extracting connections
loading atlas data
preparing result projection
projecting results
writing output
created new image 3d_results_p
working on column FDR_p
extracting connections
loading atlas data
preparing result projection
projecting results
writing output
created new image 3d_results_FDR_p


In [107]:
# Quick type check: selecting the 't' column of resdf yields a pandas Series.
# NOTE(review): looks like leftover debugging -- confirm whether this cell is still needed.
type(resdf[:]['t'])

pandas.core.series.Series