In [1]:
import pandas as pd
import numpy as np
import statsmodels as stt
import scipy.stats as sst
import os.path as osp
from statsmodels import api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import json
%matplotlib inline

In [2]:
# Print the resolved working directory: the data path below is relative to it.
print(osp.realpath(osp.curdir))
relative_path_filename = './data/2019-12-03-simple2_query_output.csv'
# Stop here if the query output is not where the notebook expects it.
assert osp.exists(relative_path_filename)

/home/jb/code/repronim/simple2/simple2_analysis


In [3]:
# Read the query output; cells containing the literal 'nd' are treated as missing.
hie = pd.read_csv(relative_path_filename, na_values='nd')

# Every column name must occur exactly once.
original_col_names = hie.columns.tolist()
assert len(set(original_col_names)) == len(original_col_names)
print(original_col_names)

# The rest of the notebook refers to the 'federatedLabel' column as 'structure'.
col_rename = {'federatedLabel':'structure'}
hie = hie.rename(columns=col_rename)
print(hie.columns.tolist())

['study', 'ID', 'Age', 'dx', 'Gender', 'FIQ', 'PIQ', 'VIQ', 'tool', 'softwareLabel', 'federatedLabel', 'laterality', 'volume']
['study', 'ID', 'Age', 'dx', 'Gender', 'FIQ', 'PIQ', 'VIQ', 'tool', 'softwareLabel', 'structure', 'laterality', 'volume']


  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Alternative mapping file, currently disabled:
# mapping_file = '../segstats_jsonld/segstats_jsonld/mapping_data/freesurfermap.json'
mapping_file = '../segstats_jsonld/segstats_jsonld/mapping_data/freesurfer-cdes.json'
# Stop here if the mapping file is not found at the relative path above.
assert osp.exists(mapping_file)
# Parse the JSON mapping into `roi_map`.
with open(mapping_file, "r") as read_file:
    roi_map = json.load(read_file)

In [5]:
# Build two lookup tables from the mapping loaded in `roi_map`:
#   label2ube : entry label -> its 'isAbout' value
#   ube2h     : 'isAbout' value -> label with side / unit decorations removed
#               (the first label met for a given 'isAbout' value is kept)
ube2h = {}
label2ube = {}
countok = 0            # number of entries that carry an 'isAbout' field
has_no_isAbout = []    # keys of entries without an 'isAbout' field
has_no_label = []      # keys of entries whose label is missing, empty or 'None'/'none'

# Substrings stripped from a label, in this order, to get the side-independent name.
label_decorations = ('Right-', 'Right ', 'Left-', 'Left ', ' NVoxels', ' (mm^3)')

for (k, v) in roi_map.items():

    # discard 'count'
    if k == 'count':
        continue

    # v is a dict that contains the CDE - check that we have a isAbout and label
    if 'isAbout' not in v:
        # Previously these entries were skipped without being recorded, which made
        # the check on has_no_isAbout below always pass.
        has_no_isAbout.append(k)
        continue

    countok += 1
    label = v.get('label')
    if not label or label in ('None', 'none'):
        has_no_label.append(k)
        continue

    label2ube[label] = v['isAbout']
    if v['isAbout'] not in ube2h:
        no_right_or_left = label
        for decoration in label_decorations:
            no_right_or_left = no_right_or_left.replace(decoration, '')
        ube2h[v['isAbout']] = no_right_or_left

# Report unusable entries instead of asserting: the old assertion on has_no_isAbout
# could never fail, so turning it into a hard failure now could break a run that
# used to complete.
if has_no_isAbout:
    print('Warning: {} mapping entries have no isAbout and were skipped'.format(len(has_no_isAbout)))
if has_no_label:
    print('Warning: {} mapping entries have no usable label'.format(len(has_no_label)))

# Every entry with an 'isAbout' must have contributed one distinct label.
assert countok == len(label2ube)

In [6]:
# Invert ube2h: side-independent structure name -> 'isAbout' value.
# When several 'isAbout' values share a name, the last one iterated wins.
h2ube = dict((name, about) for about, name in ube2h.items())

In [7]:
def split_merge_df(df, indx='ID', spliton='laterality', levels=('Left', 'Right'),
                       keep_col='volume', op='+', colrename=None):
    """
    Combine the two halves of a long-format table into one row per ``indx``.

    1. split ``df`` into the rows where ``spliton`` equals ``levels[0]`` and the
       rows where it equals ``levels[1]``
    2. inner-merge the two parts on ``indx``; from the second part only ``indx``
       and ``keep_col`` are kept
    3. the two ``keep_col`` columns are named ``keep_col + levels[0]`` and
       ``keep_col + levels[1]`` (no separator)
    4. apply ``op`` to them and store the result in a column named
       ``keep_col + levels[0] + op + levels[1]``, or ``colrename`` when given

    example: for adding volumes in right and left structures

    Parameters
    ----------
    df : pandas.DataFrame containing the columns ``indx``, ``spliton``, ``keep_col``
    indx : name of the column identifying a subject
    spliton : name of the column holding the two levels
    levels : the two values of ``spliton`` to combine
    keep_col : name of the numeric column to combine
    op : one of '+', '-', '*', '/' (first level on the left-hand side)
    colrename : optional name for the result column

    Returns
    -------
    pandas.DataFrame : the merged frame including the result column

    Raises
    ------
    ValueError : if ``op`` is not supported (it used to be ignored silently,
        returning a frame without the result column)
    AssertionError : if the two parts do not cover the same ``indx`` values, or
        if ``indx`` is duplicated in the first part
    """
    import operator  # function-scope import keeps this cell self-contained

    operations = {'+': operator.add, '-': operator.sub,
                  '*': operator.mul, '/': operator.truediv}
    if op not in operations:
        raise ValueError("unsupported op {!r}; expected one of {}".format(op, sorted(operations)))

    first, second = levels[0], levels[1]
    dflev1 = df[df[spliton] == first]
    dflev2 = df[df[spliton] == second]

    # both parts must describe the same subjects, once each in the first part
    assert set(dflev1[indx]) == set(dflev2[indx])
    assert len(set(dflev1[indx])) == len(dflev1[indx])
    # NOTE(review): uniqueness of indx in the second part is not checked (that check
    # was disabled in the original); duplicates there would duplicate merged rows.

    merged_inner = pd.merge(left=dflev1, right=dflev2[[indx, keep_col]],
                            on=indx, suffixes=(first, second), how='inner')

    # combine the two keep_col columns into a new column
    add_col_name = keep_col + first + op + second
    merged_inner[add_col_name] = operations[op](merged_inner[keep_col + first],
                                                merged_inner[keep_col + second])
    if colrename is not None:
        merged_inner = merged_inner.rename(columns={add_col_name: colrename})
    return merged_inner

# Inert string literal holding a disabled draft: for each name in `droplist` it would
# drop the '<name>_y' column of a merged frame and rename '<name>_x' back to '<name>'.
# It is never executed.
"""
    if droplist != []:
        for colname in droplist:
            colname_y = colname + '_y'
            colname_x = colname + '_x'
            merged_inner.drop(colname_y, axis=1, inplace=True)
            merged_inner.rename(columns={colname_x: colname}, inplace=True)
""";

In [8]:
# Values compared against the 'tool' column, keyed by a short software name.
tooldic = {
    'surfer': 'https://surfer.nmr.mgh.harvard.edu/',
    'fsl': 'http://purl.org/nidash/fsl#',
    'ants': 'http://stnava.github.io/ANTs/',
}

# Values of the 'dx' column that define each population (numeric and text codings).
normalDev = (2, '2', 'Typically Developing Children')
adhd = (
    1,
    '1',
    'ADHD-Combined',
    'ADHD-Hyperactive/Impulsive',
    'ADHD-Inattentive',
)


def define_conditions(df, tooldic={}, normalDev=(), adhd=(), h2ube={}):
    """
    Build a dictionary of named boolean row selectors for ``df``.

    Every selector is computed from ``df`` alone, so it is aligned with the index
    of ``df`` and can be applied to it directly.

    Parameters
    ----------
    df : pandas.DataFrame with the columns 'laterality', 'Age', 'tool', 'FIQ',
        'dx', 'softwareLabel', 'structure', 'study' and 'Gender'
    tooldic : dict with the keys 'surfer', 'ants' and 'fsl'; each value is
        compared with the 'tool' column
    normalDev : iterable of 'dx' values selecting the 'normDev' population
    adhd : iterable of 'dx' values selecting the 'adhd' population
    h2ube : dict giving, for each structure name used below ('Caudate',
        'Putamen', 'Estimated Total Intracranial Volume',
        'hemisphere cerebral white matter volume', 'Total gray matter volume',
        'CSF', 'CC_Anterior', 'CC_Central', 'CC_Posterior'), the value to look
        for in the 'structure' column

    Returns
    -------
    dict : condition name -> boolean Series over the rows of ``df``. 'normDev'
        and 'adhd' are the plain value ``False`` when the matching iterable is
        empty.

    Raises
    ------
    KeyError : if a required column of ``df`` or key of ``tooldic`` / ``h2ube``
        is missing
    """
    diccond = {}

    #========== laterality
    laterality = df['laterality']
    # Fixed: the long spellings used to be tested on the notebook-global `hie`
    # instead of `df`, which gave wrong or misaligned selectors for any other frame.
    diccond['left'] = (laterality == 'L') | (laterality == 'Left')
    diccond['right'] = (laterality == 'R') | (laterality == 'Right')
    # NOTE(review): only the long spellings are excluded here, so rows coded
    # 'L' / 'R' also satisfy 'latNan' - confirm this is intended.
    diccond['latNan'] = (laterality != 'Right') & (laterality != 'Left')

    #========== age
    diccond['age<=20'] = (df['Age'] <= 20)
    diccond['age<20'] = (df['Age'] < 20)
    diccond['age<12'] = (df['Age'] < 12)
    diccond['age>=12'] = (df['Age'] >= 12)

    #========== tool conditions
    diccond['fs'] = (df['tool'] == tooldic['surfer'])
    diccond['ants'] = (df['tool'] == tooldic['ants'])
    diccond['fsl'] = (df['tool'] == tooldic['fsl'])

    #========== IQ conditions
    diccond['fiq>0'] = (df['FIQ'] > 0)

    #========== disease conditions
    def _dx_is_any(values):
        """True on the rows whose 'dx' equals one of ``values`` (False if none given)."""
        selected = False
        for value in values:
            selected = selected | (df['dx'] == value)
        return selected

    diccond['normDev'] = _dx_is_any(normalDev)
    diccond['adhd'] = _dx_is_any(adhd)

    #=========== ROIs
    diccond['bvol'] = (df['softwareLabel'] == 'BVOL (mm^3)')
    diccond['brainseg'] = (df['softwareLabel'] == 'Brain Segmentation Volume (mm^3)')
    diccond['caudate'] = (df['structure'] == h2ube['Caudate'])
    diccond['putamen'] = (df['structure'] == h2ube['Putamen'])
    diccond['TIV'] = (df['structure'] == h2ube['Estimated Total Intracranial Volume'])
    #
    diccond['wm'] = (df['structure'] == h2ube['hemisphere cerebral white matter volume'])
    diccond['gm'] = (df['structure'] == h2ube['Total gray matter volume'])
    diccond['csf'] = (df['structure'] == h2ube['CSF'])
    #
    diccond['wmfsl'] = (df['softwareLabel'] == 'white (mm^3)')
    diccond['gmfsl'] = (df['softwareLabel'] == 'gray (mm^3)')
    diccond['csffsl'] = (df['softwareLabel'] == 'csf (mm^3)')
    #
    diccond['ccant'] = (df['structure'] == h2ube['CC_Anterior'])
    diccond['cccen'] = (df['structure'] == h2ube['CC_Central'])
    diccond['ccpos'] = (df['structure'] == h2ube['CC_Posterior'])

    #=========== site
    diccond['abide'] = (df['study'].str.contains("ABIDE"))
    diccond['adhd200'] = (df['study'].str.contains("ADHD"))

    #=========== Gender
    diccond['male'] = (df['Gender'] == 'Male')
    diccond['female'] = (df['Gender'] == 'Female')

    return diccond


In [9]:
# Build the named row selectors for the full table `hie`.
condic = define_conditions(hie, tooldic=tooldic, normalDev=normalDev, adhd=adhd, h2ube=h2ube)
# Show which condition names are available.
condic.keys()

dict_keys(['left', 'right', 'latNan', 'age<=20', 'age<20', 'age<12', 'age>=12', 'fs', 'ants', 'fsl', 'fiq>0', 'normDev', 'adhd', 'bvol', 'brainseg', 'caudate', 'putamen', 'TIV', 'wm', 'gm', 'csf', 'wmfsl', 'gmfsl', 'csffsl', 'ccant', 'cccen', 'ccpos', 'abide', 'adhd200', 'male', 'female'])

In [10]:
def apply_cond(df, cndc, conditions, dropnaset=[],columns={}):
    """
    Select the rows of ``df`` that satisfy every named condition.

    df         : table to filter
    cndc       : dict mapping a condition name to a boolean selector over ``df``
    conditions : names of the selectors to combine with a logical AND
    dropnaset  : columns in which missing values disqualify a row
    columns    : optional {old: new} renaming applied to the result

    Returns a new frame; prints a warning when nothing is selected.
    """
    # start from "keep everything" and narrow down one condition at a time
    mask = np.ones(len(df), dtype=bool)
    for name in conditions:
        mask = mask & cndc[name]

    # selection followed by dropna yields a frame independent of df
    selected = df.loc[mask].dropna(subset=dropnaset)
    if columns:
        selected = selected.rename(columns=columns)

    if len(selected) == 0:
        print('Warning, len(df)==0')

    return selected
    

In [11]:
# Software whose rows are analysed in the following cells; the value is used both as
# a condition name and as a key of `tooldic`.
softw = 'ants'

In [12]:
# Sanity check: selecting rows through named conditions must return as many rows as
# the equivalent filter written out by hand.
hyp1 = ['female', 'caudate', softw,'fiq>0']
condic = define_conditions(hie, tooldic=tooldic, normalDev=normalDev, adhd=adhd, h2ube=h2ube)
tmp = apply_cond(hie, condic, hyp1)

# the same selection, spelled out explicitly
check_cols = ['FIQ', 'volume', 'Gender']
manual_mask = (
    (hie['Gender']=='Female')
    & (hie['structure'] == h2ube['Caudate'])
    & (hie['tool'] == tooldic[softw])
    & (hie['FIQ'] > 0)
)
manual = hie.loc[manual_mask].dropna(subset=check_cols)
print('manual: ',len(manual))
assert len(manual) == len(tmp.dropna(subset=check_cols))


manual:  342


In [13]:
# Count the distinct subject IDs belonging to one study family.
#cond = ['abide'] #,'fiq>0'] # ,'abide']
cond = ['adhd200'] #,'fiq>0'] # ,'abide']
# No column is required to be non-missing for this count.
dropnaset = [] #'FIQ']
tmp_df = apply_cond(hie, condic, cond, dropnaset=dropnaset, columns={'volume':'brainvol'})
# Number of distinct values in the 'ID' column of the selection.
len(set(tmp_df['ID']))

208

In [14]:
# Distinct values found in the 'structure' column.
structures = set(hie.structure)


Hypotheses

PIET-1: Total Brain Volume will positively correlate with IQ (in both sexes across the complete age range).

MAC-1: Left striatum volume (caudate + putamen) will positively correlate with IQ in the total (male + female) child (age < 20) group.

MAC-2: Left striatum volume (caudate + putamen) will positively correlate with IQ in the male children group.

MAC-3: Left striatum volume (caudate + putamen) will not correlate with IQ in the female children group.

GANJ-1: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will negatively correlate with IQ.

GANJ-2: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will negatively correlate with IQ in the young (age < 12) group.

GANJ-3: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will not significantly correlate with IQ in the adolescent (age > 12) group.

GANJ-4: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will negatively correlate with IQ in the male (age < 12) group.

GANJ-5: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will not significantly correlate with IQ in the female (age < 12) group.


In [15]:
# Accumulates one summary entry per tested hypothesis.
resdic = {}


def md2dic(varname, hyp_name, cond, mdf=None):
    """
    Summarise one coefficient of a fitted regression as a small dictionary.

    Parameters
    ----------
    varname : name of the regressor, exactly as it appears in the first column
        of the coefficient table of ``mdf.summary()``
    hyp_name : key under which the summary is returned
    cond : list of condition names that defined the sample (stored as is)
    mdf : fitted results object exposing ``summary()``, ``rsquared_adj`` and
        ``nobs``

    Returns
    -------
    dict : ``{hyp_name: {'P>|t|', 't', 'rsquared_adj', 'nobs', 'conditions'}}``;
        'P>|t|' and 't' are the strings printed in the summary table,
        'rsquared_adj' and 'nobs' are formatted strings.

    Raises
    ------
    ValueError : if ``mdf`` is not given
    KeyError : if ``varname`` is not a row of the coefficient table
    """
    if mdf is None:
        raise ValueError("md2dic needs the fitted results object (mdf=...)")

    # second table of the summary: one row per coefficient, first row is the header
    coef_table = mdf.summary().tables[1].data
    header = coef_table[0]
    matches = [row for row in coef_table if row[0] == varname]
    if not matches:
        raise KeyError("no coefficient named {!r} in the model summary".format(varname))
    coef_row = matches[0]

    # Fixed: the fit statistics are read from `mdf`. They used to come from a
    # notebook-global `md` (refitted here), which tied the result to whichever
    # model happened to be in the kernel rather than to the one passed in.
    summary = {
        'P>|t|': coef_row[header.index('P>|t|')],
        't': coef_row[header.index('t')],
        'rsquared_adj': "{:4.3}".format(mdf.rsquared_adj),
        'nobs': "{:3d}".format(int(mdf.nobs)),
        'conditions': cond,
    }
    return {hyp_name: summary}

### PIET-1: Total Brain Volume will positively correlate with IQ (in both sexes across the complete age range).


In [16]:
# Rebuild the row selectors for the full table `hie` and list their names.
condic = define_conditions(hie, tooldic=tooldic, normalDev=normalDev, adhd=adhd, h2ube=h2ube)
print(condic.keys())
# Rows with a missing value in any of these columns are excluded from the analyses below.
dropnaset = ['FIQ', 'volume', 'Gender']


dict_keys(['left', 'right', 'latNan', 'age<=20', 'age<20', 'age<12', 'age>=12', 'fs', 'ants', 'fsl', 'fiq>0', 'normDev', 'adhd', 'bvol', 'brainseg', 'caudate', 'putamen', 'TIV', 'wm', 'gm', 'csf', 'wmfsl', 'gmfsl', 'csffsl', 'ccant', 'cccen', 'ccpos', 'abide', 'adhd200', 'male', 'female'])


In [17]:
# Brain volume rows reported under the 'BVOL (mm^3)' label for the tool in `softw`.
hyp1 = ['bvol', 'fiq>0','normDev', softw]
hyp1_df = apply_cond(hie, condic, hyp1, dropnaset=dropnaset, columns={'volume':'brainvol'})
print(len(hyp1_df))

# For FSL the three tissue rows (gray, white, csf) are selected separately ...
hyp1_fsl = ['fiq>0','normDev', 'fsl']
fsl_gm = hyp1_fsl + ['gmfsl']
fsl_wm = hyp1_fsl + ['wmfsl']
fsl_csf = hyp1_fsl + ['csffsl']

fsl_gm_df = apply_cond(hie, condic, fsl_gm, dropnaset=dropnaset, columns={'volume':'fsl_gm'})
fsl_wm_df = apply_cond(hie, condic, fsl_wm, dropnaset=dropnaset, columns={'volume':'fsl_wm'})
fsl_csf_df = apply_cond(hie, condic, fsl_csf, dropnaset=dropnaset, columns={'volume':'fsl_csf'})

# ... then joined per subject ID and summed into 'brainvol'.
fsl_total = (
    fsl_gm_df
    .merge(fsl_wm_df[['ID','fsl_wm']], on='ID')
    .merge(fsl_csf_df[['ID','fsl_csf']], on='ID')
)
fsl_total['brainvol'] = fsl_total['fsl_csf'] + fsl_total['fsl_wm'] + fsl_total['fsl_gm']
print(len(fsl_total))

# Pick the table (and the matching condition list) for the selected software.
if softw == 'fsl':
    tmp = fsl_total
    hyp1 = hyp1_fsl
else:
    tmp = hyp1_df

461
573


In [18]:
print(" Structure = ", 'brainvol')

# Ordinary least squares: FIQ explained by brain volume, with study as a covariate.
iq = 'FIQ'
formula = iq + " ~ Q('brainvol') + study "
md = smf.ols(formula, data=tmp)
mdf = md.fit()
print(mdf.summary())

# Store the brain-volume coefficient under the hypothesis name.
varname, hyp_name = "Q('brainvol')", 'PIET-1'
resdic.update(md2dic(varname, hyp_name, hyp1, mdf=mdf))

 Structure =  brainvol
                            OLS Regression Results                            
Dep. Variable:                    FIQ   R-squared:                       0.147
Model:                            OLS   Adj. R-squared:                  0.104
Method:                 Least Squares   F-statistic:                     3.434
Date:                Fri, 20 Dec 2019   Prob (F-statistic):           4.18e-07
Time:                        12:30:28   Log-Likelihood:                -1800.5
No. Observations:                 461   AIC:                             3647.
Df Residuals:                     438   BIC:                             3742.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                                    coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------

In [19]:
# Display the results collected so far.
resdic

{'PIET-1': {'P>|t|': ' 0.001',
  't': '    3.442',
  'rsquared_adj': '0.104',
  'nobs': '461',
  'conditions': ['bvol', 'fiq>0', 'normDev', 'ants']}}

#### Create the data frame and conditions for the left striatum (all ages, all genders)

In [None]:
# Conditions shared by the left-striatum selections: FIQ > 0, typically developing,
# tool chosen by `softw`, left side.
cond_mac = ['fiq>0','normDev', softw,'left'] # ,'age<20']
mac_caud = cond_mac + ['caudate']
mac_put = cond_mac + ['putamen']
# Fixed: this list used to be cond_mac followed by a second copy of its first three
# entries, and it therefore also required 'left'.
# NOTE(review): presumably TIV rows are not labelled 'Left', in which case the old
# list could not select anything - confirm. mac_tiv is not used in the cells shown here.
mac_tiv = ['fiq>0','normDev', softw, 'TIV']

left_caud = apply_cond(hie, condic, mac_caud, dropnaset=dropnaset, columns={'volume':'caudate'})
left_put = apply_cond(hie, condic, mac_put, dropnaset=dropnaset, columns={'volume':'putamen'})

# One row per subject ID with both volumes; striatum = caudate + putamen.
left_stria = pd.merge(left=left_caud, right=left_put[['ID','putamen']], on='ID')
left_stria['striatum'] = left_stria['caudate'] + left_stria['putamen']
# Row selectors aligned with the merged table, used by the MAC cells below.
left_stria_condic = define_conditions(left_stria, tooldic=tooldic, normalDev=normalDev, adhd=adhd, h2ube=h2ube)
print(len(left_stria))



### MAC-1: Left striatum volume (caudate + putamen) will positively correlate with IQ in the total (male + female) child (age < 20) group.


In [None]:
# Sample: left-striatum table restricted to age < 20.
mac1_cond = ['age<20']
mac1 = apply_cond(left_stria, left_stria_condic, mac1_cond)
print(len(mac1))

# Ordinary least squares: FIQ explained by striatum volume, with study as a covariate.
iq = 'FIQ'
formula = iq + " ~ Q('striatum') + study "
md = smf.ols(formula, data=mac1)
mdf = md.fit()
print(mdf.summary())

# Store the striatum coefficient together with the full list of conditions.
varname, hyp_name = "Q('striatum')", 'MAC-1'
resdic.update(md2dic(varname, hyp_name, cond_mac+mac1_cond, mdf=mdf))

### MAC-2: Left striatum volume (caudate + putamen) will positively correlate with IQ in the male children group.


In [None]:
# Sample: left-striatum table restricted to males with age < 20.
mac2_cond = ['age<20','male']
mac2 = apply_cond(left_stria, left_stria_condic, mac2_cond)
print(len(mac2))

# Ordinary least squares: FIQ explained by striatum volume, with study as a covariate.
iq = 'FIQ'
formula = iq + " ~ Q('striatum') + study "
md = smf.ols(formula, data=mac2)
mdf = md.fit()
print(mdf.summary())

# Store the striatum coefficient together with the full list of conditions.
varname, hyp_name = "Q('striatum')", 'MAC-2'
resdic.update(md2dic(varname, hyp_name, cond_mac+mac2_cond, mdf=mdf))

### MAC-3: Left striatum volume (caudate + putamen) will not correlate with IQ in the female children group.


In [None]:
# Sample: left-striatum table restricted to females with age < 20.
mac3_cond = ['age<20','female']
mac3 = apply_cond(left_stria, left_stria_condic, mac3_cond)
print(len(mac3))

# Ordinary least squares: FIQ explained by striatum volume, with study as a covariate.
iq = 'FIQ'
formula = iq + " ~ Q('striatum') + study "
md = smf.ols(formula, data=mac3)
mdf = md.fit()
print(mdf.summary())

# Store the striatum coefficient together with the full list of conditions.
varname = "Q('striatum')"
hyp_name = 'MAC-3'
resdic.update(md2dic(varname, hyp_name, cond_mac+mac3_cond, mdf=mdf))

In [None]:
# Display the results collected so far (PIET and MAC hypotheses).
resdic

### compute cc and tbv  for GANJ, no age or gender condition 

In [None]:
# Rebuild the row selectors for the full table `hie` before the GANJ selections.
condic = define_conditions(hie, tooldic=tooldic, normalDev=normalDev, adhd=adhd, h2ube=h2ube)
# Show which condition names are available.
condic.keys()

In [None]:
# From here on the selections use the 'fs' condition. Note that this rebinds the
# notebook-wide `softw`.
softw = 'fs'
hypGanj = ['fiq>0','normDev', softw]
hyp_ccant = hypGanj + ['ccant']
hyp_cccen = hypGanj + ['cccen']
hyp_ccpos = hypGanj + ['ccpos']

# One table per corpus callosum segment, with 'volume' renamed after the segment.
ccant = apply_cond(hie, condic, hyp_ccant, dropnaset=dropnaset, columns={'volume':'ccant'})
cccen = apply_cond(hie, condic, hyp_cccen, dropnaset=dropnaset, columns={'volume':'cccen'})
ccpos = apply_cond(hie, condic, hyp_ccpos, dropnaset=dropnaset, columns={'volume':'ccpos'})

# Join the three segments per subject ID and add them up.
cc = (
    ccant
    .merge(cccen[['ID','cccen']], on='ID')
    .merge(ccpos[['ID','ccpos']], on='ID')
)
cc['cc'] = cc['ccant']+cc['cccen']+cc['ccpos']
len(cc), len(ccant), len(cccen), len(ccpos)

In [None]:
# Gray matter, white matter and CSF rows for the GANJ sample.
hyp_gm = hypGanj + ['gm']
gm = apply_cond(hie, condic, hyp_gm, dropnaset=dropnaset, columns={'volume':'gm'})

# White matter additionally requires rows not labelled 'Left' or 'Right'.
hyp_wm = hypGanj + ['wm','latNan']
wm = apply_cond(hie, condic, hyp_wm, dropnaset=dropnaset, columns={'volume':'wm'})

hyp_csf = hypGanj + ['csf']
csf = apply_cond(hie, condic, hyp_csf, dropnaset=dropnaset, columns={'volume':'csf'})

# Total brain volume per subject ID: wm + gm + csf.
tbv = (
    gm
    .merge(wm[['ID','wm']], on='ID')
    .merge(csf[['ID','csf']], on='ID')
)
tbv['tbv'] = tbv['wm'] + tbv['gm'] +  tbv['csf']
print(len(gm), len(wm), len(csf), len(tbv))

In [None]:
# Add the corpus callosum total to the brain-volume table (joined on subject ID) and
# build row selectors aligned with the merged table.
cc_tbv = tbv.merge(cc[['ID','cc']], on='ID')
condic_cc_tbv = define_conditions(cc_tbv, tooldic=tooldic, normalDev=normalDev, adhd=adhd, h2ube=h2ube)


### GANJ-1: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will negatively correlate with IQ.


In [None]:
# Sample: cc/tbv table restricted to age < 20.
ganj1_cond = ['age<20']
ganj1 = apply_cond(cc_tbv, condic_cc_tbv, ganj1_cond)
print(ganj1.columns.tolist(),len(ganj1))

# Ordinary least squares: FIQ explained by cc, with study and tbv as covariates.
iq = 'FIQ'
formula = iq + " ~ Q('cc') + study + tbv "
md = smf.ols(formula, data=ganj1)
mdf = md.fit()
print(mdf.summary())

# Store the cc coefficient together with the full list of conditions.
varname = "Q('cc')"
hyp_name = 'GANJ-1'
resdic.update(md2dic(varname, hyp_name, hypGanj+ganj1_cond, mdf=mdf))

### GANJ-2: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will negatively correlate with IQ in the young (age < 12) group.

In [None]:
# Sample: cc/tbv table restricted to age < 12.
ganj2_cond = ['age<12']
ganj2 = apply_cond(cc_tbv, condic_cc_tbv, ganj2_cond)
print(ganj2.columns.tolist(),len(ganj2))

# Ordinary least squares: FIQ explained by cc, with study and tbv as covariates.
iq = 'FIQ'
formula = iq + " ~ Q('cc') + study + tbv "
md = smf.ols(formula, data=ganj2)
mdf = md.fit()
print(mdf.summary())

# Store the cc coefficient together with the full list of conditions.
varname = "Q('cc')"
hyp_name = 'GANJ-2'
resdic.update(md2dic(varname, hyp_name, hypGanj+ganj2_cond, mdf=mdf))

### GANJ-3: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will not significantly correlate with IQ in the adolescent (age > 12) group.


In [None]:
# Sample: cc/tbv table restricted to age >= 12.
ganj3_cond = ['age>=12']
ganj3 = apply_cond(cc_tbv, condic_cc_tbv, ganj3_cond)
print(len(ganj3))

# Ordinary least squares: FIQ explained by cc, with study and tbv as covariates.
iq = 'FIQ'
formula = iq + " ~ Q('cc') + study + tbv "
md = smf.ols(formula, data=ganj3)
mdf = md.fit()
print(mdf.summary())

# Store the cc coefficient together with the full list of conditions.
varname = "Q('cc')"
hyp_name = 'GANJ-3'
resdic.update(md2dic(varname, hyp_name, hypGanj+ganj3_cond, mdf=mdf))

### GANJ-4: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will negatively correlate with IQ in the male (age < 12) group.


In [None]:
# Sample: cc/tbv table restricted to males with age < 12.
ganj4_cond = ['age<12','male']
ganj4 = apply_cond(cc_tbv, condic_cc_tbv, ganj4_cond)
print(ganj4.columns.tolist(),len(ganj4),'\n')

# Ordinary least squares: FIQ explained by cc, with study and tbv as covariates.
iq = 'FIQ'
formula = iq + " ~ Q('cc') + study + tbv "
md = smf.ols(formula, data=ganj4)
mdf = md.fit()
print(mdf.summary())

# Store the cc coefficient together with the full list of conditions.
varname = "Q('cc')"
hyp_name = 'GANJ-4'
resdic.update(md2dic(varname, hyp_name, hypGanj+ganj4_cond, mdf=mdf))

### GANJ-5: Total Corpus Callosum midsagittal area, after correcting for total brain volume, will not significantly correlate with IQ in the female (age < 12) group.


In [None]:
# Sample: cc/tbv table restricted to females with age < 12.
ganj5_cond = ['age<12','female']
ganj5 = apply_cond(cc_tbv, condic_cc_tbv, ganj5_cond)
print(ganj5.columns.tolist(),len(ganj5),'\n')

# Ordinary least squares: FIQ explained by cc, with study and tbv as covariates.
iq = 'FIQ'
formula = iq + " ~ Q('cc') + study + tbv "
md = smf.ols(formula, data=ganj5)
mdf = md.fit()
print(mdf.summary())

# Store the cc coefficient together with the full list of conditions.
varname = "Q('cc')"
hyp_name = 'GANJ-5'
resdic.update(md2dic(varname, hyp_name, hypGanj+ganj5_cond, mdf=mdf))

In [None]:
# Print the summary entries of all hypotheses tested above.
print(resdic)

### Scrap

In [None]:
# Inert string literal holding an earlier draft: caudate and putamen are summed over
# both sides with split_merge_df, combined into 'striatum' and joined with TIV.
# It is never executed.
"""
hyp2 = ['fiq>0','normDev','fs','age<20']
hyp2_caud = hyp2 + ['caudate'] # ,'abide']
hyp2_put = hyp2 + ['putamen'] # ,'abide']
hyp2_tiv = hyp2 + ['TIV'] # ,'abide']
tmp_caud = apply_cond(hie, condic, hyp2_caud, dropnaset=dropnaset)
caud = split_merge_df(tmp_caud, indx='ID', spliton='laterality', levels=['Left','Right'], 
                       keep_col='volume', op='+',colrename='caudate')
tmp_put = apply_cond(hie, condic, hyp2_put, dropnaset=dropnaset)
put = split_merge_df(tmp_put, indx='ID', spliton='laterality', levels=['Left','Right'], 
                       keep_col='volume', op='+',colrename='putamen')
tmp_tiv = apply_cond(hie, condic, hyp2_tiv, dropnaset=dropnaset)
print(len(caud), len(put), len(tmp_tiv))

stria = pd.merge(left=caud, right=put[['ID','putamen']], left_on='ID', right_on='ID')
stria['striatum'] = stria['caudate']+stria['putamen']
stria = pd.merge(left=stria, right=tmp_tiv[['ID','volume']], left_on='ID', right_on='ID')
stria.rename(columns={'volume':'TIV'},inplace=True)
print(list(stria),len(stria))
""";