In this notebook, I will prepare FSGD files and contrast files to run vertex-level analyses for MRiShare data.

Measures of interest: CT and CSA

The model is a follow-up to the Simple Model that did not model the group differences.

Here, Sex is included as a factor in the group that includes both sexes, and the effects of Sex and Age are examined in a model that controls for ICV. Two strategies for centering Age and ICV are compared:

1. Age/ICV centered across groups (AgeC, eTIVC)

2. Age/ICV centered for each group separately (AgeGC, eTIVGC)


## 1. Imports, data and output dirs


In [1]:
import os
import os.path as op
import numpy as np
import pandas as pd

In [2]:
# Root folder for this analysis (relative to the notebook's working directory);
# created on first run, left untouched if it already exists.
outdir = 'SBM_main'
os.makedirs(outdir, exist_ok=True)

In [9]:
# Folder holding the Simple Model inputs; it is read again below for the
# per-group group_info.csv tables.
simple_out = op.join(outdir, 'Simple_Models')
# Subject table; the first CSV column is used as the index.
sub_info = pd.read_csv(op.join(outdir, 'subjects_info.csv'), index_col=0)

In [10]:
# Preview the first rows to check the loaded columns.
sub_info.head()

Unnamed: 0_level_0,Sex,Age,eTIV
mrishare_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SHARE0001,M,24.241,1847961.0
SHARE0002,F,22.943,1446134.0
SHARE0003,F,23.633,1427213.0
SHARE0004,M,23.107,1825874.0
SHARE0005,F,25.021,1289739.0


In [11]:
# Parent folder for all models that include Sex.
mg_out = op.join(outdir, 'Model_Sex')
os.makedirs(mg_out, exist_ok=True)

In [12]:
# Sub-folder for the combined ('Both') group; every model folder below is created inside it.
group_out = op.join(mg_out, 'Both')
os.makedirs(group_out, exist_ok=True)

## 2. Create FSGD and contrast files for models with Sex

In [13]:
# Covariate table of the 'Both' group from the Simple Models folder
# (first CSV column used as the index).
group_info = pd.read_csv(op.join(simple_out, 'Both', 'group_info.csv'), index_col=0)
group_info.head()

Unnamed: 0_level_0,Sex,Age,eTIV,AgeC,SqAgeC,eTIVC
mrishare_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SHARE0001,M,24.241,1847961.0,2.130683,4.539809,279630.987766
SHARE0002,F,22.943,1446134.0,0.832683,0.69336,-122195.115844
SHARE0003,F,23.633,1427213.0,1.522683,2.318562,-141116.138384
SHARE0004,M,23.107,1825874.0,0.996683,0.993376,257544.682786
SHARE0005,F,25.021,1289739.0,2.910683,8.472074,-278590.321124


In [14]:
# Number of rows (subjects) in the combined group table.
len(group_info)

1834

In [15]:
# Load the Male-only and Female-only covariate tables from the Simple Models folder.
# Their AgeC / eTIVC columns are used below as the group-centered (per-sex) versions
# of Age and eTIV -- presumably centered within each sex by the Simple Model
# preparation step; confirm there.

male_df = pd.read_csv(op.join(simple_out, 'Male', 'group_info.csv'), index_col=0)
female_df = pd.read_csv(op.join(simple_out, 'Female', 'group_info.csv'), index_col=0)



In [17]:
# Stack the two per-sex tables row-wise (males first, then females).
concat_mf_info = pd.concat((male_df, female_df))
concat_mf_info.head()

Unnamed: 0_level_0,Sex,Age,eTIV,AgeC,SqAgeC,eTIVC
mrishare_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SHARE0001,M,24.241,1847961.0,1.887374,3.562179,143763.640343
SHARE0004,M,23.107,1825874.0,0.753374,0.567572,121677.335363
SHARE0006,M,22.916,1756562.0,0.562374,0.316264,52365.537423
SHARE0007,M,20.011,1523241.0,-2.342626,5.487899,-180955.476727
SHARE0008,M,23.715,1648408.0,1.361374,1.853338,-55789.129727


In [19]:
# Keep only the two centered covariates from the stacked per-sex table and relabel
# them with the 'GC' (group-centered) suffix so they can sit next to the
# across-group AgeC / eTIVC columns of the combined table.
group_centered_df = concat_mf_info.loc[:, ['AgeC', 'eTIVC']].rename(
    columns={'AgeC': 'AgeGC', 'eTIVC': 'eTIVGC'}
)

In [20]:
# Attach the group-centered covariates to the combined table, aligned on subject ID.
# Check the alignment first: a subject present in only one of the two tables, or
# listed twice, would otherwise end up as NaN or duplicated rows in the FSGD files.
assert group_centered_df.index.is_unique, 'duplicate subject IDs in the per-sex tables'
unmatched_ids = group_info.index.symmetric_difference(group_centered_df.index)
assert unmatched_ids.empty, (
    'subject IDs differ between combined and per-sex tables, e.g. {}'.format(list(unmatched_ids[:5]))
)

# A left join keeps group_info's row order and its index name.
new_group_info = group_info.join(group_centered_df)
new_group_info.head()

Unnamed: 0,Sex,Age,eTIV,AgeC,SqAgeC,eTIVC,AgeGC,eTIVGC
SHARE0001,M,24.241,1847961.0,2.130683,4.539809,279630.987766,1.887374,143763.640343
SHARE0002,F,22.943,1446134.0,0.832683,0.69336,-122195.115844,0.927426,-69289.194196
SHARE0003,F,23.633,1427213.0,1.522683,2.318562,-141116.138384,1.617426,-88210.216736
SHARE0004,M,23.107,1825874.0,0.996683,0.993376,257544.682786,0.753374,121677.335363
SHARE0005,F,25.021,1289739.0,2.910683,8.472074,-278590.321124,3.005426,-225684.399476


In [34]:
# Save the extended covariate table for the 'Both' group.
# The index is named explicitly so that the CSV header (and any later
# reset_index()) exposes the subject ID as 'mrishare_id'.
new_group_info.index.names = ['mrishare_id']
new_group_info.to_csv(op.join(group_out, 'group_info.csv'))

In [42]:
# Model name -> covariate columns written to that model's FSGD file.
#   6a: Age and eTIV centered across groups
#   6b: Age and eTIV centered within each group
models = dict(
    Model6a_Sex_Age_ICV=['AgeC', 'eTIVC'],
    Model6b_Sex_Age_ICV=['AgeGC', 'eTIVGC'],
)

In [23]:
# Header lines shared by every FSGD file: the format tag and the two classes.
# NOTE(review): the contrast vectors in this notebook are written with F before M,
# which presumably relies on this class order -- confirm against the FreeSurfer FSGD docs.
gm_fsgd_lines = ['GroupDescriptorFile 1', 'Class F', 'Class M']

In [44]:
# Write one FSGD (FreeSurfer Group Descriptor) file per entry in `models`.
# NOTE: this cell has to be (re-)run after `models` is extended, otherwise the
# newly added models get neither an output folder nor an FSGD file.

for model_name, variables in models.items():
    model_out = op.join(group_out, model_name)
    os.makedirs(model_out, exist_ok=True)

    # Header: the shared descriptor/class lines, then this model's covariate names.
    header_lines = gm_fsgd_lines + [' '.join(['Variables'] + variables)]

    # One "Input <subject> <Sex> <covariates...>" row per subject.
    # rename_axis() guarantees the ID column is called 'mrishare_id' even if the
    # index was never named; assign() builds a new frame rather than adding a
    # column to a column-subset (which can trigger SettingWithCopyWarning).
    col_order = ['Input', 'mrishare_id', 'Sex'] + variables
    input_rows = (new_group_info
                  .rename_axis('mrishare_id')
                  .reset_index()
                  .assign(Input='Input')[col_order])
    assert not input_rows.isnull().values.any(), (
        'missing values in FSGD rows for {}'.format(model_name)
    )

    fsgd_file = op.join(model_out, '{}.fsgd'.format(model_name))
    with open(fsgd_file, 'w') as f:
        f.write('\n'.join(header_lines) + '\n')
        input_rows.to_csv(f, header=False, index=False, sep=' ')

In [35]:
# Contrast matrices for the two-class (F, M) models with two covariates (Age, ICV).
# Each value is a list of rows; one row per line of the resulting .mtx file.
# Columns are presumably ordered [F intercept, M intercept, F age, M age, F ICV, M ICV]
# (FreeSurfer's default DODS layout) -- confirm against the glmfit call.
contrasts = {}

# Sex difference in intercept after accounting for age and eTIV.
contrasts['group.diff'] = [[1, -1, 0, 0, 0, 0]]

# Difference between groups in the age slope.
contrasts['group-x-age'] = [[0, 0, 1, -1, 0, 0]]

# Difference between groups in the ICV slope.
contrasts['group-x-ICV'] = [[0, 0, 0, 0, 1, -1]]

# Age slope averaged over F and M (age effect accounting for sex and ICV).
contrasts['FM-age'] = [[0, 0, 0.5, 0.5, 0, 0]]

# ICV slope averaged over F and M (ICV effect accounting for sex and age).
contrasts['FM-ICV'] = [[0, 0, 0, 0, 0.5, 0.5]]

# Two-row contrast combining the two slope-difference rows above: it tests whether
# the groups differ in the age slope and/or the ICV slope. It is not a three-way
# group x age x ICV interaction; the models contain no age x ICV term.
contrasts['group-x-age-x-ICV'] = [
    [0, 0, 1, -1, 0, 0],
    [0, 0, 0, 0, 1, -1],
]

In [36]:
# For every model currently in `models`, write each contrast as a <name>.mtx
# matrix file plus a companion <name>.mdtx file holding the expected direction
# of the effect (always 'abs' here).

for model_name, variables in models.items():
    model_out = op.join(group_out, model_name)

    for contrast_name, contrast_list in contrasts.items():
        contrast_file = op.join(model_out, '{}.mtx'.format(contrast_name))
        contrast_sign_file = op.join(model_out, '{}.mdtx'.format(contrast_name))

        # One text line per contrast row, values separated by single spaces.
        matrix_text = '\n'.join(' '.join(map(str, row)) for row in contrast_list)

        with open(contrast_file, 'w') as f:
            f.write(matrix_text)

        with open(contrast_sign_file, 'w') as f:
            f.write('abs')

For some reason, the GLM for Model6a failed, possibly due to rank deficiency (however, AP ran very similar models w/o any issues...). 

After discussing with BM, we decided to use simpler models that just contrast F vs M, with or without Age effects. Also, for the model including Age and ICV, ICV is group-centered but Age is not.

In [43]:
# Additional models:
#   6c: Age centered across groups, eTIV centered within each group
#   7 : Sex only (no covariates)
#   8 : Sex + Age (centered across groups)
# The FSGD-writing cell has to be re-run after this cell so that these models
# get their folders and .fsgd files.
models.update({
    'Model6c_Sex_Age_ICV': ['AgeC', 'eTIVGC'],
    'Model7_Sex': [],
    'Model8_Sex_Age': ['AgeC'],
})

In [45]:
# The FSGD files come from re-running the FSGD-writing cell. The contrasts, however,
# depend on how many covariates a model has, so they are looked up per model family.
# Keys are the first six characters of the model name ('Model6', 'Model7', 'Model8').

contrast_dict = {}

# Two covariates (Age, ICV): the six-column contrasts defined earlier.
contrast_dict['Model6'] = contrasts

# No covariates: only the two group intercepts.
contrast_dict['Model7'] = {'group.diff': [[1, -1]]}

# One covariate (Age): two intercepts followed by two age slopes.
contrast_dict['Model8'] = {
    'group.diff': [[1, -1, 0, 0]],
    'group-x-age': [[0, 0, 1, -1]],
    'FM-age': [[0, 0, 0.5, 0.5]],
}

In [46]:
# Write the contrast files for every model, using the contrast set that matches
# the model family (first six characters of the model name, e.g. 'Model6').
for model_name, variables in models.items():
    model_out = op.join(group_out, model_name)
    # Models added after the FSGD cell was run have no folder yet; create it so
    # that opening the contrast files below cannot fail.
    os.makedirs(model_out, exist_ok=True)

    # Bound to a separate name on purpose: rebinding `contrasts` here would leave the
    # notebook-level contrast definition pointing at the last model's contrasts.
    model_contrasts = contrast_dict[model_name[:6]]

    for contrast_name, contrast_list in model_contrasts.items():
        contrast_file = op.join(model_out, '{}.mtx'.format(contrast_name))
        contrast_sign_file = op.join(model_out, '{}.mdtx'.format(contrast_name))

        # One line per contrast row, values separated by single spaces.
        with open(contrast_file, 'w') as f:
            lines = [' '.join(str(val) for val in contrast) for contrast in contrast_list]
            f.write('\n'.join(lines))

        # Companion file with the expected direction of the effect.
        with open(contrast_sign_file, 'w') as f:
            f.write('abs')

These models were run with run_Group_model_SBM.sh in /data/extra/tsuchida/MRiShare/SBM.

## 3. Create summary figures

The analyses for the simple models were performed using a WF in Freesurfer_SBM.py.

To make it easy to compare the results across the 3 groups, plot all the equivalent images across them.

Since plotting takes up lots of memory, use plotSBMresults.py script in /data/extra/tsuchida/MRiShare/SBM/ to submit plotting jobs to SLURM. Here, json files for each plot will be created.

In [11]:
import json

In [6]:
# Figures (and the JSON argument files for the plotting script) are written
# under the Simple_Models folder.
png_out = op.join(simple_out, 'Results_figures')
os.makedirs(png_out, exist_ok=True)

In [7]:
# Surface measures to summarise; they appear as '_measure_<name>' in the result paths.
surf_measures = ['thickness', 'area']

In [8]:
# Smoothing kernel FWHM (presumably in mm -- confirm); corresponds to the
# 'fwhm10' / '_fwhm_10.0' components of the result paths used below.
fwhm = 10.0

In [9]:
# Absolute, site-specific folder containing the 'SBM_<group>Sink' result folders.
sink_dir = '/data/extra/tsuchida/MRiShare/SBM/'

In [None]:
# Absolute, site-specific folder that contains the 'fsaverage' template;
# its 'surf' sub-folder provides the surfaces used for plotting.
fs6_dir = '/data/analyses/work_in_progress/freesurfer/fsmrishare-flair6.0/'
template_surf_dir = op.join(fs6_dir, 'fsaverage', 'surf')

In [None]:
# Left/right hemisphere fsaverage files: inflated and pial surfaces to draw on,
# and the sulc maps passed as background maps when plotting.
lh_infl, rh_infl = [op.join(template_surf_dir, '{}.inflated'.format(hemi)) for hemi in ('lh', 'rh')]
lh_pial, rh_pial = [op.join(template_surf_dir, '{}.pial'.format(hemi)) for hemi in ('lh', 'rh')]
lh_sulc, rh_sulc = [op.join(template_surf_dir, '{}.sulc'.format(hemi)) for hemi in ('lh', 'rh')]

In [None]:
# Surface name -> (left, right) surface files on which the maps are drawn.
overlay_surfs = {'inflated': (lh_infl, rh_infl),
                 'pial': (lh_pial, rh_pial)}

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from plotting_tools.img_plotting import plot_multi_surf_map, plot_multi_surf_stat

### 3.1 Mean maps

In [10]:
# Output folders for the mean-map figures: one sub-folder per surface measure.
mean_out = op.join(png_out, 'Mean_maps')
for meas in surf_measures:
    meas_out = op.join(mean_out, meas)
    os.makedirs(meas_out, exist_ok=True)

In [13]:
# Write one JSON argument file per surface measure for the plotting script.
# NOTE(review): `simple_groups` is not defined in the visible part of this notebook;
# it has to exist in the kernel before this cell is run.
for meas in surf_measures:
    # Folder holding the mean maps of this measure, one per group.
    group_meas_dirs = [op.join(sink_dir, 'SBM_{}Sink'.format(g), '_measure_{}'.format(meas))
                       for g in simple_groups]
    lh_surf_maps = [op.join(d, 'mean.lh.fwhm10.fsaverage.mgh') for d in group_meas_dirs]
    rh_surf_maps = [op.join(d, 'mean.rh.fwhm10.fsaverage.mgh') for d in group_meas_dirs]

    out_basename = 'Mean_{}'.format(meas)
    fig_title = 'Mean {}'.format(meas)

    # Arguments for the plotting job, serialised next to the figure folders.
    dat = {'fig_title': fig_title,
           'out_basename': out_basename,
           'plot_type': 'surf_map',
           'group_names': simple_groups,
           'lh_surf_maps': lh_surf_maps,
           'rh_surf_maps': rh_surf_maps}
    json_fname = op.join(png_out, '{}_arg_dict.json'.format(out_basename))
    with open(json_fname, 'w') as f:
        json.dump(dat, f)

In [None]:
# Plot the group mean maps directly in the notebook, once per overlay surface.
# NOTE(review): `simple_groups` is not defined in the visible part of this notebook.
for meas in surf_measures:
    out_dir = op.join(mean_out, meas)
    fig_title = 'Mean {}'.format(meas)

    # Mean map of this measure for each group, per hemisphere.
    group_meas_dirs = [op.join(sink_dir, 'SBM_{}Sink'.format(g), '_measure_{}'.format(meas))
                       for g in simple_groups]
    lh_surf_maps = [op.join(d, 'mean.lh.fwhm10.fsaverage.mgh') for d in group_meas_dirs]
    rh_surf_maps = [op.join(d, 'mean.rh.fwhm10.fsaverage.mgh') for d in group_meas_dirs]

    # One figure per overlay surface (inflated, pial), sulc maps as background.
    for surf_name, (lh_overlay, rh_overlay) in overlay_surfs.items():
        out_fname = op.join(out_dir, 'Mean_{}_on_{}_surf.png'.format(meas, surf_name))
        plot_multi_surf_map(lh_surf=lh_overlay, rh_surf=rh_overlay,
                            lh_bg_map=lh_sulc, rh_bg_map=rh_sulc,
                            lh_surf_maps=lh_surf_maps, rh_surf_maps=rh_surf_maps,
                            group_names=simple_groups, out_fname=out_fname, fig_title=fig_title)

### 3.2. Statistical maps

In [15]:
# Folder for the significance-map JSON argument files.
p_out = op.join(png_out, 'Sig_maps')
os.makedirs(p_out, exist_ok=True)

In [41]:
# Write one JSON argument file per (measure, model, contrast, correction) for the
# significance maps.
# NOTE(review): `simple_groups` and `simple_models` are not defined in the visible
# part of this notebook; they have to exist in the kernel before this cell is run.
for meas in surf_measures:
    # The loop variable is deliberately not called `contrasts`, so the notebook-level
    # contrast definition is not overwritten by running this cell.
    for model_name, model_contrasts in simple_models.items():
        for cont in model_contrasts:
            fig_title = '{}: Effect of {} on {}'.format(model_name, cont, meas)

            # GLM result folder of this model/measure/smoothing level, one per group.
            glm_dirs = [op.join(sink_dir,
                                'SBM_{}Sink'.format(g),
                                '_model_name_{}'.format(model_name),
                                '_measure_{}'.format(meas),
                                '_fwhm_{}'.format(fwhm)) for g in simple_groups]

            # Significance map of this contrast for each group, per hemisphere.
            lh_stat_maps = [op.join(d, 'lhSBMglmfit', cont, 'sig.mgh') for d in glm_dirs]
            rh_stat_maps = [op.join(d, 'rhSBMglmfit', cont, 'sig.mgh') for d in glm_dirs]

            for corr in ['corrected', 'uncorrected']:
                # Names
                out_basename = '{}_cont_{}_{}_{}P'.format(model_name, cont, meas, corr)
                if corr == 'corrected':
                    # Per-group files holding the FDR threshold of this contrast.
                    thresholds = [op.join(d, cont, 'fdr_threshold.txt') for d in glm_dirs]
                else:
                    thresholds = None

                # put in json
                dat = {'fig_title': fig_title,
                       'out_basename': out_basename,
                       'plot_type': 'surf_stat',
                       'group_names': simple_groups,
                       'lh_stat_maps': lh_stat_maps,
                       'rh_stat_maps': rh_stat_maps,
                       'upper_lim': 10.0,
                       'thresholds': thresholds,
                       'cmap': 'coolwarm'}
                json_fname = op.join(p_out, '{}_arg_dict.json'.format(out_basename))
                with open(json_fname, 'w') as f:
                    json.dump(dat, f)

In [37]:
# Folder for the gamma-map JSON argument files.
g_out = op.join(png_out, 'Gamma_maps')
os.makedirs(g_out, exist_ok=True)

In [33]:
# Gamma map variants to plot; the label becomes part of each output basename.
g_corr = ['masked', 'uncorrected']

In [42]:
# Write one JSON argument file per (measure, model, contrast, variant) for the
# gamma maps.
# NOTE(review): `simple_groups` and `simple_models` are not defined in the visible
# part of this notebook; they have to exist in the kernel before this cell is run.
for meas in surf_measures:
    # The loop variable is deliberately not called `contrasts`, so the notebook-level
    # contrast definition is not overwritten by running this cell.
    for model_name, model_contrasts in simple_models.items():
        for cont in model_contrasts:
            fig_title = '{}: Effect of {} on {}'.format(model_name, cont, meas)

            # GLM result folder of this model/measure/smoothing level, one per group.
            stat_base = [op.join(sink_dir,
                                 'SBM_{}Sink'.format(g),
                                 '_model_name_{}'.format(model_name),
                                 '_measure_{}'.format(meas),
                                 '_fwhm_{}'.format(fwhm)) for g in simple_groups]

            for corr in g_corr:
                # Names
                out_basename = '{}_cont_{}_{}_{}G'.format(model_name, cont, meas, corr)
                # The two branches used to be swapped, so the '..._maskedG' JSON pointed
                # at the raw gamma maps and '..._uncorrectedG' at the masked ones.
                # JSON files written before this fix need to be regenerated.
                if corr == 'masked':
                    lh_stat_maps = [op.join(d, cont, 'lh.masked_gamma.mgh') for d in stat_base]
                    rh_stat_maps = [op.join(d, cont, 'rh.masked_gamma.mgh') for d in stat_base]
                else:
                    lh_stat_maps = [op.join(d,
                                            'lhSBMglmfit',
                                            cont,
                                            'gamma.mgh') for d in stat_base]
                    rh_stat_maps = [op.join(d,
                                            'rhSBMglmfit',
                                            cont,
                                            'gamma.mgh') for d in stat_base]

                # put in json
                dat = {'fig_title': fig_title,
                       'out_basename': out_basename,
                       'plot_type': 'surf_stat',
                       'group_names': simple_groups,
                       'lh_stat_maps': lh_stat_maps,
                       'rh_stat_maps': rh_stat_maps,
                       'upper_lim': None,
                       'thresholds': None,
                       'cmap': 'jet'}
                json_fname = op.join(g_out, '{}_arg_dict.json'.format(out_basename))
                with open(json_fname, 'w') as f:
                    json.dump(dat, f)