In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm

import pickle

Load Quality Control Data

In [2]:
via11_scQA = pd.read_excel('/mnt/projects/VIA_Vlad/nobackup/QA_centralSulcus_nht.xlsx')[['subjects', 'sites', 'vis_QA', 'editted']]
via11_scQA = via11_scQA[~via11_scQA.subjects.isna()]
via11_scQA = via11_scQA.set_index('subjects')

Load Demographic Data and GT and Match with QC Data

In [3]:
via11_demo = pd.read_excel('/mnt/projects/VIA_Vlad/nobackup/VIA11_fmriflanker_info_database_2021jan25.xlsx')
via11_demo = via11_demo[via11_demo.mp2rage_exists == 1].drop(columns=['in_via11', 'stormdb'])
via11_demo['subjects'] = [f'sub-via{x:03d}' for x in via11_demo.via_id]
via11_demo = via11_demo[~via11_demo.subjects.isna()]
via11_demo = via11_demo[via11_demo.subjects.isin(via11_scQA.index.values)]
via11_demo = via11_demo.set_index('subjects')

for subject in via11_demo.index.values:
    # via11_demo.loc[subject, 'site'] = via11_scQA.loc[subject, 'sites']
    via11_demo.loc[subject, 'vis_QA'] = via11_scQA.loc[subject, 'vis_QA']
    via11_demo.loc[subject, 'editted'] = via11_scQA.loc[subject, 'editted']

In [5]:
via11_demo

Unnamed: 0_level_0,via_id,site,group,mri_age,sex,handedness,tanner,cbcl_total,cbcl_external,cbcl_internal,...,flanker_behav_deltaRT_3sd,no_sMRI,no_flanker_fMRI,Braces,neurological disorder,in_flanker_behav,in_flanker_analysis,comment,vis_QA,editted
subjects,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
sub-via003,3,1.0,3.0,12.188912,0.0,3.0,2.0,4.0,1.0,1.0,...,0.0,0.0,0.0,0,0,1.0,1.0,,2.0,1.0
sub-via004,4,1.0,3.0,11.764545,0.0,3.0,2.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,0,0.0,0.0,wrong e-prime,1.0,1.0
sub-via005,5,1.0,3.0,11.493498,0.0,3.0,2.0,9.0,2.0,3.0,...,0.0,0.0,0.0,0,0,0.0,0.0,wrong e-prime,1.0,1.0
sub-via010,10,2.0,2.0,11.718001,1.0,2.0,2.0,62.0,14.0,13.0,...,0.0,0.0,0.0,0,0,0.0,0.0,wrong e-prime,2.0,1.0
sub-via013,13,2.0,3.0,12.287474,0.0,2.0,3.0,7.0,2.0,2.0,...,0.0,0.0,0.0,0,0,1.0,1.0,,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sub-via517,517,2.0,3.0,11.641342,1.0,3.0,3.0,4.0,0.0,3.0,...,0.0,1.0,1.0,0,0,0.0,0.0,stim. Com. Malfunction,1.0,0.0
sub-via518,518,1.0,3.0,12.353183,1.0,3.0,2.0,3.0,0.0,1.0,...,0.0,0.0,0.0,0,0,1.0,1.0,,2.0,1.0
sub-via519,519,2.0,3.0,12.533881,1.0,3.0,4.0,11.0,2.0,3.0,...,0.0,0.0,0.0,0,0,1.0,1.0,,2.0,1.0
sub-via521,521,1.0,2.0,11.118412,1.0,2.0,2.0,8.0,4.0,3.0,...,0.0,0.0,0.0,0,0,1.0,1.0,,2.0,1.0


Load CS features

In [6]:
cs_feat = pd.read_pickle('../data/bvisa_subject_features.pkl')

In [13]:
cs_feat['sub-via003'].keys()

dict_keys(['morphology', 'depth_profile_L', 'depth_profile_R', 'medial_axis_L', 'medial_axis_R'])

In [84]:
def select_morphofeatures(df:pd.DataFrame,
                          features:list[str] = ['label',
                                                'side',
                                                'surface_talairach',
                                                'maxdepth_talairach',
                                                'meandepth_talairach',
                                                'hull_junction_length_talairach',
                                                'GM_thickness', 'opening'], 
                          sulci: list[str] = ['S.C.'],
                          side: str = 'r'):
    """_summary_

    Args:
        df (pd.DataFrame): DataFrame with morphological features of a subject
        features (list[str], optional): List of features to use. Defaults to ['label', 'side', 'surface_talairach', 'maxdepth_talairach', 'meandepth_talairach'].
        sulci (list[str], optional): List of sulci to use. Defaults to ['S.C.'].
        side (str, optional): Which sides of sulci to use as features.
            Could be 'l' or 'r' to extract features only from left or right
            sulci or 'both' to extract features from both. Defaults to 'r'.

    Returns:
        np.ndarray: 1D array of features
        np.ndarray: 1D array of feature names
    """
    features = df[df.label.isin(sulci)][features]
    if len(sulci) == 1:
        features = features.drop(columns=['label'])
    else:
        raise NotImplementedError('Only one sulcus is supported for now')
    
    # get feature names
    if side == 'l' or side =='r':
        feature_names = [f'{side}_{c}' for c in features.columns.values]
    else:
        feature_names = [f'{s}_{c}' for s in ['l', 'r'] for c in features.columns.values]
    
    # transform features into an array
    if side == 'l':
        features = features[features.side == 'left'].drop(columns=['side'])
    elif side == 'r':
        features = features[features.side == 'right'].drop(columns=['side'])
    elif side == 'both':
        features = features.drop(columns=['side'])
    else:
        raise ValueError('side should be either "l", "r" or "both"')
    features = features.values.flatten()
    
    
    return features, feature_names
    

In [85]:
select_morphofeatures(cs_feat['sub-via003']['morphology'], side='l')

(array([4.35389e+03, 2.72200e+01, 1.74400e+01, 1.31680e+02, 3.54000e+00,
        1.07000e+00]),
 ['l_side',
  'l_surface_talairach',
  'l_maxdepth_talairach',
  'l_meandepth_talairach',
  'l_hull_junction_length_talairach',
  'l_GM_thickness',
  'l_opening'])

In [52]:
cs_feat['sub-via003']['morphology'][cs_feat['sub-via003']['morphology']['label'] =='S.C.']#[['label', 'side', 'surface_talairach', 'maxdepth_talairach', 'meandepth_talairach', 'hull_junction_length_talairach', 'GM_thickness', 'opening']]

Unnamed: 0,sulcus,label,side,surface_talairach,surface_native,maxdepth_talairach,maxdepth_native,meandepth_talairach,meandepth_native,hull_junction_length_talairach,hull_junction_length_native,GM_thickness,opening
40,S.C._left,S.C.,left,4353.89,4039.42,27.22,26.32,17.44,16.82,131.68,127.16,3.54,1.07
41,S.C._right,S.C.,right,4470.7,4151.93,25.92,25.06,15.72,15.19,153.66,148.14,3.55,1.27


In [21]:
cs_feat['sub-via003']['morphology']['label'].values

array(['F.C.L.a.', 'F.C.L.a.', 'F.C.L.p.', 'F.C.L.p.', 'F.C.L.r.ant.',
       'F.C.L.r.ant.', 'F.C.L.r.asc.', 'F.C.L.r.asc.', 'F.C.L.r.diag.',
       'F.C.L.r.diag.', 'F.C.L.r.retroC.tr.', 'F.C.L.r.retroC.tr.',
       'F.C.L.r.sc.ant.', 'F.C.L.r.sc.ant.', 'F.C.L.r.sc.post.',
       'F.C.L.r.sc.post.', 'F.C.M.ant.', 'F.C.M.ant.', 'F.C.M.post.',
       'F.C.M.post.', 'F.Cal.ant.-Sc.Cal.', 'F.Cal.ant.-Sc.Cal.',
       'F.Coll.', 'F.Coll.', 'F.I.P.Po.C.inf.', 'F.I.P.Po.C.inf.',
       'F.I.P.', 'F.I.P.', 'F.I.P.r.int.1', 'F.I.P.r.int.1',
       'F.I.P.r.int.2', 'F.I.P.r.int.2', 'F.P.O.', 'F.P.O.', 'INSULA',
       'INSULA', 'OCCIPITAL', 'OCCIPITAL', 'S.C.LPC.', 'S.C.LPC.', 'S.C.',
       'S.C.', 'S.C.sylvian.', 'S.C.sylvian.', 'S.Call.', 'S.Call.',
       'S.Cu.', 'S.Cu.', 'S.F.inf.', 'S.F.inf.', 'S.F.inf.ant.',
       'S.F.inf.ant.', 'S.F.int.', 'S.F.int.', 'S.F.inter.', 'S.F.inter.',
       'S.F.marginal.', 'S.F.marginal.', 'S.F.median.', 'S.F.median.',
       'S.F.orbitaire.', 'S.F.orbi