In [1]:
# set up libraries
    # We will need the RBCPath type from the rbclib package to load data from the RBC.
from rbclib import RBCPath
    # We'll also want to load some data directly from the filesystem.
from pathlib import Path
    # We'll want to load/process some of the data using pandas and numpy.
import pandas as pd
import numpy as np

In [2]:
# functions form the given analysis function 
def load_fsdata(participant_id, local_cache_dir=(Path.home() / 'cache')):
    "Loads and returns the dataframe of a PNC participant's FreeSurfer data."
    # from Neurohakademy 2025 organizers

    # Check that the local_cache_dir exists and make it if it doesn't.
    if local_cache_dir is not None:
        local_cache_dir = Path(local_cache_dir)
        local_cache_dir.mkdir(exist_ok=True)
    
    # Make the RBCPath and find the appropriate file:
    pnc_freesurfer_path = RBCPath(
        'rbc://PNC_FreeSurfer/freesurfer',
        # We provide the local_cache_dir to the RBCPath object; all paths made
        # from this object will use the same cache directory.
        local_cache_dir=local_cache_dir)
    participant_path = pnc_freesurfer_path / f'sub-{participant_id}'
    tsv_path = participant_path / f'sub-{participant_id}_regionsurfacestats.tsv'

    # Use pandas to read in the TSV file:
    with tsv_path.open('r') as f:
        data = pd.read_csv(f, sep='\t')

    # Return the loaded data:
    return data

In [65]:
# My functions 
def flatten_RBC_participant(participant_id):
    "Flatten the data from RBC for a given participant."
    
    # Load Data
    df = load_fsdata(participant_id)
    
    # Summarize Feature Info
    df['feature_info'] = df[['atlas', 'hemisphere', 'StructName']].agg('__'.join, axis=1)
    df = df.drop(columns=['atlas', 'hemisphere', 'StructName'])
    
    # Make a place to store the data
    output_df = pd.DataFrame(columns = ['measure_info', 'value'])
    
    # Iterate through the measures flattening them and adding them to the new df
    allowed_measures = ['NumVert', 'SurfArea', 'GrayVol', 'ThickAvg', 'ThickStd', 'MeanCurv', 'GausCurv', 'FoldInd', 'CurvInd', 
                        'Index', 'SegId', 'Mean_wgpct', 'StdDev_wgpct', 'Min_wgpct', 'Max_wgpct', 'Range_wgpct', 'SNR_wgpct', 
                        'Mean_piallgi', 'StdDev_piallgi', 'Min_piallgi', 'Max_piallgi', 'Range_piallgi']
    for measure in allowed_measures:
        temp_df = pd.DataFrame(columns = ['measure_info', 'value'])
        labels = df['feature_info'].apply(lambda x : f'{x}__{measure}')
        values = df[measure]
        temp_df['measure_info'] = labels
        temp_df['value'] = values
        output_df = pd.concat([output_df, temp_df])
    
    return output_df


def make_RBC_data_into_a_table(participant_id_list):
    '''Make a dataframe of all RBC participants in the given list'''

    # Initialize the df with the first participant
    output_flattened_df = flatten_RBC_participant(participant_id_list[0])
    output_flattened_df = output_flattened_df.rename(columns={'value':participant_id_list[0]}).set_index('measure_info').T

    # add in all the other participants
    for participant_id in participant_id_list[1:]:
        temp_df = flatten_RBC_participant(participant_id)
        temp_df = temp_df.rename(columns={'value':participant_id}).set_index('measure_info').T
        output_flattened_df = pd.concat([output_flattened_df, temp_df])
    
    return output_flattened_df
        
    

In [52]:
example_participant_id = 1000393599

# Load Data
example_df = load_fsdata(example_participant_id)

# Remove session id because it is empty
example_df = example_df.drop(columns=['session_id'])

# Summarize Feature Info
example_df['feature_info'] = example_df[['atlas', 'hemisphere', 'StructName']].agg('__'.join, axis=1)
example_df = example_df.drop(columns=['atlas', 'hemisphere', 'StructName'])

# Make a place to store the data
output_df = pd.DataFrame(columns = ['measure_info', 'value'])

# Iterate through the measures flattening them and adding them to the new df
allowed_measures = ['NumVert', 'SurfArea', 'GrayVol', 'ThickAvg', 'ThickStd', 'MeanCurv', 'GausCurv', 'FoldInd', 'CurvInd', 
                    'Index', 'SegId', 'Mean_wgpct', 'StdDev_wgpct', 'Min_wgpct', 'Max_wgpct', 'Range_wgpct', 'SNR_wgpct', 
                    'Mean_piallgi', 'StdDev_piallgi', 'Min_piallgi', 'Max_piallgi', 'Range_piallgi']
for measure in allowed_measures:
    temp_df = pd.DataFrame(columns = ['measure_info', 'value'])
    labels = example_df['feature_info'].apply(lambda x : f'{x}__{measure}')
    values = example_df[measure]
    temp_df['measure_info'] = labels
    temp_df['value'] = values
    output_df = pd.concat([output_df, temp_df])
    
output_df

Unnamed: 0,measure_info,value
0,aparc.DKTatlas__lh__caudalanteriorcingulate__N...,1668
1,aparc.DKTatlas__lh__caudalmiddlefrontal__NumVert,3308
2,aparc.DKTatlas__lh__cuneus__NumVert,4102
3,aparc.DKTatlas__lh__entorhinal__NumVert,737
4,aparc.DKTatlas__lh__fusiform__NumVert,4115
...,...,...
13735,Yeo2011_7Networks_N1000__rh__7Networks_3__Rang...,2.25
13736,Yeo2011_7Networks_N1000__rh__7Networks_4__Rang...,3.2703
13737,Yeo2011_7Networks_N1000__rh__7Networks_5__Rang...,2.7538
13738,Yeo2011_7Networks_N1000__rh__7Networks_6__Rang...,3.055


In [60]:
participant_id = 1000393599
flatten_RBC_participant(participant_id).rename(columns={'value':participant_id}).set_index('measure_info').T

measure_info,aparc.DKTatlas__lh__caudalanteriorcingulate__NumVert,aparc.DKTatlas__lh__caudalmiddlefrontal__NumVert,aparc.DKTatlas__lh__cuneus__NumVert,aparc.DKTatlas__lh__entorhinal__NumVert,aparc.DKTatlas__lh__fusiform__NumVert,aparc.DKTatlas__lh__inferiorparietal__NumVert,aparc.DKTatlas__lh__inferiortemporal__NumVert,aparc.DKTatlas__lh__isthmuscingulate__NumVert,aparc.DKTatlas__lh__lateraloccipital__NumVert,aparc.DKTatlas__lh__lateralorbitofrontal__NumVert,...,Yeo2011_7Networks_N1000__lh__7Networks_6__Range_piallgi,Yeo2011_7Networks_N1000__lh__7Networks_7__Range_piallgi,Yeo2011_7Networks_N1000__rh__FreeSurfer_Defined_Medial_Wall__Range_piallgi,Yeo2011_7Networks_N1000__rh__7Networks_1__Range_piallgi,Yeo2011_7Networks_N1000__rh__7Networks_2__Range_piallgi,Yeo2011_7Networks_N1000__rh__7Networks_3__Range_piallgi,Yeo2011_7Networks_N1000__rh__7Networks_4__Range_piallgi,Yeo2011_7Networks_N1000__rh__7Networks_5__Range_piallgi,Yeo2011_7Networks_N1000__rh__7Networks_6__Range_piallgi,Yeo2011_7Networks_N1000__rh__7Networks_7__Range_piallgi
1000393599,1668,3308,4102,737,4115,7381,4828,1578,10035,4309,...,3.3341,3.4807,2.4321,1.2908,3.3515,2.25,3.2703,2.7538,3.055,2.759


In [66]:
make_RBC_data_into_a_table([1000393599,1317462,11407866])

measure_info,aparc.DKTatlas__lh__caudalanteriorcingulate__NumVert,aparc.DKTatlas__lh__caudalmiddlefrontal__NumVert,aparc.DKTatlas__lh__cuneus__NumVert,aparc.DKTatlas__lh__entorhinal__NumVert,aparc.DKTatlas__lh__fusiform__NumVert,aparc.DKTatlas__lh__inferiorparietal__NumVert,aparc.DKTatlas__lh__inferiortemporal__NumVert,aparc.DKTatlas__lh__isthmuscingulate__NumVert,aparc.DKTatlas__lh__lateraloccipital__NumVert,aparc.DKTatlas__lh__lateralorbitofrontal__NumVert,...,Slab__rh__region00878__StdDev_piallgi,CC200__lh__region00047__Min_piallgi,CC400__rh__region00012__Min_piallgi,Slab__rh__region00878__Min_piallgi,CC200__lh__region00047__Max_piallgi,CC400__rh__region00012__Max_piallgi,Slab__rh__region00878__Max_piallgi,CC200__lh__region00047__Range_piallgi,CC400__rh__region00012__Range_piallgi,Slab__rh__region00878__Range_piallgi
1000393599,1668,3308,4102,737,4115,7381,4828,1578,10035,4309,...,,,,,,,,,,
1317462,1931,4304,3388,619,4521,6401,5000,1747,8199,4887,...,,,,,,,,,,
11407866,2014,4213,3230,787,4320,7388,6058,1574,7431,4829,...,0.0,2.4648,2.4339,3.1556,2.4648,2.4339,3.1556,0.0,0.0,0.0


In [41]:
example_df.columns

Index(['subject_id', 'NumVert', 'SurfArea', 'GrayVol', 'ThickAvg', 'ThickStd',
       'MeanCurv', 'GausCurv', 'FoldInd', 'CurvInd', 'Index', 'SegId',
       'Mean_wgpct', 'StdDev_wgpct', 'Min_wgpct', 'Max_wgpct', 'Range_wgpct',
       'SNR_wgpct', 'Mean_piallgi', 'StdDev_piallgi', 'Min_piallgi',
       'Max_piallgi', 'Range_piallgi', 'feature_info'],
      dtype='object')

In [6]:
example_df.isna().sum(axis=0)

subject_id            0
session_id        13740
atlas                 0
hemisphere            0
StructName            0
NumVert               0
SurfArea              0
GrayVol               0
ThickAvg              0
ThickStd              0
MeanCurv              0
GausCurv              0
FoldInd               0
CurvInd               0
Index                 0
SegId                 0
Mean_wgpct            0
StdDev_wgpct          0
Min_wgpct             0
Max_wgpct             0
Range_wgpct           0
SNR_wgpct             0
Mean_piallgi          0
StdDev_piallgi        0
Min_piallgi           0
Max_piallgi           0
Range_piallgi         0
dtype: int64

In [9]:
# example_df['StructName'].tolist()
example_df['hemisphere'].value_counts()

hemisphere
rh    6876
lh    6864
Name: count, dtype: int64

In [10]:
example_df['StructName'].value_counts().value_counts()

count
1     1535
2      619
3      322
4      259
5      208
9      203
8      198
7      198
10     196
6      188
40       1
Name: count, dtype: int64

In [11]:
count_data = example_df['StructName'].value_counts()
count_data[count_data == 40]

StructName
Background+FreeSurfer_Defined_Medial_Wall    40
Name: count, dtype: int64

In [14]:
count_data = example_df['StructName'].value_counts()
count_data[count_data == 4]

StructName
7Networks_LH_Default_Temp_13    4
inferiortemporal                4
inferiorparietal                4
7Networks_RH_SomMot_63          4
17Networks_LH_ContA_PFClv_3     4
                               ..
cuneus                          4
caudalmiddlefrontal             4
caudalanteriorcingulate         4
medialorbitofrontal             4
middletemporal                  4
Name: count, Length: 259, dtype: int64

In [17]:
count_data = example_df[['StructName','atlas']].value_counts()
count_data.value_counts()

count
1    13034
2      353
Name: count, dtype: int64

In [18]:
count_data[count_data == 2]

StructName             atlas                   
S_central              aparc.a2009s                2
S_cingul-Marginalis    aparc.a2009s                2
S_circular_insula_ant  aparc.a2009s                2
S_circular_insula_inf  aparc.a2009s                2
S_circular_insula_sup  aparc.a2009s                2
                                                  ..
Lat_Fis-ant-Vertical   aparc.a2009s                2
region00109            CC400                       2
17Networks_3           Yeo2011_17Networks_N1000    2
17Networks_2           Yeo2011_17Networks_N1000    2
17Networks_16          Yeo2011_17Networks_N1000    2
Name: count, Length: 353, dtype: int64

In [21]:
example_df['atlas'].value_counts()

atlas
Schaefer2018_1000Parcels_7Networks_order     1002
Schaefer2018_1000Parcels_17Networks_order    1002
Schaefer2018_900Parcels_17Networks_order      902
Schaefer2018_900Parcels_7Networks_order       902
Schaefer2018_800Parcels_7Networks_order       802
Schaefer2018_800Parcels_17Networks_order      802
Schaefer2018_700Parcels_7Networks_order       702
Schaefer2018_700Parcels_17Networks_order      702
Slab                                          693
Schaefer2018_600Parcels_7Networks_order       602
Schaefer2018_600Parcels_17Networks_order      602
Schaefer2018_500Parcels_7Networks_order       502
Schaefer2018_500Parcels_17Networks_order      502
Schaefer2018_400Parcels_7Networks_order       402
Schaefer2018_400Parcels_17Networks_order      402
CC400                                         376
glasser                                       362
gordon333dil                                  335
Schaefer2018_300Parcels_17Networks_order      302
Schaefer2018_300Parcels_7Networks_order     

In [24]:
example_df[example_df['atlas'] == 'Yeo2011_17Networks_N1000']

Unnamed: 0,subject_id,session_id,atlas,hemisphere,StructName,NumVert,SurfArea,GrayVol,ThickAvg,ThickStd,...,StdDev_wgpct,Min_wgpct,Max_wgpct,Range_wgpct,SNR_wgpct,Mean_piallgi,StdDev_piallgi,Min_piallgi,Max_piallgi,Range_piallgi
4061,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,lh,Background+FreeSurfer_Defined_Medial_Wall,9377,6146,2370,0.847,1.266,...,10.4840,-60.4127,96.6036,157.0164,0.3702,2.4985,0.4165,1.6581,4.1347,2.4766
4062,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,lh,7Networks_LH_Vis_1,1530,1026,3474,3.051,0.686,...,6.2311,-14.3769,39.6908,54.0676,3.4481,2.9414,0.0616,2.7804,3.1105,0.3301
4063,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,lh,7Networks_LH_Vis_2,2800,1826,4315,2.258,0.674,...,5.3548,0.5998,34.7106,34.1108,3.3202,2.7257,0.1347,2.5275,3.0700,0.5426
4064,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,lh,7Networks_LH_Vis_3,2142,1475,3428,2.290,0.623,...,5.2878,-2.4538,33.2255,35.6793,3.1517,2.9849,0.0999,2.7301,3.2365,0.5065
4065,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,lh,7Networks_LH_Vis_4,3863,2399,4909,1.935,0.574,...,5.5936,-5.8710,48.8780,54.7490,3.0730,2.6997,0.1200,2.4369,3.0926,0.6558
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4158,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,rh,7Networks_RH_Default_PFCdPFCm_1,3936,2717,9216,2.847,0.572,...,6.9005,-18.0298,48.6628,66.6926,3.5293,2.2532,0.0519,2.0850,2.3633,0.2783
4159,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,rh,7Networks_RH_Default_PFCdPFCm_2,3916,2718,10017,3.070,0.582,...,6.3335,-4.3204,45.1907,49.5111,4.1656,2.3121,0.1599,2.0256,2.9005,0.8749
4160,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,rh,7Networks_RH_Default_PFCdPFCm_3,2417,1665,5059,2.772,0.484,...,5.0330,1.3143,42.3498,41.0355,5.0799,2.8891,0.1718,2.3637,3.3122,0.9485
4161,sub-1000393599,,Schaefer2018_100Parcels_7Networks_order,rh,7Networks_RH_Default_pCunPCC_1,1256,845,2554,2.900,0.441,...,4.9707,-1.2928,34.3984,35.6913,4.0733,3.4590,0.1841,2.7609,3.7026,0.9417
