In [1]:
%load_ext autoreload
%autoreload 2

In [12]:
import os
import pygal
from abcd.local.paths import core_path, output_path
from abcd.data.read_data import read_events_general_info, add_subject_sex, add_subject_ethnicity, add_event_vars, add_subject_vars, filter_events
from abcd.data.define_splits import SITES, save_restore_sex_fmri_splits
from abcd.data.var_tailoring.normalization import normalize_var
from abcd.plotting.pygal.rendering import display_html
from abcd.plotting.pygal.colors import palette
import abcd.data.VARS as VARS

In [3]:
# Base dataframe with site id, family id, sex and ethnicity information
subjects_df, events_df = read_events_general_info()
print("There are {} subjects and {} visits".format(len(subjects_df), len(events_df)))
subjects_df, events_df = add_subject_sex(subjects_df, events_df)
print("There are {} subjects and {} visits".format(len(subjects_df), len(events_df)))
subjects_df, events_df = add_subject_ethnicity(subjects_df, events_df)
print("There are {} subjects and {} visits".format(len(subjects_df), len(events_df)))

There are 11685 subjects and 88888 visits


100%|██████████| 11685/11685 [00:40<00:00, 291.02it/s]


There are 9908 subjects and 80171 visits


100%|██████████| 9908/9908 [00:28<00:00, 343.65it/s]

There are 9907 subjects and 80166 visits





In [4]:
# CBCL
mh_events_df = add_event_vars(events_df, VARS.CBCL_PATH, vars=list(VARS.CBCL_SCORES_raw.keys()))
mh_events_df = mh_events_df.dropna() 
print("There are {} visits with CBCL scores".format(len(mh_events_df)))

There are 38743 visits with CBCL scores


In [5]:
# Neurocognition
neuro_events_df = add_event_vars(events_df, VARS.NIH_PATH, vars=list(VARS.NIH_TESTS_uncorrected.keys()))
neuro_events_df = neuro_events_df.dropna() 
print("There are {} visits with Neurocognition scores".format(len(neuro_events_df)))

There are 9888 visits with Neurocognition scores


In [6]:
# Structural MRI
for smri_feature in VARS.STRUCT_FEATURES.keys():
    smri_events_df = add_event_vars(events_df, os.path.join(core_path, "imaging", VARS.STRUCT_FILES[smri_feature]), vars=[smri_feature])
    smri_events_df = smri_events_df.dropna() 
print("There are {} visits with structural MRI info".format(len(smri_events_df)))

There are 19163 visits with structural MRI info


In [7]:
# Functional MRI
fmri_events_df = add_event_vars(events_df, VARS.fMRI_PATH, vars=list(VARS.NAMED_CONNECTIONS.keys()))
fmri_events_df = fmri_events_df.dropna() 
print("There are {} visits with functional MRI info".format(len(fmri_events_df)))

There are 19327 visits with functional MRI info


In [8]:
dfs = {"General": events_df, "CBCL": mh_events_df, "Neurocog.": neuro_events_df, "sMRI": smri_events_df, "fMRI": fmri_events_df}

In [9]:
visit_ixs = [0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]
visit_names = {0: "baseline_year_1_arm_1", 0.5: "6_month_follow_up_arm_1", 1: "1_year_follow_up_y_arm_1", 1.5: "18_month_follow_up_arm_1", 2: "2_year_follow_up_y_arm_1", 2.5: "30_month_follow_up_arm_1", 3: "3_year_follow_up_y_arm_1", 3.5: "42_month_follow_up_arm_1", 4: "4_year_follow_up_y_arm_1"}
custom_style = pygal.style.Style(
    colors=tuple([palette['mid_gray'], palette['red'], palette['purple'], palette['mint'], palette['light_blue']])
    )
bar_chart = pygal.Bar(x_label_rotation=45, style=custom_style)

plot = pygal.Bar(style=custom_style)
plot.title = "Nr. subjects per visit"
plot.x_labels = visit_ixs
plot.x_title = "Years since baseline visit"
plot.y_title = "Nr. subjects"
for df_name, df in dfs.items():
    nr_subjects = [len(df["eventname"].loc[(df["eventname"] == visit_names[visit_ix])]) for visit_ix in visit_ixs]
    plot.add(df_name, nr_subjects)
display_html(plot)