### Show case numbers of DSM-5 diagnoses

In [1]:
from pathlib import Path

# OPTIONAL: Load the "autoreload" extension so that code can change
%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

import numpy as np
import pandas as pd

from src.data.preprocess_data import create_binary_diagnoses_df, select_one_child_per_family
from src.data.var_names import diagnoses
from src.definitions import REPO_ROOT, RAW_DATA_DIR, PROCESSED_DATA_DIR

In [2]:
import numpy as np
import pandas as pd

# Directory holding the raw data files, wrapped as a pathlib.Path
abcd_data_path=Path(RAW_DATA_DIR)
# Lookup table with one row per sub-indicator; the columns used further down
# are 'Diagnosis', 'Interview', 'File' and 'Feature'
subindicators_table=pd.read_csv(RAW_DATA_DIR / 'subindicators_table.csv')

# Display the table (the notebook shows a truncated view)
subindicators_table

Unnamed: 0,Diagnosis,Spec,Interview,File,Feature
0,Other Specified Neurodevelopmental Disorder Au...,current,parent interview,abcd_ksad01,ksads_18_903_p
1,Major Depressive Disorder,current,parent interview,abcd_ksad01,ksads_1_840_p
2,Major Depressive Disorder,past,parent interview,abcd_ksad01,ksads_1_842_p
3,Major Depressive Disorder,current,youth interview,abcd_ksad501,ksads_1_840_t
4,Major Depressive Disorder,past,youth interview,abcd_ksad501,ksads_1_842_t
...,...,...,...,...,...
116,Anorexia Nervosa,past restricting subtype,youth interview,abcd_ksad501,ksads_13_934_t
117,Bulimia Nervosa,current,parent interview,abcd_ksad01,ksads_13_935_p
118,Bulimia Nervosa,past,parent interview,abcd_ksad01,ksads_13_936_p
119,Bulimia Nervosa,current,youth interview,abcd_ksad501,ksads_13_935_t


In [3]:
import copy
from functools import partial


def create_binary_diagnoses_df_detailed(
    abcd_data_path:Path,
    subindicators_table:pd.DataFrame,
    or_rule:bool,
    and_rule:bool
) -> pd.DataFrame:
    """Derive binary diagnosis labels from the raw sub-indicator files.

    The labels are built in two stages:

    1. Within each interviewee ('parent' / 'youth') all sub-indicator features of a
       diagnosis are combined with the or-rule (see ``summarize``).
    2. The resulting parent/youth labels are combined across interviewees with the
       or-rule and/or the and-rule.

    'Bipolar I Disorder' and 'Bipolar II Disorder' are merged into a single
    'Bipolar Disorder' diagnosis. The passed ``subindicators_table`` is not modified.

    Parameters
    ----------
    abcd_data_path : Path
        Directory containing the tab-separated ``<File>.txt`` files named in the
        'File' column of ``subindicators_table`` (a ``str`` is accepted as well).
    subindicators_table : pd.DataFrame
        Table with (at least) the columns 'Diagnosis', 'Interview', 'File', 'Feature'.
    or_rule : bool
        If True, add a ``'<diagnosis>_or_rule'`` column per diagnosis.
    and_rule : bool
        If True, add a ``'<diagnosis>_and_rule'`` column per diagnosis.

    Returns
    -------
    pd.DataFrame
        Indexed by 'src_subject_id'; values are 1.0, 0.0 or NaN (unknown).
    """
    abcd_data_path = Path(abcd_data_path)

    # we want to summarize Bipolar I and II Disorder
    # (DataFrame.replace returns a new frame, so no explicit copy is required)
    subindicators_table_ = (
        subindicators_table
        .replace(to_replace='Bipolar I Disorder', value='Bipolar Disorder')
        .replace(to_replace='Bipolar II Disorder', value='Bipolar Disorder')
    )

    # open files
    opened_dfs = []
    for filename in subindicators_table_['File'].unique():
        columns = list(subindicators_table_[subindicators_table_['File'] == filename]['Feature']) + ['src_subject_id']
        # skiprows=[1] drops the line directly below the header
        # (presumably a row of column descriptions -- TODO confirm against the raw files)
        new_file = pd.read_csv(abcd_data_path / f'{filename}.txt', sep='\t', skiprows=[1])
        # we are only interested in the baseline assessment of the ABCD study;
        # .copy() so that the assignment below acts on an independent frame
        # instead of a slice (avoids pandas' SettingWithCopyWarning)
        new_file = new_file.loc[new_file['eventname'] == "baseline_year_1_arm_1"].copy()
        # capitalize subject id to avoid false mismatches later on
        new_file['src_subject_id'] = new_file['src_subject_id'].str.upper()
        # select only relevant columns and save to list
        opened_dfs.append(new_file[columns])

    # merge to one df, keeping every subject that appears in at least one file
    raw_subindicators_df = opened_dfs[0]
    for df in opened_dfs[1:]:
        raw_subindicators_df = raw_subindicators_df.merge(right=df, how='outer', on='src_subject_id')
    raw_subindicators_df = raw_subindicators_df.set_index('src_subject_id')

    # row-wise reducers, created once instead of once per loop iteration
    summarize_or = partial(summarize, rule='or')
    summarize_and = partial(summarize, rule='and')

    # first, apply or-rule for labels 'within interviewees'
    # e.g. if MDD has been diagnosed either at present or in the past in only the youth interview,
    # the youth label is positive
    dict_series = {}
    for diagnosis in subindicators_table_['Diagnosis'].unique():
        for interviewee in ['parent', 'youth']:
            cols = subindicators_table_[
                (subindicators_table_['Diagnosis'] == diagnosis) &
                (subindicators_table_['Interview'] == f'{interviewee} interview')
            ]['Feature'].tolist()
            if len(cols) == 0:
                # this interviewee has no features for the diagnosis; keep going so
                # that the other interviewee is still evaluated (a `break` here would
                # silently drop youth-only diagnoses)
                continue
            dict_series[f'{diagnosis}_{interviewee}'] = raw_subindicators_df[cols].apply(summarize_or, axis=1)
    interviewee_labels_df = pd.DataFrame(dict_series)

    # summarize within-interviewee labels via and/or-rule
    dict_series = {}
    for diagnosis in subindicators_table_['Diagnosis'].unique():
        cols = [f'{diagnosis}_{interviewee}' for interviewee in ['parent', 'youth']
                if f'{diagnosis}_{interviewee}' in interviewee_labels_df.columns]
        if not cols:
            # neither interviewee provides a label -> the diagnosis is unknown for
            # everybody (reducing zero columns would otherwise yield constant labels)
            unknown = pd.Series(np.nan, index=interviewee_labels_df.index)
            if or_rule:
                dict_series[f'{diagnosis}_or_rule'] = unknown
            if and_rule:
                dict_series[f'{diagnosis}_and_rule'] = unknown
            continue
        if or_rule:
            dict_series[f'{diagnosis}_or_rule'] = interviewee_labels_df[cols].apply(summarize_or, axis=1)
        if and_rule:
            dict_series[f'{diagnosis}_and_rule'] = interviewee_labels_df[cols].apply(summarize_and, axis=1)

    return pd.DataFrame(dict_series)


def summarize(x:pd.Series, rule:str):
    """Collapse several binary labels (1.0 / 0.0 / NaN) into a single label.

    Only a value equal to 1.0 counts as positive, for both rules.

    Parameters
    ----------
    x : pd.Series
        The labels to combine (e.g. one row of a label DataFrame).
    rule : str
        'or'  -> 1.0 if any label is positive; otherwise NaN if any label is
                 missing; otherwise 0.0.
        'and' -> 1.0 if every label is positive; otherwise 0.0 if any label is
                 0.0; otherwise NaN.

    Returns
    -------
    float
        1.0, 0.0 or NaN ("we cannot know"). An empty ``x`` always yields NaN.

    Raises
    ------
    AssertionError
        If ``rule`` is neither 'or' nor 'and'.
    """
    assert rule == 'or' or rule == 'and', "rule keyword can only be 'or' or 'and'"
    # no labels at all -> nothing is known (without this guard an empty input
    # would be reported as positive by the and-rule and negative by the or-rule)
    if len(x) == 0:
        return np.nan
    is_positive = (x == 1.0)
    if rule == 'or':
        # if at least one positive value -> overall positive
        if is_positive.any():
            return 1.0
        # if no positive value and at least one NaN -> we cannot know -> overall NaN
        elif x.isnull().any():
            return np.nan
        # if no positive and no missing values -> overall negative
        else:
            return 0.0
    else:
        # if all values positive -> overall positive
        # (NaN == 1.0 is False, so a missing value can never make this branch fire)
        if is_positive.all():
            return 1.0
        # if some values negative -> overall negative
        elif (x == 0.0).any():
            return 0.0
        # otherwise (e.g. only positive and NaN values) -> we cannot know -> overall NaN
        else:
            return np.nan

In [4]:
# Compute both the OR-rule and the AND-rule label for every diagnosis
binary_diagnoses = create_binary_diagnoses_df_detailed(
    abcd_data_path=RAW_DATA_DIR,
    subindicators_table=subindicators_table,
    or_rule=True,
    and_rule=True,
)

In [15]:
# Display the label table: one row per subject, '<diagnosis>_or_rule' / '<diagnosis>_and_rule' columns
binary_diagnoses

Unnamed: 0_level_0,Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_or_rule,Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_and_rule,Major Depressive Disorder_or_rule,Major Depressive Disorder_and_rule,Persistent Depressive Disorder (Dysthymia)_or_rule,Persistent Depressive Disorder (Dysthymia)_and_rule,Bipolar Disorder_or_rule,Bipolar Disorder_and_rule,Psychotic Symptoms_or_rule,Psychotic Symptoms_and_rule,...,Selective Mutism_or_rule,Selective Mutism_and_rule,Specific Phobia_or_rule,Specific Phobia_and_rule,Binge-Eating Disorder_or_rule,Binge-Eating Disorder_and_rule,Anorexia Nervosa_or_rule,Anorexia Nervosa_and_rule,Bulimia Nervosa_or_rule,Bulimia Nervosa_and_rule
src_subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV9EVRB30H,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
NDAR_INV0AUBJJJ4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
NDAR_INV0CTJAAHC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
NDAR_INV0D83M5VE,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
NDAR_INV0DKWEM1A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVNTJF26D6,,,,0.0,,0.0,,0.0,,,...,,,,,,,,,,
NDAR_INVYN4CXY1B,,,,0.0,,0.0,,0.0,,,...,,,,,,,,,,
NDAR_INVBE7CJ121,,,,0.0,,0.0,,0.0,,,...,,,,,,,,,,
NDAR_INVFMCMJ0X2,,,,0.0,,0.0,,0.0,,,...,,,,,,,,,,


In [16]:
# Print, per diagnosis, how many subjects are positive / negative / unknown under each rule
for diagnosis in subindicators_table['Diagnosis'].unique():
    # Bipolar I and II share one merged 'Bipolar Disorder' label:
    # report it once (for the Bipolar I entry) and skip the Bipolar II entry
    if diagnosis == 'Bipolar II Disorder':
        continue
    if diagnosis == 'Bipolar I Disorder':
        diagnosis = 'Bipolar Disorder'
    print(diagnosis)

    # or rule
    col = f'{diagnosis}_or_rule'
    labels = binary_diagnoses[col]
    n_true = int(labels.eq(1.0).sum())
    n_false = int(labels.eq(0.0).sum())
    n_nan = int(labels.isna().sum())
    # the tiny epsilon keeps the percentage defined when there are no known labels
    print(f'  OR rule:  {n_true:5d} of {n_true + n_false:5d} positive ({100 * n_true/(n_true + n_false + 1e-10):.3f} %). {n_nan:5d} NaN values.')

    # and rule
    col = f'{diagnosis}_and_rule'
    labels = binary_diagnoses[col]
    n_true = int(labels.eq(1.0).sum())
    n_false = int(labels.eq(0.0).sum())
    n_nan = int(labels.isna().sum())
    print(f'  AND rule: {n_true:5d} of {n_true + n_false:5d} positive ({100 * n_true/(n_true + n_false + 1e-10):.3f} %). {n_nan:5d} NaN values.')

Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder
  OR rule:   3247 of 11690 positive (27.776 %).   184 NaN values.
  AND rule:  3247 of 11690 positive (27.776 %).   184 NaN values.
Major Depressive Disorder
  OR rule:    569 of 11661 positive (4.880 %).   213 NaN values.
  AND rule:    23 of 11834 positive (0.194 %).    40 NaN values.
Persistent Depressive Disorder (Dysthymia)
  OR rule:     24 of 11660 positive (0.206 %).   214 NaN values.
  AND rule:     0 of 11835 positive (0.000 %).    39 NaN values.
Bipolar Disorder
  OR rule:    878 of 11668 positive (7.525 %).   206 NaN values.
  AND rule:    29 of 11827 positive (0.245 %).    47 NaN values.
Psychotic Symptoms
  OR rule:    315 of 11721 positive (2.687 %).   153 NaN values.
  AND rule:   315 of 11721 positive (2.687 %).   153 NaN values.
ADHD
  OR rule:   2192 of 11701 positive (18.733 %).   173 NaN values.
  AND rule:  2192 of 11701 positive (18.733 %).   173 NaN values.
Oppositional Defiant Disorder
  OR ru

In [12]:
# Load the already processed dataset; it is compared against the freshly computed labels below.
# NOTE(review): the output below echoes this line, which looks like a pandas warning
# (possibly a mixed-dtype warning) -- confirm and, if so, pass explicit dtypes.
abcd_data = pd.read_csv(PROCESSED_DATA_DIR / 'abcd_data.csv')
abcd_data

  abcd_data = pd.read_csv(PROCESSED_DATA_DIR / 'abcd_data.csv')


Unnamed: 0,src_subject_id,age,female,married,race_ethnicity_Asian,race_ethnicity_Black,race_ethnicity_Hispanic,race_ethnicity_Other,race_ethnicity_White,high.educ_< HS Diploma,...,Generalized Anxiety Disorder,Social Anxiety Disorder,Panic Disorder,Agoraphobia,Separation Anxiety Disorder,Selective Mutism,Specific Phobia,Binge-Eating Disorder,Anorexia Nervosa,Bulimia Nervosa
0,NDAR_INV007W6H7B,126,0,1,0,0,0,0,1,0,...,True,False,False,False,False,,False,,,
1,NDAR_INV00LJVZK2,121,0,0,0,0,0,1,0,1,...,False,False,False,False,False,,False,,,
2,NDAR_INV00NPMHND,118,1,1,0,0,0,0,1,0,...,True,False,False,False,False,,True,,,
3,NDAR_INV00R4TXET,114,1,1,0,0,0,0,1,0,...,False,False,False,,False,,False,,,
4,NDAR_INV00U4FTRU,130,1,0,0,0,0,1,0,0,...,True,True,False,,False,,False,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7183,NDAR_INVZZJ3A7BK,122,1,0,0,0,0,0,1,0,...,False,False,False,False,False,,True,,,
7184,NDAR_INVZZL0VA2F,129,0,1,0,1,0,0,0,0,...,False,False,False,False,False,,False,,,
7185,NDAR_INVZZLZCKAY,110,1,1,0,0,0,0,1,0,...,False,False,False,False,False,,True,,,
7186,NDAR_INVZZZ2ALR6,120,1,1,0,0,0,1,0,0,...,False,False,False,False,False,,True,,,


In [17]:
# Preview of a rename mapping that strips the '_or_rule' suffix; '_and_rule' columns map to
# themselves. The mapping is only displayed here, it is not applied to binary_diagnoses.
{col: col.replace('_or_rule', '') for col in binary_diagnoses.columns}

{'Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_or_rule': 'Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder',
 'Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_and_rule': 'Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_and_rule',
 'Major Depressive Disorder_or_rule': 'Major Depressive Disorder',
 'Major Depressive Disorder_and_rule': 'Major Depressive Disorder_and_rule',
 'Persistent Depressive Disorder (Dysthymia)_or_rule': 'Persistent Depressive Disorder (Dysthymia)',
 'Persistent Depressive Disorder (Dysthymia)_and_rule': 'Persistent Depressive Disorder (Dysthymia)_and_rule',
 'Bipolar Disorder_or_rule': 'Bipolar Disorder',
 'Bipolar Disorder_and_rule': 'Bipolar Disorder_and_rule',
 'Psychotic Symptoms_or_rule': 'Psychotic Symptoms',
 'Psychotic Symptoms_and_rule': 'Psychotic Symptoms_and_rule',
 'ADHD_or_rule': 'ADHD',
 'ADHD_and_rule': 'ADHD_and_rule',
 'Oppositional Defiant Disorder_or_rule': 'O

In [7]:
# Display the full label table (all subjects) before it is restricted to the processed subjects
binary_diagnoses

Unnamed: 0_level_0,Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_or_rule,Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_and_rule,Major Depressive Disorder_or_rule,Major Depressive Disorder_and_rule,Persistent Depressive Disorder (Dysthymia)_or_rule,Persistent Depressive Disorder (Dysthymia)_and_rule,Bipolar Disorder_or_rule,Bipolar Disorder_and_rule,Psychotic Symptoms_or_rule,Psychotic Symptoms_and_rule,...,Selective Mutism_or_rule,Selective Mutism_and_rule,Specific Phobia_or_rule,Specific Phobia_and_rule,Binge-Eating Disorder_or_rule,Binge-Eating Disorder_and_rule,Anorexia Nervosa_or_rule,Anorexia Nervosa_and_rule,Bulimia Nervosa_or_rule,Bulimia Nervosa_and_rule
src_subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV9EVRB30H,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
NDAR_INV0AUBJJJ4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
NDAR_INV0CTJAAHC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
NDAR_INV0D83M5VE,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
NDAR_INV0DKWEM1A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVNTJF26D6,,,,0.0,,0.0,,0.0,,,...,,,,,,,,,,
NDAR_INVYN4CXY1B,,,,0.0,,0.0,,0.0,,,...,,,,,,,,,,
NDAR_INVBE7CJ121,,,,0.0,,0.0,,0.0,,,...,,,,,,,,,,
NDAR_INVFMCMJ0X2,,,,0.0,,0.0,,0.0,,,...,,,,,,,,,,


In [10]:
# Restrict the label table to the subjects contained in abcd_data, in abcd_data's row order.
# Label-based lookup: .loc raises a KeyError if a subject id is missing from binary_diagnoses.
binary_diagnoses_2 = binary_diagnoses.loc[abcd_data['src_subject_id']]

In [11]:
# Display the label table restricted to the subjects of abcd_data
binary_diagnoses_2

Unnamed: 0_level_0,Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_or_rule,Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder_and_rule,Major Depressive Disorder_or_rule,Major Depressive Disorder_and_rule,Persistent Depressive Disorder (Dysthymia)_or_rule,Persistent Depressive Disorder (Dysthymia)_and_rule,Bipolar Disorder_or_rule,Bipolar Disorder_and_rule,Psychotic Symptoms_or_rule,Psychotic Symptoms_and_rule,...,Selective Mutism_or_rule,Selective Mutism_and_rule,Specific Phobia_or_rule,Specific Phobia_and_rule,Binge-Eating Disorder_or_rule,Binge-Eating Disorder_and_rule,Anorexia Nervosa_or_rule,Anorexia Nervosa_and_rule,Bulimia Nervosa_or_rule,Bulimia Nervosa_and_rule
src_subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV007W6H7B,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
NDAR_INV00LJVZK2,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
NDAR_INV00NPMHND,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
NDAR_INV00R4TXET,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
NDAR_INV00U4FTRU,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVZZJ3A7BK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
NDAR_INVZZL0VA2F,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,0.0,0.0,,0.0,,0.0,,0.0
NDAR_INVZZLZCKAY,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,,,1.0,1.0,,0.0,,0.0,,0.0
NDAR_INVZZZ2ALR6,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,1.0,1.0,,0.0,,0.0,,0.0


In [15]:
# Sanity check: the processed dataset and the recomputed OR-rule labels should report the same counts
for diagnosis in subindicators_table['Diagnosis'].unique():
    # Bipolar I and II share one merged 'Bipolar Disorder' label:
    # report it once (for the Bipolar I entry) and skip the Bipolar II entry
    if diagnosis == 'Bipolar II Disorder':
        continue
    if diagnosis == 'Bipolar I Disorder':
        diagnosis = 'Bipolar Disorder'
    print(diagnosis)

    # abcd_data
    col = f'{diagnosis}'
    labels = abcd_data[col]
    n_true = int(labels.eq(1.0).sum())
    n_false = int(labels.eq(0.0).sum())
    n_nan = int(labels.isna().sum())
    # the tiny epsilon keeps the percentage defined when there are no known labels
    print(f'  abcd_data:           {n_true:5d} of {n_true + n_false:5d} positive ({100 * n_true/(n_true + n_false + 1e-10):.3f} %). {n_nan:5d} NaN values.')

    # new code data
    col = f'{diagnosis}_or_rule'
    labels = binary_diagnoses_2[col]
    n_true = int(labels.eq(1.0).sum())
    n_false = int(labels.eq(0.0).sum())
    n_nan = int(labels.isna().sum())
    print(f'  binary_diagnoses_2:  {n_true:5d} of {n_true + n_false:5d} positive ({100 * n_true/(n_true + n_false + 1e-10):.3f} %). {n_nan:5d} NaN values.')

Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder
  abcd_data:            1965 of  7087 positive (27.727 %).   101 NaN values.
  binary_diagnoses_2:   1965 of  7087 positive (27.727 %).   101 NaN values.
Major Depressive Disorder
  abcd_data:             340 of  7067 positive (4.811 %).   121 NaN values.
  binary_diagnoses_2:    340 of  7067 positive (4.811 %).   121 NaN values.
Persistent Depressive Disorder (Dysthymia)
  abcd_data:              15 of  7066 positive (0.212 %).   122 NaN values.
  binary_diagnoses_2:     15 of  7066 positive (0.212 %).   122 NaN values.
Bipolar Disorder
  abcd_data:             517 of  7072 positive (7.311 %).   116 NaN values.
  binary_diagnoses_2:    517 of  7072 positive (7.311 %).   116 NaN values.
Psychotic Symptoms
  abcd_data:             189 of  7098 positive (2.663 %).    90 NaN values.
  binary_diagnoses_2:    189 of  7098 positive (2.663 %).    90 NaN values.
ADHD
  abcd_data:            1356 of  7089 positive (19.128 %). 

In [22]:
# Load the processed dataset again under a new name.
# NOTE(review): this is the same path that abcd_data was read from earlier; presumably the
# file was regenerated in between -- confirm, otherwise the comparison below is redundant.
new_abcd_data = pd.read_csv(PROCESSED_DATA_DIR / 'abcd_data.csv')

In [24]:
# Sanity check: the reloaded dataset and the recomputed OR-rule labels should report the same counts
for diagnosis in subindicators_table['Diagnosis'].unique():
    # Bipolar I and II share one merged 'Bipolar Disorder' label:
    # report it once (for the Bipolar I entry) and skip the Bipolar II entry
    if diagnosis == 'Bipolar II Disorder':
        continue
    if diagnosis == 'Bipolar I Disorder':
        diagnosis = 'Bipolar Disorder'
    print(diagnosis)

    # abcd_data
    col = f'{diagnosis}'
    labels = new_abcd_data[col]
    n_true = int(labels.eq(1.0).sum())
    n_false = int(labels.eq(0.0).sum())
    n_nan = int(labels.isna().sum())
    # the tiny epsilon keeps the percentage defined when there are no known labels
    print(f'  new_abcd_data:       {n_true:5d} of {n_true + n_false:5d} positive ({100 * n_true/(n_true + n_false + 1e-10):.3f} %). {n_nan:5d} NaN values.')

    # new code data
    col = f'{diagnosis}_or_rule'
    labels = binary_diagnoses_2[col]
    n_true = int(labels.eq(1.0).sum())
    n_false = int(labels.eq(0.0).sum())
    n_nan = int(labels.isna().sum())
    print(f'  binary_diagnoses_2:  {n_true:5d} of {n_true + n_false:5d} positive ({100 * n_true/(n_true + n_false + 1e-10):.3f} %). {n_nan:5d} NaN values.')

Other Specified Neurodevelopmental Disorder Autism Spectrum Disorder
  new_abcd_data:        1965 of  7087 positive (27.727 %).   101 NaN values.
  binary_diagnoses_2:   1965 of  7087 positive (27.727 %).   101 NaN values.
Major Depressive Disorder
  new_abcd_data:         340 of  7067 positive (4.811 %).   121 NaN values.
  binary_diagnoses_2:    340 of  7067 positive (4.811 %).   121 NaN values.
Persistent Depressive Disorder (Dysthymia)
  new_abcd_data:          15 of  7066 positive (0.212 %).   122 NaN values.
  binary_diagnoses_2:     15 of  7066 positive (0.212 %).   122 NaN values.
Bipolar Disorder
  new_abcd_data:         517 of  7072 positive (7.311 %).   116 NaN values.
  binary_diagnoses_2:    517 of  7072 positive (7.311 %).   116 NaN values.
Psychotic Symptoms
  new_abcd_data:         189 of  7098 positive (2.663 %).    90 NaN values.
  binary_diagnoses_2:    189 of  7098 positive (2.663 %).    90 NaN values.
ADHD
  new_abcd_data:        1356 of  7089 positive (19.128 %). 