Compare baseline *Generalized Anxiety Disorder* (GAD) diagnoses as reported by the child (youth) and by the parent, and combine the two reports into a single indicator.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

import src.data.var_names as abcd_vars

In [2]:
# Both raw exports are read with identical options:
#   header=1    -> the second line of the file supplies the column names
#   index_col=4 -> the fifth column becomes the row index
#                  (presumably the subject identifier -- TODO confirm)
read_opts = dict(header=1, sep='\t', low_memory=False, index_col=4)

# Keep only rows recorded for the baseline event.
baseline_filter = "`The event name for which the data was collected` == 'baseline_year_1_arm_1'"

df_parent = pd.read_csv("data/raw/abcd_ksad01.txt", **read_opts).query(baseline_filter)
df_youth = pd.read_csv("data/raw/abcd_ksad501.txt", **read_opts).query(baseline_filter)

In [3]:
# Lower-case substring used to find the GAD diagnosis columns; column labels
# are lower-cased before the comparison, so the match ignores case.
gad_label = "diagnosis - generalized anxiety"

# Positional indices of the matching columns, computed separately per table.
gad_col_idx_parent = [
    pos for pos, label in enumerate(df_parent.columns) if gad_label in label.lower()
]
gad_col_idx_youth = [
    pos for pos, label in enumerate(df_youth.columns) if gad_label in label.lower()
]

# Show which youth columns were matched.
df_youth.columns[gad_col_idx_youth]

Index(['Diagnosis - Generalized Anxiety Disorder Present (F41.1)', 'Diagnosis - Generalized Anxiety Disorder Past (F41.1)'], dtype='object')

In [4]:
# Columns that must all be non-missing for a row to be kept.
required_features = (
    abcd_vars.all_brain_features.features
    + abcd_vars.sociodem.features
    + abcd_vars.diagnoses.features
)

# Load the processed table (first column as index) and drop incomplete rows.
data = pd.read_csv(
    "data/processed_one_child_per_family/abcd_data.csv", index_col=0
).dropna(subset=required_features)

In [5]:
# Index labels of the analysis sample that are also present in each raw table.
idx_parent, idx_youth = (
    data.index.intersection(raw_table.index) for raw_table in (df_parent, df_youth)
)

# Sample size, followed by how many of its labels were found per informant.
len(data.index), len(idx_parent), len(idx_youth)

(6916, 6916, 6916)

In [6]:
# For each informant: restrict to the shared index labels (label-based rows),
# then keep only the GAD diagnosis columns (position-based columns).
gad_parent = (
    df_parent
    .loc[idx_parent]
    .iloc[:, gad_col_idx_parent]
)
gad_youth = (
    df_youth
    .loc[idx_youth]
    .iloc[:, gad_col_idx_youth]
)

In [7]:
def combine(x):
    """Collapse a set of diagnosis indicators into one OR-combined flag.

    Parameters
    ----------
    x : pandas.Series
        Indicator values to combine; missing entries are ignored.

    Returns
    -------
    float
        ``nan`` when every entry is missing, ``1.0`` when at least one
        non-missing entry equals 1, and ``0.0`` otherwise (any other
        non-missing value counts as "not 1").
    """
    observed = x.dropna()
    # Nothing observed at all: the combined value is unknown, not negative.
    if not len(observed):
        return np.nan
    any_positive = observed.eq(1).any()
    return np.float64(any_positive)

# Place the parent and youth GAD columns side by side (aligned on the row
# index), then reduce each row to a single indicator with `combine`.
gad_both_informants = pd.concat([gad_parent, gad_youth], axis=1)
combined = gad_both_informants.agg(combine, axis=1)

In [12]:
# One parent-vs-youth contingency table per diagnosis time frame.
for timeframe in ("Present", "Past"):
    parent_mask = gad_parent.columns.str.contains(timeframe)
    youth_mask = gad_youth.columns.str.contains(timeframe)

    # Take the first matching column for each informant and rename it so the
    # table axes identify who reported the diagnosis.
    parent_dx = gad_parent.loc[:, parent_mask].iloc[:, 0].rename(f"{timeframe} Parent")
    youth_dx = gad_youth.loc[:, youth_mask].iloc[:, 0].rename(f"{timeframe} Youth")

    display(pd.crosstab(parent_dx, youth_dx))

# Frequency of each value of the combined indicator.
display(HTML("<strong>Combined (OR-rule)</strong>"), combined.value_counts())

Present Youth,0.0,1.0
Present Parent,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,6813,17
1.0,84,2


Past Youth,0.0,1.0
Past Parent,Unnamed: 1_level_1,Unnamed: 2_level_1
0.0,6601,26
1.0,286,3


0.0    6548
1.0     368
dtype: int64