In [1]:
# 4_pbpk_input_preparation.ipynb
# Notebook to generate PBPK-ready input tables from harmonized pediatric PK dataset

# ----------------------------------
# 1. Import libraries
# ----------------------------------
import pandas as pd
import os

# ----------------------------------
# 2. Load harmonized dataset
# ----------------------------------
HARMONIZED_FILE = '/data/curated/pediatric_pk_master_harmonized.csv'
df = pd.read_csv(HARMONIZED_FILE)
print('Harmonized dataset preview:')
display(df.head())

# ----------------------------------
# 3. Create PBPK input folder
# ----------------------------------
PBPK_DIR = '/data/curated/pbpk_inputs'
os.makedirs(PBPK_DIR, exist_ok=True)

# ----------------------------------
# 4. Define pediatric age groups
# ----------------------------------
AGE_GROUPS = {
    'preterm_neonate': (0, 0.08),  # ~0-28 days
    'term_neonate': (0.08, 0.28),  # 29-90 days
    'infant': (0.28, 1),           # 91 days - 1 year
    'child': (1, 12),               # 1-12 years
    'adolescent': (12, 18),         # 12-18 years
    'adult': (18, 100)
}

def assign_age_group(age):
    for group, (start, end) in AGE_GROUPS.items():
        if pd.notna(age) and start <= age < end:
            return group
    return 'unknown'

df['age_group_pbpk'] = df['age_years'].apply(assign_age_group)

# ----------------------------------
# 5. Generate PBPK summary tables per drug and age group
# ----------------------------------
pbpk_summary = df.groupby(['drug','age_group_pbpk']).agg(
    n_subjects=('subject_id','count'),
    mean_age=('age_years','mean'),
    mean_weight=('weight_kg','mean'),
    mean_cl=('cl_l_per_h_per_kg','mean'),
    sd_cl=('cl_l_per_h_per_kg','std'),
    mean_vd=('t_half_h','mean') # placeholder, user can replace with Vd if available
).reset_index()

# Save PBPK summary table
pbpk_summary_file = os.path.join(PBPK_DIR, 'pbpk_input_summary.csv')
pbpk_summary.to_csv(pbpk_summary_file, index=False)
print(f'PBPK summary table saved: {pbpk_summary_file}')
display(pbpk_summary)

# ----------------------------------
# 6. Optional: generate separate files per age group
# ----------------------------------
for group in df['age_group_pbpk'].unique():
    group_df = df[df['age_group_pbpk'] == group]
    if not group_df.empty:
        file_path = os.path.join(PBPK_DIR, f'pbpk_input_{group}.csv')
        group_df.to_csv(file_path, index=False)
        print(f'Saved PBPK input for age group {group}: {file_path}')


Harmonized dataset preview:


Unnamed: 0,study_id,drug,subject_id,age_years,age_days,weight_kg,cl_l_per_h_per_kg,t_half_h,route,aggregated_row,...,cl_ml_per_min_per_kg,cl_l_per_h,cmin_ng_per_ml,visit,dose_mg,auc_mg_h_per_l,cmax_mg_per_l,route_harmonized,pathway,pathway_harmonized
0,vet_et_al_2014_agg,midazolam,,0.006,2.2,2.5,0.14,,iv,True,...,,,,,,,,intravenous,CYP3A4 hepatic metabolism,hepatic_CYP3A4
1,vet_et_al_2014_agg,midazolam,,2.0,730.0,12.0,0.28,,iv,True,...,,,,,,,,intravenous,CYP3A4 hepatic metabolism,hepatic_CYP3A4
2,morph_iv_surgery_1998_agg,morphine,morph_1998_1_1,0.005,,,0.552,,iv,True,...,9.2,,,,,,,intravenous,UGT2B7 hepatic metabolism,hepatic_UGT2B7
3,morph_iv_surgery_1998_agg,morphine,morph_1998_1_2,0.005,,,0.552,,iv,True,...,9.2,,,,,,,intravenous,UGT2B7 hepatic metabolism,hepatic_UGT2B7
4,morph_iv_surgery_1998_agg,morphine,morph_1998_1_3,0.005,,,0.552,,iv,True,...,9.2,,,,,,,intravenous,UGT2B7 hepatic metabolism,hepatic_UGT2B7


PBPK summary table saved: /Users/cmontefusco/Pediatric PK Data Repository/data/curated/pbpk_inputs/pbpk_input_summary.csv


Unnamed: 0,drug,age_group_pbpk,n_subjects,mean_age,mean_weight,mean_cl,sd_cl,mean_vd
0,gentamicin,preterm_neonate,1,0.01,3.05,0.036,,
1,metoprolol,adolescent,1,12.0,,,,4.0
2,metoprolol,child,2,5.0,,,,4.0
3,midazolam,child,0,2.0,12.0,0.28,,
4,midazolam,preterm_neonate,0,0.006,2.5,0.14,,
5,morphine,infant,10,0.55,,2.397,0.566048,
6,morphine,preterm_neonate,5,0.005,,0.552,0.0,
7,morphine,term_neonate,5,0.1,,1.518,0.0,
8,simvastatin,adolescent,10,14.1,,,,
9,zidovudine,preterm_neonate,14,0.03,,0.2004,0.062888,5.85


Saved PBPK input for age group preterm_neonate: /Users/cmontefusco/Pediatric PK Data Repository/data/curated/pbpk_inputs/pbpk_input_preterm_neonate.csv
Saved PBPK input for age group child: /Users/cmontefusco/Pediatric PK Data Repository/data/curated/pbpk_inputs/pbpk_input_child.csv
Saved PBPK input for age group term_neonate: /Users/cmontefusco/Pediatric PK Data Repository/data/curated/pbpk_inputs/pbpk_input_term_neonate.csv
Saved PBPK input for age group infant: /Users/cmontefusco/Pediatric PK Data Repository/data/curated/pbpk_inputs/pbpk_input_infant.csv
Saved PBPK input for age group adolescent: /Users/cmontefusco/Pediatric PK Data Repository/data/curated/pbpk_inputs/pbpk_input_adolescent.csv
