In [1]:
# 0_fetch_and_store_metadata.ipynb
# Interactive notebook to create and manage metadata for pediatric PK raw files

# ----------------------------------
# 1. Import libraries
# ----------------------------------
import pandas as pd
import os
from IPython.display import display

# ----------------------------------
# 2. Define raw folder and metadata file path
# ----------------------------------
# Folder holding the raw PK CSV files; the metadata CSV path is built inside it.
# NOTE(review): this is an absolute path at the filesystem root, while the
# recorded run output shows a per-user repository path — confirm the intended
# location before running (creating it may require elevated permissions).
RAW_DIR = '/data/raw'
METADATA_FILE = os.path.join(RAW_DIR, 'metadata.csv')
# Create the folder (and any missing parents) up front; exist_ok=True keeps
# re-running the cell from raising when the folder is already there.
os.makedirs(RAW_DIR, exist_ok=True)

# ----------------------------------
# 3. List all raw PK CSV files
# ----------------------------------
# Every raw file is named '<drug>_raw.csv', so derive the names from the drug list.
raw_files = [
    f'{drug}_raw.csv'
    for drug in (
        'gentamicin',
        'metoprolol',
        'midazolam',
        'morphine',
        'simvastatin',
        'zidovudine',
    )
]

# Resolve each raw file name against RAW_DIR, then print a header followed by
# one '- <path>' line per file.
raw_paths = [os.path.join(RAW_DIR, file_name) for file_name in raw_files]
print('Raw PK files:', *(f'- {path}' for path in raw_paths), sep='\n')

# ----------------------------------
# 4. Create starter metadata DataFrame
# ----------------------------------
# Column names shared by every metadata record, in the order they appear in the table.
_METADATA_FIELDS = (
    'study_id', 'drug', 'source_file', 'pathway', 'elimination', 'formulation', 'notes',
)

# One tuple per study, with values listed in _METADATA_FIELDS order.
_METADATA_ROWS = (
    ('genta_neonate_2013_agg', 'gentamicin', 'gentamicin_raw.csv',
     'renal', 'renal', 'iv', 'Neonatal clearance'),
    ('metoprolol_fda_bpca_agg', 'metoprolol', 'metoprolol_raw.csv',
     'CYP2D6 hepatic metabolism', 'hepatic/renal', 'oral', 'Pediatric summary Cmin/Cmax'),
    ('vet_et_al_2014_agg', 'midazolam', 'midazolam_raw.csv',
     'CYP3A4 hepatic metabolism', 'hepatic', 'iv', 'Critically ill neonates/children'),
    ('morph_iv_surgery_1998_agg', 'morphine', 'morphine_raw.csv',
     'UGT2B7 hepatic metabolism', 'hepatic', 'iv', 'Postoperative infants'),
    ('simva_pbpk_children_2019_agg', 'simvastatin', 'simvastatin_raw.csv',
     'hepatic/biliary', 'hepatic', 'oral', 'Children/adolescents PBPK model'),
    ('zidovudine_preterm_2005_agg', 'zidovudine', 'zidovudine_raw.csv',
     'UGT/renal', 'hepatic/renal', 'oral', 'Preterm neonates'),
)

# Pair each row with the field names to get one dict per study.
metadata_entries = [dict(zip(_METADATA_FIELDS, row)) for row in _METADATA_ROWS]

# Turn the list of per-study dicts into a table (one row per entry, one column
# per dict key) and render it in the notebook for review.
metadata_df = pd.DataFrame(metadata_entries)
display(metadata_df)

# ----------------------------------
# 5. Edit metadata interactively (optional)
# ----------------------------------
# Users can modify entries here before the table is saved, e.g.
#     metadata_df.loc[0, 'notes'] = 'Updated note'
# The table is rendered again so any edits made above are visible; with no
# edits this repeats the previous output.
display(metadata_df)

# ----------------------------------
# 6. Save metadata to CSV
# ----------------------------------
# Write the table to METADATA_FILE; index=False keeps the DataFrame's row
# index out of the CSV.
# NOTE(review): an existing file at this path is replaced on every run —
# confirm that overwriting earlier saved metadata is intended.
metadata_df.to_csv(METADATA_FILE, index=False)
print(f'Metadata saved to {METADATA_FILE}')


Raw PK files:
- /Users/cmontefusco/Pediatric PK Data Repository/data/raw/gentamicin_raw.csv
- /Users/cmontefusco/Pediatric PK Data Repository/data/raw/metoprolol_raw.csv
- /Users/cmontefusco/Pediatric PK Data Repository/data/raw/midazolam_raw.csv
- /Users/cmontefusco/Pediatric PK Data Repository/data/raw/morphine_raw.csv
- /Users/cmontefusco/Pediatric PK Data Repository/data/raw/simvastatin_raw.csv
- /Users/cmontefusco/Pediatric PK Data Repository/data/raw/zidovudine_raw.csv


Unnamed: 0,study_id,drug,source_file,pathway,elimination,formulation,notes
0,genta_neonate_2013_agg,gentamicin,gentamicin_raw.csv,renal,renal,iv,Neonatal clearance
1,metoprolol_fda_bpca_agg,metoprolol,metoprolol_raw.csv,CYP2D6 hepatic metabolism,hepatic/renal,oral,Pediatric summary Cmin/Cmax
2,vet_et_al_2014_agg,midazolam,midazolam_raw.csv,CYP3A4 hepatic metabolism,hepatic,iv,Critically ill neonates/children
3,morph_iv_surgery_1998_agg,morphine,morphine_raw.csv,UGT2B7 hepatic metabolism,hepatic,iv,Postoperative infants
4,simva_pbpk_children_2019_agg,simvastatin,simvastatin_raw.csv,hepatic/biliary,hepatic,oral,Children/adolescents PBPK model
5,zidovudine_preterm_2005_agg,zidovudine,zidovudine_raw.csv,UGT/renal,hepatic/renal,oral,Preterm neonates


Unnamed: 0,study_id,drug,source_file,pathway,elimination,formulation,notes
0,genta_neonate_2013_agg,gentamicin,gentamicin_raw.csv,renal,renal,iv,Neonatal clearance
1,metoprolol_fda_bpca_agg,metoprolol,metoprolol_raw.csv,CYP2D6 hepatic metabolism,hepatic/renal,oral,Pediatric summary Cmin/Cmax
2,vet_et_al_2014_agg,midazolam,midazolam_raw.csv,CYP3A4 hepatic metabolism,hepatic,iv,Critically ill neonates/children
3,morph_iv_surgery_1998_agg,morphine,morphine_raw.csv,UGT2B7 hepatic metabolism,hepatic,iv,Postoperative infants
4,simva_pbpk_children_2019_agg,simvastatin,simvastatin_raw.csv,hepatic/biliary,hepatic,oral,Children/adolescents PBPK model
5,zidovudine_preterm_2005_agg,zidovudine,zidovudine_raw.csv,UGT/renal,hepatic/renal,oral,Preterm neonates


Metadata saved to /Users/cmontefusco/Pediatric PK Data Repository/data/raw/metadata.csv
