In [None]:
# Initial Imports - for proper python "folder gymnastics"

import sys
from pathlib import Path

# Take the kernel's current working directory (assumed to be the folder that
# holds this notebook -- verify if the kernel is launched from elsewhere),
# go up two levels, and convert the result to a string
root_path = str(Path.cwd().parent.parent)

# Add that root directory to sys.path so Python can "see" config.py.
# The membership check keeps re-runs of this cell from appending duplicates.
if root_path not in sys.path:
    sys.path.append(root_path)

from config import DATABASE_URL

In [None]:
# Additional Imports
# For Apple Silicon machines, need: brew install libomp
import pandas as pd
import sklearn
import xgboost as xgb
import shap
import imblearn

from sqlalchemy import create_engine
from sqlalchemy.engine import make_url

In [None]:
# Show library version numbers so the environment behind this run is recorded

print(f"Pandas:       {pd.__version__}")
print(f"Scikit-Learn: {sklearn.__version__}")
print(f"XGBoost:      {xgb.__version__}")
print(f"SHAP:         {shap.__version__}")
print(f"ImbLearn:     {imblearn.__version__}")

# A database URL may embed a password, and cell output is saved with the
# notebook, so render the URL with the password masked instead of printing
# the raw value.
print(f"DATABASE_URL: {make_url(DATABASE_URL).render_as_string(hide_password=True)}")

In [None]:
# Connecting to med-z1 PostgreSQL
# Database schema to query:
#  - clinical.patient_encounters - Hospital admissions (the target events)
#  - clinical.patient_demographics - Age, sex, DOB
#  - clinical.patient_medications_outpatient - Active meds (polypharmacy feature)
#  - clinical.patient_vitals - BP, weight, temp (clinical instability features)
#  - clinical.patient_labs - Creatinine, Hgb (abnormal lab features)

# Create the SQLAlchemy engine from DATABASE_URL. The engine is the handle
# passed to pd.read_sql in the cells below; SQLAlchemy engines connect lazily,
# so no connection is opened until the first query runs.
engine = create_engine(DATABASE_URL)

In [None]:
# Let's try a database query (patient_demographics)

patient_demographics_query = """
SELECT
    patient_key,
    patient_sid,
    icn,
    ssn,
    name_last,
    name_first,
    age
FROM clinical.patient_demographics
ORDER BY name_last
"""

# Load into pandas DataFrame (the full result, including ssn, stays available
# to later cells under this name)
patient_demographics_df = pd.read_sql(patient_demographics_query, engine)

# Show a bounded preview rather than the whole frame, and keep the ssn column
# out of the rendered output: cell output is saved with the notebook file.
print(f"Shape: {patient_demographics_df.shape}")
patient_demographics_df.drop(columns=["ssn"]).head(10)

In [None]:
# Let's try a database query (patient_encounters)

# Query 1: total number of rows in the encounters table
pt_encounters_count = """
    select count(*)
    from clinical.patient_encounters
"""

# Query 2: one row per encounter that has a discharge timestamp, joined to the
# patient's demographics
pt_encounters_query = """
SELECT
    e.patient_key,
    d.name_first as first,
    d.name_last as last,
    d.age,
    d.sex,
    e.sta3n,
    e.admitting_provider_name as admit_provider,
    e.admit_datetime::DATE as admit_date,
    e.discharge_datetime::DATE as discharge_date,
    e.discharge_disposition as disposition
FROM clinical.patient_encounters as e
INNER JOIN clinical.patient_demographics AS d 
    ON e.patient_key = d.patient_key
WHERE e.discharge_datetime IS NOT NULL
ORDER BY e.patient_key DESC, e.discharge_datetime
"""

# Load Query 1 and display the count. It gets its own name so that the result
# is not overwritten by Query 2 before anyone sees it. The query returns a
# single row with a single column, hence iloc[0, 0].
pt_encounters_total = pd.read_sql(pt_encounters_count, engine).iloc[0, 0]
print(f"Total encounters: {pt_encounters_total}")

# Load Query 2 into pandas DataFrame and display.
# Its row count can be lower than the total above: Query 2 keeps only
# encounters with a non-null discharge_datetime and a matching demographics row.
pt_encounters_df = pd.read_sql(pt_encounters_query, engine)
print(f"Shape: {pt_encounters_df.shape}")
pt_encounters_df.head(35)

In [None]:
# Second look at the encounters frame: same shape report, but this time
# previewing the last 35 rows instead of the first 35
print(f"Shape: {pt_encounters_df.shape}")
pt_encounters_df.tail(n=35)

In [None]:
# Re-order the encounters by patient, then by discharge date within each
# patient (both ascending), and show the result
pt_encounters_df = pt_encounters_df.sort_values(
    by=["patient_key", "discharge_date"],
)
pt_encounters_df