In [None]:
# Getting Started...

In [1]:
# Initial Imports
# For proper python "folder gymnastics"

import sys
from pathlib import Path

# Get the directory of the current notebook, go up two levels, and convert to a string
root_path = str(Path.cwd().parent.parent)

# Add that root directory to sys.path so Python can "see" config.py
if root_path not in sys.path:
    sys.path.append(root_path)

from config import DATABASE_URL


Loaded config from: /Users/chuck/swdev/med/med-z1/.env
      Project root: /Users/chuck/swdev/med/med-z1
    CDWWORK server: 127.0.0.1,1433 / DB: CDWWork
   CDWWORK2 server: 127.0.0.1,1433 / DB: CDWWork2
    MinIO endpoint: localhost:9000, bucket: med-z1
         USE_MINIO: True
        PostgreSQL: localhost:5432 / DB: medz1
      CCOW enabled: True, URL: http://localhost:8001
     Vista enabled: True, URL: http://localhost:8003
   Session timeout: 25 minutes



In [2]:
# Additional Imports
# For Apple Silicon machines: brew install libomp
import pandas as pd
import sklearn
import xgboost as xgb
import shap
import imblearn

from sqlalchemy import create_engine

In [3]:
# Show library version numbers

print(f"Pandas:       {pd.__version__}")
print(f"Scikit-Learn: {sklearn.__version__}")
print(f"XGBoost:      {xgb.__version__}")
print(f"SHAP:         {shap.__version__}")
print(f"ImbLearn:     {imblearn.__version__}")

print(f"DATABASE_URL: {DATABASE_URL}")

Pandas:       2.3.3
Scikit-Learn: 1.8.0
XGBoost:      3.1.3
SHAP:         0.50.0
ImbLearn:     0.14.1
DATABASE_URL: postgresql://postgres:AllieD-1993-2025-z1@localhost:5432/medz1


In [4]:
# Connecting to med-z1 PostgreSQL
# Database Schema to Query:
#  - clinical.patient_encounters - Hospital admissions (your target events)
#  - clinical.patient_demographics - Age, sex, DOB
#  - clinical.patient_medications_outpatient - Active meds (polypharmacy feature)
#  - clinical.patient_vitals - BP, weight, temp (clinical instability features)
#  - clinical.patient_labs - Creatinine, Hgb (abnormal lab features)

# Create database connection
engine = create_engine(DATABASE_URL)

In [5]:
# Let's try a database query (patient_demographics)

patient_demographics_query = """
SELECT
    patient_key,
    patient_sid,
    icn,
    ssn,
    name_last,
    name_first,
    age
FROM clinical.patient_demographics
ORDER BY name_last
"""

# Load into pandas DataFrame
patient_demographics_df = pd.read_sql(patient_demographics_query, engine)

patient_demographics_df

Unnamed: 0,patient_key,patient_sid,icn,ssn,name_last,name_first,age
0,ICN100007,1007,ICN100007,111221007,Amajor,Adam,86
1,ICN100010,1010,ICN100010,111221010,Aminor,Alexander,60
2,1022V789012,1022,1022V789012,444401022,Anderson,James,62
3,ICN100017,1027,ICN100017,999991027,Anderson,Robert,47
4,ICN100008,1008,ICN100008,111221008,Bmajor,Barry,65
5,1018V345678,1018,1018V345678,999991018,Chen,David,43
6,ICN100009,1009,ICN100009,111221009,Cmajor,Claire,66
7,ICN100001,1001,ICN100001,123456789,Dooree,Adam,46
8,ICN100005,1005,ICN100005,111111105,Dooree,Edward,44
9,1021V678901,1021,1021V678901,333301021,Garcia,Patricia,59


In [26]:
# Let's try a database query (patient_encounters)

# Query 1
pt_encounters_count = """
    select count(*)
    from clinical.patient_encounters
"""

# Query 2
pt_encounters_query = """
SELECT
    e.patient_key,
    d.name_first as first,
    d.name_last as last,
    d.age,
    d.sex,
    e.sta3n,
    e.admitting_provider_name as admit_provider,
    e.admit_datetime::DATE as admit_date,
    e.discharge_datetime::DATE as discharge_date,
    e.discharge_disposition as disposition
FROM clinical.patient_encounters as e
INNER JOIN clinical.patient_demographics AS d 
    ON e.patient_key = d.patient_key
WHERE e.discharge_datetime IS NOT NULL
ORDER BY e.patient_key DESC, e.discharge_datetime
"""

# Load Query 1 into pandas DataFrame and display
pt_encounters_df = pd.read_sql(pt_encounters_count, engine)

# Load Query 2 into pandas DataFrame and display
pt_encounters_df = pd.read_sql(pt_encounters_query, engine)
print(f"Shape: {pt_encounters_df.shape}")
pt_encounters_df.head(35)

Shape: (72, 10)


Unnamed: 0,patient_key,first,last,age,sex,sta3n,admit_provider,admit_date,discharge_date,disposition
0,ICN100017,Robert,Anderson,47,M,528,Carl Sagan,2019-06-20,2019-06-28,TRANSFERRED TO MENTAL HEALTH UNIT
1,ICN100017,Robert,Anderson,47,M,528,Carl Sagan,2023-03-10,2023-03-17,HOME WITH OUTPATIENT FOLLOW-UP
2,ICN100017,Robert,Anderson,47,M,528,Julius Hibbert,2024-11-15,2024-11-15,EXPIRED
3,ICN100016,Margaret,Wilson,87,F,508,Walter Reed,2024-11-25,2024-12-01,EXPIRED
4,ICN100014,Joyce,Kim,70,F,516,Marie Curry,2025-11-17,2025-11-22,SNF
5,ICN100013,Irving,Thompson,77,M,508,Walter Reed,2025-11-20,2025-11-24,Home
6,ICN100012,Helen,Martinez,68,F,688,Carl Sagan,2025-11-25,2025-11-25,Home
7,ICN100011,George,Harris,72,M,552,Florence Nightengale,2025-11-27,2025-11-27,Home
8,ICN100010,Alexander,Aminor,60,M,516,Marie Curry,2025-11-30,2025-11-30,Home
9,ICN100009,Claire,Cmajor,66,F,508,Florence Nightengale,2025-11-28,2025-12-03,Rehab


In [27]:
# Display again
print(f"Shape: {pt_encounters_df.shape}")
pt_encounters_df.tail(35)

Shape: (72, 10)


Unnamed: 0,patient_key,first,last,age,sex,sta3n,admit_provider,admit_date,discharge_date,disposition
37,ICN100001,Adam,Dooree,46,M,688,Julius Hibbert,2025-03-26,2025-03-30,Home with O2
38,ICN100001,Adam,Dooree,46,M,552,Julius Hibbert,2025-04-15,2025-04-19,Home
39,ICN100001,Adam,Dooree,46,M,516,Julius Hibbert,2025-05-05,2025-05-09,Home
40,ICN100001,Adam,Dooree,46,M,516,Marie Curry,2025-05-19,2025-05-25,Home
41,ICN100001,Adam,Dooree,46,M,508,Julius Hibbert,2025-05-25,2025-05-29,Rehab
42,ICN100001,Adam,Dooree,46,M,688,Julius Hibbert,2025-06-14,2025-06-18,SNF
43,ICN100001,Adam,Dooree,46,M,552,Julius Hibbert,2025-07-04,2025-07-08,Home with O2
44,ICN100001,Adam,Dooree,46,M,516,Julius Hibbert,2025-07-24,2025-07-28,Home
45,ICN100001,Adam,Dooree,46,M,508,Julius Hibbert,2025-08-13,2025-08-17,Home
46,ICN100001,Adam,Dooree,46,M,508,Florence Nightengale,2025-08-17,2025-08-20,Home


In [29]:
# Sort DataFrame
pt_encounters_df = pt_encounters_df.sort_values(['patient_key', 'discharge_date'])
pt_encounters_df

Unnamed: 0,patient_key,first,last,age,sex,sta3n,admit_provider,admit_date,discharge_date,disposition
71,1016V123456,Marcus,Johnson,29,M,516,Marie Curry,2025-10-16,2025-10-20,Home with O2
70,1017V234567,Sarah,Williams,36,F,552,Florence Nightengale,2025-09-16,2025-09-19,Home
69,1018V345678,David,Chen,43,M,688,Carl Sagan,2025-07-18,2025-07-23,Home
68,1019V456789,Linda,Rodriguez,49,F,508,Florence Nightengale,2025-06-18,2025-06-21,Home
67,1020V567890,Robert,Thompson,54,M,552,Florence Nightengale,2025-04-19,2025-04-22,Home
...,...,...,...,...,...,...,...,...,...,...
4,ICN100014,Joyce,Kim,70,F,516,Marie Curry,2025-11-17,2025-11-22,SNF
3,ICN100016,Margaret,Wilson,87,F,508,Walter Reed,2024-11-25,2024-12-01,EXPIRED
0,ICN100017,Robert,Anderson,47,M,528,Carl Sagan,2019-06-20,2019-06-28,TRANSFERRED TO MENTAL HEALTH UNIT
1,ICN100017,Robert,Anderson,47,M,528,Carl Sagan,2023-03-10,2023-03-17,HOME WITH OUTPATIENT FOLLOW-UP
