In [1]:
from healthchain.sandbox.loaders import MimicOnFHIRLoader
from healthchain.io import Dataset

In [2]:
# Relative path to the MIMIC-IV clinical database demo (FHIR 2.1.0) directory.
MIMIC_DIR = "../../datasets/mimic-iv-clinical-database-demo-on-fhir-2.1.0/"

# Resource file names (without extension) to read from MIMIC_DIR.
RESOURCES_TO_LOAD = [
    "MimicObservationChartevents",
    "MimicObservationLabevents",
    "MimicPatient",
]

In [3]:
# Read the selected MIMIC resources into a single bundle.
# as_dict=True keeps the result as a plain dict (fast, no validation).
loader = MimicOnFHIRLoader()
bundle = loader.load(data_dir=MIMIC_DIR, resource_types=RESOURCES_TO_LOAD, as_dict=True)

INFO: 2025-11-27 18:55:49,156 [healthchain.sandbox.loaders.mimic]: Loaded 668862 resources from MimicObservationChartevents.ndjson.gz
INFO: 2025-11-27 18:55:54,360 [healthchain.sandbox.loaders.mimic]: Loaded 107727 resources from MimicObservationLabevents.ndjson.gz
INFO: 2025-11-27 18:55:54,366 [healthchain.sandbox.loaders.mimic]: Loaded 100 resources from MimicPatient.ndjson.gz


In [4]:
# Total number of entries in the bundle, across all loaded resource files.
len(bundle["entry"])

776689

In [5]:
# Flatten the FHIR bundle into a tabular Dataset; the YAML schema selects
# the sepsis vitals features to extract.
tabular = Dataset.from_fhir_bundle(
    bundle, schema="../healthchain/configs/features/sepsis_vitals.yaml"
)

In [6]:
# Column name -> dtype mapping of the extracted feature table.
tabular.dtypes

{'patient_ref': 'object',
 'heart_rate': 'float64',
 'temperature': 'float64',
 'respiratory_rate': 'float64',
 'wbc': 'float64',
 'lactate': 'float64',
 'creatinine': 'float64',
 'age': 'int64',
 'gender_encoded': 'int64'}

In [7]:
# Number of rows in the extracted dataset.
len(tabular)

100

In [8]:
# Pull out the underlying table (presumably a pandas DataFrame) and preview
# the first 10 rows.
df = tabular.data
df.head(10)

Unnamed: 0,patient_ref,heart_rate,temperature,respiratory_rate,wbc,lactate,creatinine,age,gender_encoded
0,Patient/0a8eebfd-a352-522e-89f0-1d4a13abdebc,96.5,98.966667,20.7,5.815385,31.0,0.466667,52,0
1,Patient/0c2243d2-987b-5cbd-8eb1-170a80647693,85.37931,98.135294,12.103448,7.7375,66.5,0.594937,57,1
2,Patient/13df78e7-150e-5eb7-be5f-5f62b2baee87,104.307692,98.375,15.961538,8.3375,,0.966667,66,1
3,Patient/158f3a39-e3d7-5e7a-93aa-57af894aadd9,99.73913,98.39,18.358696,11.509091,,0.628571,40,0
4,Patient/1ab119a5-aac8-5002-9d2f-b8ff69623387,93.402985,98.824242,21.753731,15.578571,,0.646667,34,1
5,Patient/1bb918ba-e04e-5e7a-87ca-dbcbbb4c72c3,78.181818,98.725,16.961538,22.90625,,0.756667,38,0
6,Patient/1cf9e585-806c-513b-80af-4ca565a28231,104.666667,98.81,19.388889,12.520952,,3.847321,53,1
7,Patient/22a3e422-663a-561c-b305-a0c04bf42235,69.05,98.8,16.9,10.754545,,1.353846,87,1
8,Patient/23069939-0c4c-517b-a3ec-baae0d4e3988,78.694444,98.325,16.055556,9.383333,,0.86,52,1
9,Patient/23f959c1-6ac2-562b-9cbe-c111f338e27b,87.184524,98.827091,17.251497,14.954054,,0.642105,51,1


In [9]:
# Row count of the underlying table; should agree with len(tabular).
len(df)

100

In [10]:
# Load the serialized sepsis model artifact from disk.
# NOTE(security): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load model files that come from a trusted source.
import joblib
MODEL_PATH = '../cookbook/models/sepsis_model.pkl'
model_data = joblib.load(MODEL_PATH)

In [11]:
# Show the loaded artifact: a dict with 'model', 'scaler' and 'metadata' entries.
model_data

{'model': XGBClassifier(base_score=None, booster=None, callbacks=None,
               colsample_bylevel=None, colsample_bynode=None,
               colsample_bytree=None, device=None, early_stopping_rounds=None,
               enable_categorical=False, eval_metric='logloss',
               feature_types=None, feature_weights=None, gamma=None,
               grow_policy=None, importance_type=None,
               interaction_constraints=None, learning_rate=None, max_bin=None,
               max_cat_threshold=None, max_cat_to_onehot=None,
               max_delta_step=None, max_depth=None, max_leaves=None,
               min_child_weight=None, missing=nan, monotone_constraints=None,
               multi_strategy=None, n_estimators=None, n_jobs=-1,
               num_parallel_tree=None, ...),
 'scaler': StandardScaler(),
 'metadata': {'model_name': 'XGBoost',
  'training_date': '2025-11-22T13:52:14.144052',
  'feature_names': ['heart_rate',
   'temperature',
   'respiratory_rate',
   'wbc'

In [12]:
# Unpack the trained classifier and keep only the columns listed in the
# artifact's feature_names metadata.
# NOTE(review): model_data also carries a 'scaler' that is never applied to
# these features — confirm the model was trained on unscaled inputs.
model = model_data["model"]
patient_features = df[model_data["metadata"]["feature_names"]]

In [13]:
# ML inference: keep column 1 of predict_proba for every row
# (presumably the positive / sepsis class probability — confirm class order).
probabilities = model.predict_proba(patient_features)[:, 1]

In [14]:
# Turn probabilities into 0/1 labels using the optimal threshold stored in
# the model's metrics metadata (values at or above the threshold become 1).
threshold = model_data["metadata"]["metrics"]["optimal_threshold"]
predictions = (probabilities >= threshold).astype(int)

In [None]:
# Convert back to FHIR: build RiskAssessment resources for the sepsis outcome
# (ICD-10 code A41.9), labelled with the model name.
# NOTE(review): `predictions` and `probabilities` computed earlier are not
# passed to this call or visibly attached to `tabular` — confirm how
# to_risk_assessment obtains them. This cell also has no recorded execution
# count, so it appears to have been edited after the outputs below were produced.
risk_assessments = tabular.to_risk_assessment(
    outcome_code="A41.9",
    outcome_display="Sepsis",
    model_name="XGBoost"
)

In [16]:
# Number of generated RiskAssessment resources.
len(risk_assessments)

100

In [17]:
# Inspect one generated resource as a plain dict (index 5 is an arbitrary sample).
risk_assessments[5].model_dump()

{'resourceType': 'RiskAssessment',
 'id': 'hc-71012a5d-cf7f-436d-864b-327efe28b483',
 'status': 'final',
 'method': {'coding': [{'system': 'https://healthchain.github.io/ml-models',
    'code': 'XGBoost',
    'display': 'XGBoost'}]},
 'subject': {'reference': 'Patient/1bb918ba-e04e-5e7a-87ca-dbcbbb4c72c3'},
 'occurrenceDateTime': datetime.datetime(2025, 11, 27, 18, 56, 51, tzinfo=TzInfo(UTC)),
 'prediction': [{'outcome': {'coding': [{'system': 'http://hl7.org/fhir/sid/icd-10',
      'code': 'A41.9',
      'display': 'Sepsis'}]},
   'probabilityDecimal': 0.07619287073612213,
   'qualitativeRisk': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/risk-probability',
      'code': 'low',
      'display': 'Low'}]}}],
 'note': [{'text': 'ML prediction: Negative (probability: 7.62%, risk: low)'}]}