In [8]:
from pathlib import Path

# Paths are resolved relative to the directory the notebook kernel runs in.
data_dir = Path.cwd().joinpath("data", "uncompressed")

# Dataset name -> directory holding that dataset's files (name / version).
dir_mapping = {
    "eicu": data_dir.joinpath("eicu-crd", "2.0"),
    "mimic-iv": data_dir.joinpath("mimiciv", "3.1"),
}

# Destination passed to the later cells that write and read the converted data.
output_dir = Path.cwd().joinpath("output")

In [9]:
from open_icu.config.dataset.meds import MEDSDataset

# Create the MEDS project rooted at output_dir.
# NOTE(review): overwrite=True presumably replaces an existing project at that
# path — confirm against MEDSDataset before running on data you want to keep.
project = MEDSDataset(project_path=output_dir, overwrite=True)

# Write the project metadata; an empty dict is passed, so no custom fields are supplied here.
project.write_metadata({})

In [10]:
from pathlib import Path

from open_icu.config.dataset.source.regestry import DatasetConfigRegistry

# Dataset configs live in <parent of the working directory>/config/dataset.
config_root = Path.cwd().parent / "config" / "dataset"

# Build the registry from that directory and fetch every config it holds.
registry = DatasetConfigRegistry.from_path(config_root)
configs = registry.all()

In [11]:
from open_icu.transform.processor import process_table

# Run process_table for every table of every registered dataset config.
#
# The source directory of each dataset comes from dir_mapping. Check the
# mapping up front: a plain `dir_mapping.get(...)` yields None for a dataset
# that has no entry, and that None would be forwarded to process_table as the
# source directory. Failing here, before any table is processed, gives a clear
# message instead of an obscure error part-way through a long run.
unmapped = [
    config.name
    for config in configs
    if config.tables and config.name not in dir_mapping
]
if unmapped:
    raise KeyError(
        f"No source directory configured in dir_mapping for dataset(s): {unmapped}. "
        f"Known datasets: {sorted(dir_mapping)}"
    )

for config in configs:
    for table in config.tables:
        process_table(
            table,
            dir_mapping[config.name],  # validated above, so this lookup cannot miss
            output_dir,
            config.name,
        )

In [12]:
import polars as pl

# Print the number of rows stored in each event's parquet file,
# one line per dataset / table / event.
for config in configs:
    for table in config.tables:
        table_dir = output_dir / "data" / config.name / table.name
        for event in table.events:
            df = pl.scan_parquet(table_dir / f"{event.name}.parquet")
            # Only the length is selected and collected from the lazy scan.
            row_count = df.select(pl.len()).collect().item()
            print(f"{config.name} - {table.name} - {event.name}: {row_count}")

eicu - patient - icu_admission: 200859
eicu - patient - icu_discharge: 200859
eicu - vitalPeriodic - heartrate: 146671642
eicu - infusionDrug - drugamount: 4803719
mimic-iv - icustays - icu_admission: 94458
mimic-iv - icustays - icu_discharge: 94458
mimic-iv - icustays - icu_length_of_stay: 94458
mimic-iv - chartevents - chartevent: 432997491
mimic-iv - medications - dosage: 10953713
mimic-iv - medications - rate: 6056482


In [13]:
# Preview the first rows of the code metadata table (head() with its default row count).
codes_path = output_dir / "metadata" / "codes.parquet"
pl.scan_parquet(codes_path).head().collect()

code,description,parent_codes
str,null,null
"""BunScore_ApacheIV""",,
"""D5/0.2%NS w/ KCL (ml/hr)""",,
"""NSS W/ mvi (ml/hr)""",,
"""Verapamil""",,
"""Dobutamine ()""",,


In [None]:
# Preview the first five rows of the MIMIC-IV chartevent output.
chartevent_path = output_dir / "data" / "mimic-iv" / "chartevents" / "chartevent.parquet"
pl.scan_parquet(chartevent_path).head(5).collect()