In [None]:
from pathlib import Path

# Root of the locally extracted source datasets, resolved against the
# current working directory of the notebook kernel.
data_dir = Path.cwd().joinpath("data", "uncompressed")

# Source name -> directory containing that source's files.
# The keys are looked up via `config.name` when the tables are processed.
dir_mapping = dict(
    eicu=data_dir.joinpath("eicu-crd", "2.0"),
    mimic=data_dir.joinpath("mimiciv", "3.1"),
)

# Project directory handed to MEDSProject and process_table; the parquet
# results inspected at the end of the notebook are read back from here.
output_dir = Path.cwd().joinpath("output")

In [None]:
from open_icu.meds.project import MEDSProject

# Create the MEDS project rooted at `output_dir`.
# NOTE(review): overwrite=True presumably replaces an existing project at
# that location -- confirm before pointing this at data you want to keep.
project = MEDSProject(project_path=output_dir, overwrite=True)

# Write the project metadata, passing an empty mapping.
project.write_metadata(dict())

In [None]:
from open_icu.config.utils import load_yaml_configs
from open_icu.config.source import SourceConfig

# Load every source configuration from `configs/source`, located one level
# above the current working directory, as SourceConfig objects.
configs = load_yaml_configs(
    Path.cwd().parent.joinpath("configs", "source"),
    SourceConfig,
)

# Display the loaded configs as the cell output.
configs

In [None]:
from open_icu.meds.processor import process_table

# Run every table of every loaded source config through process_table,
# reading from the source's directory in `dir_mapping` and writing into
# `output_dir`.
for config in configs:
    # Fail fast with a readable message when a loaded config has no entry in
    # `dir_mapping`. `dict.get` would silently yield None here, and that None
    # would be handed to process_table as the source directory.
    if config.name not in dir_mapping:
        raise KeyError(
            f"No data directory configured for source {config.name!r}; "
            f"known sources: {sorted(dir_mapping)}"
        )

    # The source directory is the same for all tables of one config, so look
    # it up once per config rather than once per table.
    source_dir = dir_mapping[config.name]

    for table in config.tables:
        process_table(
            table,
            source_dir,
            output_dir,
            config.name,
        )

In [None]:
import polars as pl

# Sanity check: report the number of rows in each event parquet file under
# `output_dir / "data" / <source> / <table>`.
for config in configs:
    for table in config.tables:
        for event in table.events:
            parquet_path = output_dir.joinpath(
                "data", config.name, table.name, f"{event.name}.parquet"
            )
            # Lazy scan: only the row count is materialised, not the data.
            df = pl.scan_parquet(parquet_path)
            n_rows = df.select(pl.len()).collect().item()
            print(f"{config.name} - {table.name} - {event.name}: {n_rows}")

In [None]:
# Preview the first rows (head() with its default row count) of the codes
# metadata table stored under `output_dir / "metadata"`.
(
    pl.scan_parquet(output_dir / "metadata" / "codes.parquet")
    .head()
    .collect()
)

In [None]:
# Preview the first five rows of the "chartevent" event file written for the
# "chartevents" table of the "mimic" source.
(
    pl.scan_parquet(
        output_dir / "data" / "mimic" / "chartevents" / "chartevent.parquet"
    )
    .head(5)
    .collect()
)