# Install Packages

In [1]:
!pip install duckdb glob2 polars pyarrow



# Create views

In [2]:
import duckdb as ddb
import os
from glob import glob

# Open the DuckDB database file that will hold the view definitions.
con = ddb.connect(database="mimic_v2.db")

# Schema name -> directory that is searched for that schema's CSV files.
paths = {
    'ed': 'files/ed',
    'hosp': 'files/hosp',
    'icu': 'files/icu',
}
count = 0  # running number of CSV files processed across all schemas
for schema, path in paths.items():
    # create schema if not exists
    con.execute(f'CREATE SCHEMA IF NOT EXISTS "{schema}";')
    # sorted() keeps the processing order (and the printed numbering) deterministic
    csv_files = sorted(glob(os.path.join(path, '*.csv')))
    print(f"Found {len(csv_files)} csv files in {path} for schema {schema}")
    for p in csv_files:
        count += 1
        # derive a safe view name from filename: every non-alphanumeric
        # character becomes '_' and the result is lower-cased
        fname = os.path.splitext(os.path.basename(p))[0]
        view_name = ''.join(c if c.isalnum() else '_' for c in fname).lower()
        full_view = f"{schema}.{view_name}"  # human-readable name used in log output
        # Double-quote the identifiers so names that start with a digit or clash
        # with a reserved word are still accepted; view_name cannot contain '"'
        # because of the sanitising step above.
        quoted_view = f'"{schema}"."{view_name}"'
        # Escape single quotes so a path containing "'" cannot terminate the
        # SQL string literal early.
        csv_literal = p.replace("'", "''")
        # create or replace view pointing to the CSV
        # NOTE: this statement is deliberately NOT stored in a variable called
        # `sql`; the notebook defines a helper function with that name further
        # down, and re-running this cell would otherwise overwrite it.
        create_view_sql = (
            f"CREATE OR REPLACE VIEW {quoted_view} AS "
            f"SELECT * FROM read_csv_auto('{csv_literal}');"
        )
        try:
            con.execute(create_view_sql)
            print(f"View-Nr.:{count} Created view {full_view} -> {p}")
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Failed to create view {full_view} for {p}: {e}")

print('Finished creating views.')

Found 6 csv files in files/ed for schema ed
View-Nr.:1 Created view ed.diagnosis -> files/ed/diagnosis.csv
View-Nr.:2 Created view ed.edstays -> files/ed/edstays.csv
View-Nr.:3 Created view ed.medrecon -> files/ed/medrecon.csv
View-Nr.:4 Created view ed.pyxis -> files/ed/pyxis.csv
View-Nr.:5 Created view ed.triage -> files/ed/triage.csv
View-Nr.:6 Created view ed.vitalsign -> files/ed/vitalsign.csv
Found 22 csv files in files/hosp for schema hosp
View-Nr.:7 Created view hosp.admissions -> files/hosp/admissions.csv
View-Nr.:8 Created view hosp.d_hcpcs -> files/hosp/d_hcpcs.csv
View-Nr.:9 Created view hosp.d_icd_diagnoses -> files/hosp/d_icd_diagnoses.csv
View-Nr.:10 Created view hosp.d_icd_procedures -> files/hosp/d_icd_procedures.csv
View-Nr.:11 Created view hosp.d_labitems -> files/hosp/d_labitems.csv
View-Nr.:12 Created view hosp.diagnoses_icd -> files/hosp/diagnoses_icd.csv
View-Nr.:13 Created view hosp.drgcodes -> files/hosp/drgcodes.csv
View-Nr.:14 Created view hosp.emar -> files/

In [3]:
import duckdb
import polars as pl

# --- optional: lift Polars' display limits ---
# pl.Config.set_tbl_rows(-1)  # show all rows
# pl.Config.set_tbl_cols(-1)  # show all columns

# Establish the connection (or create the database file).
con = duckdb.connect("mimic_v2.db")

# Fetch the result of SHOW ALL TABLES as a Polars DataFrame and display it.
df = (
    con.execute("SHOW ALL TABLES")
    .pl()
)
df


database,schema,name,column_names,column_types,temporary
str,str,str,list[str],list[str],bool
"""mimic_v2""","""ed""","""diagnosis""","[""subject_id"", ""stay_id"", … ""icd_title""]","[""BIGINT"", ""BIGINT"", … ""VARCHAR""]",false
"""mimic_v2""","""ed""","""edstays""","[""subject_id"", ""hadm_id"", … ""disposition""]","[""BIGINT"", ""BIGINT"", … ""VARCHAR""]",false
"""mimic_v2""","""ed""","""medrecon""","[""subject_id"", ""stay_id"", … ""etcdescription""]","[""BIGINT"", ""BIGINT"", … ""VARCHAR""]",false
"""mimic_v2""","""ed""","""pyxis""","[""subject_id"", ""stay_id"", … ""gsn""]","[""BIGINT"", ""BIGINT"", … ""VARCHAR""]",false
"""mimic_v2""","""ed""","""triage""","[""subject_id"", ""stay_id"", … ""chiefcomplaint""]","[""BIGINT"", ""BIGINT"", … ""VARCHAR""]",false
…,…,…,…,…,…
"""mimic_v2""","""icu""","""icustays""","[""subject_id"", ""hadm_id"", … ""los""]","[""BIGINT"", ""BIGINT"", … ""DOUBLE""]",false
"""mimic_v2""","""icu""","""ingredientevents""","[""subject_id"", ""hadm_id"", … ""originalrate""]","[""BIGINT"", ""BIGINT"", … ""DOUBLE""]",false
"""mimic_v2""","""icu""","""inputevents""","[""subject_id"", ""hadm_id"", … ""originalrate""]","[""BIGINT"", ""BIGINT"", … ""DOUBLE""]",false
"""mimic_v2""","""icu""","""outputevents""","[""subject_id"", ""hadm_id"", … ""valueuom""]","[""BIGINT"", ""BIGINT"", … ""VARCHAR""]",false


# Explore the Data with SQL

In [4]:
def sql(q: str):
    """Execute a SQL statement and hand the result back as a Polars DataFrame.

    The query runs on the module-level DuckDB connection ``con``.

    Args:
        q: SQL text to execute.

    Returns:
        The query result converted with ``.pl()``. If anything raises while
        executing or converting, the error is printed and an empty
        ``pl.DataFrame()`` is returned instead.
    """
    try:
        frame = con.execute(q).pl()
    except Exception as err:
        # Best effort for interactive use: report the problem, keep the cell alive.
        print(f"SQL query failed: {err}")
        frame = pl.DataFrame()
    return frame

In [8]:
def show(df: pl.DataFrame):
    """Render a Polars DataFrame with an extra leading row-number column.

    Args:
        df: The frame to display. It is not modified; ``with_row_index``
            produces a new frame with a column named ``"index"``.
    """
    indexed = df.with_row_index("index")
    display(indexed)


In [9]:
# Load every row of the ed.diagnosis view and render it with a row index.
df = sql(
    "SELECT * from ed.diagnosis"
)
show(df)

index,subject_id,stay_id,seq_num,icd_code,icd_version,icd_title
u32,i64,i64,i64,str,i64,str
0,10000032,32952584,1,"""4589""",9,"""HYPOTENSION NOS"""
1,10000032,32952584,2,"""07070""",9,"""UNSPECIFIED VIRAL HEPATITIS C …"
2,10000032,32952584,3,"""V08""",9,"""ASYMPTOMATIC HIV INFECTION"""
3,10000032,33258284,1,"""5728""",9,"""OTH SEQUELA, CHR LIV DIS"""
4,10000032,33258284,2,"""78959""",9,"""OTHER ASCITES"""
…,…,…,…,…,…,…
899045,19999828,30712109,1,"""K632""",10,"""Fistula of intestine"""
899046,19999828,32917002,1,"""E1110""",10,"""Type 2 diabetes mellitus with …"
899047,19999828,32917002,2,"""Z7984""",10,"""Long term (current) use of ora…"
899048,19999914,32002659,1,"""R4182""",10,"""Altered mental status, unspeci…"
