In [9]:
import duckdb as ddb
import os
from glob import glob

def _quote_ident(name):
    """Return `name` as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def _quote_literal(value):
    """Return `value` as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + value.replace("'", "''") + "'"


# Database connection used for all schema/view creation below.
conn = ddb.connect(database="mimic_v2.db")

# Target schema name -> directory that is globbed for that schema's CSV files.
paths = {
    'ed': 'files/ed',
    'hosp': 'files/hosp',
    'icu': 'files/icu',
}

for schema, path in paths.items():
    # Create the schema if it does not exist yet.
    conn.execute(f"CREATE SCHEMA IF NOT EXISTS {_quote_ident(schema)};")
    csv_files = sorted(glob(os.path.join(path, '*.csv')))
    print(f"Found {len(csv_files)} csv files in {path} for schema {schema}")
    for p in csv_files:
        # Derive the view name from the file name: every non-alphanumeric
        # character becomes '_' and the result is lower-cased.
        # NOTE: two files whose names differ only in such characters map to
        # the same view, and CREATE OR REPLACE keeps only the last one.
        fname = os.path.splitext(os.path.basename(p))[0]
        view_name = ''.join(c if c.isalnum() else '_' for c in fname).lower()
        full_view = f"{schema}.{view_name}"  # unquoted form, used for log messages
        # The statement is DDL, so values cannot be bound as parameters.
        # Instead, quote the identifiers (names starting with a digit or
        # matching a reserved word stay valid) and escape the path literal
        # (a "'" in a file name can no longer break or alter the SQL).
        sql = (
            f"CREATE OR REPLACE VIEW {_quote_ident(schema)}.{_quote_ident(view_name)} "
            f"AS SELECT * FROM read_csv_auto({_quote_literal(p)});"
        )
        try:
            conn.execute(sql)
            print(f"Created view {full_view} -> {p}")
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Failed to create view {full_view} for {p}: {e}")

print('Finished creating views.')

Found 6 csv files in files/ed for schema ed
Created view ed.diagnosis -> files/ed/diagnosis.csv
Created view ed.edstays -> files/ed/edstays.csv
Created view ed.medrecon -> files/ed/medrecon.csv
Created view ed.pyxis -> files/ed/pyxis.csv
Created view ed.triage -> files/ed/triage.csv
Created view ed.vitalsign -> files/ed/vitalsign.csv
Found 22 csv files in files/hosp for schema hosp
Created view hosp.admissions -> files/hosp/admissions.csv
Created view hosp.d_hcpcs -> files/hosp/d_hcpcs.csv
Created view hosp.d_icd_diagnoses -> files/hosp/d_icd_diagnoses.csv
Created view hosp.d_icd_procedures -> files/hosp/d_icd_procedures.csv
Created view hosp.d_labitems -> files/hosp/d_labitems.csv
Created view hosp.diagnoses_icd -> files/hosp/diagnoses_icd.csv
Created view hosp.drgcodes -> files/hosp/drgcodes.csv
Created view hosp.emar -> files/hosp/emar.csv
Created view hosp.emar_detail -> files/hosp/emar_detail.csv
Created view hosp.hcpcsevents -> files/hosp/hcpcsevents.csv
Created view hosp.labeven

In [10]:
import duckdb
import polars as pl

# Connect to the database, or create the file
con = duckdb.connect("mimic_v2.db")

# Run SHOW ALL TABLES and display the result as a DataFrame
tables_sql = "SHOW ALL TABLES"
df = con.execute(tables_sql).df()
display(df)


Unnamed: 0,database,schema,name,column_names,column_types,temporary
0,mimic_v2,ed,diagnosis,"[subject_id, stay_id, seq_num, icd_code, icd_v...","[BIGINT, BIGINT, BIGINT, VARCHAR, BIGINT, VARC...",False
1,mimic_v2,ed,edstays,"[subject_id, hadm_id, stay_id, intime, outtime...","[BIGINT, BIGINT, BIGINT, TIMESTAMP, TIMESTAMP,...",False
2,mimic_v2,ed,medrecon,"[subject_id, stay_id, charttime, name, gsn, nd...","[BIGINT, BIGINT, TIMESTAMP, VARCHAR, VARCHAR, ...",False
3,mimic_v2,ed,pyxis,"[subject_id, stay_id, charttime, med_rn, name,...","[BIGINT, BIGINT, TIMESTAMP, BIGINT, VARCHAR, B...",False
4,mimic_v2,ed,triage,"[subject_id, stay_id, temperature, heartrate, ...","[BIGINT, BIGINT, DOUBLE, DOUBLE, DOUBLE, DOUBL...",False
5,mimic_v2,ed,vitalsign,"[subject_id, stay_id, charttime, temperature, ...","[BIGINT, BIGINT, TIMESTAMP, DOUBLE, DOUBLE, DO...",False
6,mimic_v2,hosp,admissions,"[subject_id, hadm_id, admittime, dischtime, de...","[BIGINT, BIGINT, TIMESTAMP, TIMESTAMP, TIMESTA...",False
7,mimic_v2,hosp,d_hcpcs,"[code, category, long_description, short_descr...","[VARCHAR, BIGINT, VARCHAR, VARCHAR]",False
8,mimic_v2,hosp,d_icd_diagnoses,"[icd_code, icd_version, long_title]","[VARCHAR, BIGINT, VARCHAR]",False
9,mimic_v2,hosp,d_icd_procedures,"[icd_code, icd_version, long_title]","[VARCHAR, BIGINT, VARCHAR]",False


In [26]:
# Fetch the first five rows of ed.diagnosis as a DataFrame (shown as the cell output)
preview_sql = "SELECT * FROM ed.diagnosis LIMIT 5"
con.execute(preview_sql).df()

Unnamed: 0,subject_id,stay_id,seq_num,icd_code,icd_version,icd_title
0,10000032,32952584,1,4589,9,HYPOTENSION NOS
1,10000032,32952584,2,07070,9,UNSPECIFIED VIRAL HEPATITIS C WITHOUT HEPATIC ...
2,10000032,32952584,3,V08,9,ASYMPTOMATIC HIV INFECTION
3,10000032,33258284,1,5728,9,"OTH SEQUELA, CHR LIV DIS"
4,10000032,33258284,2,78959,9,OTHER ASCITES
