In [3]:
from pathlib import Path
import sqlite3, pandas as pd

#db_path = Path(__file__).resolve().parents[1] / "data" / "health.db"  # if running from repo subdir
# Or if in a notebook, use:
db_path = Path.cwd().parent / "data" / "health.db"

con = sqlite3.connect(str(db_path))
print("Connected to", db_path)

pd.read_sql("SELECT * FROM vw_readmit_month", con).to_csv("readmit_month.csv", index=False)
pd.read_sql("SELECT * FROM vw_top_conditions", con).to_csv("top_conditions.csv", index=False)
con.close()


Connected to /workspaces/health-etl-pipeline/data/health.db


In [4]:
# export_for_bi.py
from pathlib import Path
import sqlite3, pandas as pd

ROOT = Path(__file__).resolve().parent
DB    = ROOT / "data" / "health.db"
OUT   = ROOT / "bi_exports"
OUT.mkdir(exist_ok=True)

con = sqlite3.connect(DB)

# (Optional) see what views/tables exist
print(pd.read_sql("SELECT name, type FROM sqlite_master WHERE type in ('view','table') ORDER BY type, name;", con).head(30))

# Export views if they exist; otherwise fall back to equivalent queries
def export_sql(name, sql):
    df = pd.read_sql(sql, con)
    fp = OUT / f"{name}.csv"
    df.to_csv(fp, index=False)
    print(f"Wrote {len(df):,} rows → {fp}")

# Try views first
has_vw_readmit = pd.read_sql("SELECT name FROM sqlite_master WHERE type='view' AND name='vw_readmit_month';", con).shape[0] > 0
has_vw_topcond = pd.read_sql("SELECT name FROM sqlite_master WHERE type='view' AND name='vw_top_conditions';", con).shape[0] > 0

if has_vw_readmit:
    export_sql("readmit_month", "SELECT * FROM vw_readmit_month ORDER BY month_start;")
else:
    export_sql("readmit_month", """
        WITH fe AS (
          SELECT date(strftime('%Y-%m-01', start_date)) AS month_start,
                 CASE WHEN was_readmit THEN 1.0 ELSE 0.0 END AS readmit
          FROM encounters_clean
        )
        SELECT month_start, AVG(readmit) AS readmit_rate
        FROM fe
        GROUP BY month_start
        ORDER BY month_start;
    """)

if has_vw_topcond:
    export_sql("top_conditions", "SELECT * FROM vw_top_conditions ORDER BY patients DESC;")
else:
    export_sql("top_conditions", """
        SELECT condition_name, COUNT(DISTINCT patient_id) AS patients
        FROM conditions_clean
        GROUP BY condition_name
        ORDER BY patients DESC;
    """)

# Optional: CDI slice (Maryland Diabetes crude prevalence by year)
if pd.read_sql("SELECT COUNT(*) AS c FROM sqlite_master WHERE type='table' AND name='cdi';", con)["c"][0]:
    export_sql("cdi_diabetes_md", """
        SELECT yearstart AS year,
               AVG(CAST(datavalue AS REAL)) AS diabetes_prevalence
        FROM cdi
        WHERE locationabbr='MD' AND topic='Diabetes'
        GROUP BY year
        ORDER BY year;
    """)

con.close()
print(f"CSV exports are in: {OUT}")


NameError: name '__file__' is not defined