In [1]:
import pandas as pd
from pathlib import Path
import json

# Location of the case table on disk; change it if your layout differs.
DATA_PATH = Path("data") / "cases.csv"

# Raw case-level frame, exactly as stored in the CSV.
cases_df = pd.read_csv(DATA_PATH)

In [2]:
# %% ------------------------------------------------------------
# 1  Column pruning (optional)
# ---------------------------------------------------------------
# Columns removed before analysis. The preop_* and intraop_* families are
# spelled out from their suffixes; the resulting list has the same names in
# the same order as a flat literal would.
DROP_COLS = [
    "subjectid", "casestart", "caseend",
    "height", "weight",
    *(f"preop_{suffix}" for suffix in (
        "ecg", "pft", "hb", "plt", "pt", "aptt",
        "na", "k", "gluc", "alb",
        "ast", "alt", "bun", "cr",
        "ph", "hco3", "be",
        "pao2", "paco2", "sao2",
    )),
    "cormack", "airway", "tubesize", "dltubesize", "lmasize",
    "iv1", "iv2", "aline1", "aline2", "cline1", "cline2",
    *(f"intraop_{suffix}" for suffix in (
        "ppf", "mdz", "ftn", "rocu", "vecu",
        "eph", "phe", "epi", "ca",
    )),
]

# errors="ignore" skips any listed name that is absent from the frame, so this
# cell still runs on a CSV that lacks some of these columns. The explicit
# copy gives later cells their own frame to add columns to.
cases_trim = cases_df.drop(labels=DROP_COLS, axis="columns", errors="ignore").copy()

In [3]:
# %% ------------------------------------------------------------
# 2  Map each `opname` → anatomical region
# ---------------------------------------------------------------
# Exact-match lookup from procedure name to a coarse anatomical region.
# `opname` is stripped of surrounding whitespace before the lookup; matching
# is case-sensitive, and any name not listed here is labelled "other".
REGION_MAP = {
    # ------- Abdomen / GI -------
    "Cholecystectomy"                       : "abdomen",
    "Distal gastrectomy"                    : "abdomen",
    "Anterior resection"                    : "abdomen",
    "Exploratory laparotomy"                : "abdomen",
    "Hemicolectomy"                         : "abdomen",
    # Earlier spelling of the key above; kept so that any rows it already
    # matched keep their region.
    "Hemic colectomy"                       : "abdomen",
    "Low anterior resection"                : "abdomen",
    "Hernia repair"                         : "abdomen",
    "Pylorus preserving pancreaticoduodenectomy": "abdomen",
    "Ileostomy repair"                      : "abdomen",
    "Total gastrectomy"                     : "abdomen",
    "Liver segmentectomy"                   : "abdomen",
    "Hemihepatectomy"                       : "abdomen",
    "Metastasectomy"                        : "abdomen",
    "Incisional hernia repair"              : "abdomen",
    "Appendectomy"                          : "abdomen",

    # ------- Pelvis / GU ---------
    "Radical prostatectomy"                 : "pelvis",

    # ------- Thorax / Breast -----
    "Lung lobectomy"                        : "thorax",
    "Lung wedge resection"                  : "thorax",
    "Lung segmentectomy"                    : "thorax",
    "Breast-conserving surgery"             : "thorax",
    "Simple mastectomy"                     : "thorax",

    # ------- Head & Neck ----------
    "Thyroid lobectomy"                     : "head_neck",
    "Total thyroidectomy"                   : "head_neck",

    # ------- Transplant (donor and recipient; all filed under abdomen) ----
    "Kidney transplantation"                : "abdomen",
    "Liver transplantation"                 : "abdomen",
    "Donor nephrectomy"                     : "abdomen",
    "Donor hepatectomy"                     : "abdomen",
}

# Rows whose procedure is missing or not in REGION_MAP come back from .map()
# as NaN and are bucketed as "other".
# NOTE(review): "other" is a large share of the cases in the counts below —
# worth listing the most frequent unmapped names and extending the map.
cases_trim["region"] = (
    cases_trim["opname"]
      .str.strip()
      .map(REGION_MAP)
      .fillna("other")
)

cases_trim["region"].value_counts()


region
abdomen      2777
other        2393
thorax        931
head_neck     229
pelvis         58
Name: count, dtype: int64

In [4]:
# %% ------------------------------------------------------------
# 3  Derived helpers
# ---------------------------------------------------------------
# Length of stay in days: (`dis` - `adm`) divided by 86 400, i.e. the two
# columns are treated as second counts.
# NOTE(review): the column is named "postop", but the interval starts at
# `adm`, not at the end of surgery — confirm which span is intended.
cases_trim["los_postop"] = (
    cases_trim["dis"].sub(cases_trim["adm"]).div(86_400)
)

# Calendar year (for possible trend charts): `opstart` is read as seconds
# since the Unix epoch.
# NOTE(review): if `opstart` is an offset from the start of the case rather
# than an absolute timestamp, every row will land in 1970 — verify before
# charting by year.
cases_trim["year"] = (
    pd.to_datetime(cases_trim["opstart"], unit="s")
      .dt.year
)


In [5]:
# %% ------------------------------------------------------------
# 4  Region-level summary
# ---------------------------------------------------------------
# One row per region: case volume plus outcome / acuity averages.
# pandas' "mean" skips NaN, so every average is taken over the rows where that
# column is recorded; pct_ASA3plus follows the same rule explicitly.
# NOTE(review): mort_rate / pct_emergent are only rates if `death_inhosp` and
# `emop` are 0/1 flags — confirm. Two decimals is also coarse for rare
# outcomes (small rates collapse to 0.0 or 0.01).
region_summary = (
    cases_trim.groupby("region", as_index=False)
              .agg(case_count   = ("caseid","count"),
                   mort_rate    = ("death_inhosp","mean"),
                   mean_ebl     = ("intraop_ebl","mean"),
                   mean_los     = ("los_postop","mean"),
                   pct_emergent = ("emop","mean"),
                   # Drop missing ASA first: NaN >= 3 evaluates to False, which
                   # would count unrecorded cases as "below 3" and bias the
                   # share downward.
                   pct_ASA3plus = ("asa", lambda s: (s.dropna() >= 3).mean()))
              .round(2)
)
region_summary.head()


Unnamed: 0,region,case_count,mort_rate,mean_ebl,mean_los,pct_emergent,pct_ASA3plus
0,abdomen,2777,0.01,465.2,11.08,0.14,0.13
1,head_neck,229,0.0,88.6,4.1,0.0,0.0
2,other,2393,0.01,339.25,11.72,0.16,0.15
3,pelvis,58,0.02,425.44,5.03,0.0,0.02
4,thorax,931,0.0,143.69,7.38,0.02,0.06


In [6]:
# %% ------------------------------------------------------------
# 5  Procedure × Region summary
# ---------------------------------------------------------------
# One row per (region, procedure) pair with volume and outcome averages.
# `region` was derived from the whitespace-stripped `opname`, so the grouping
# key is stripped the same way; otherwise "X" and "X " would share a region
# but appear as two separate procedures. .assign() works on a copy, so
# `cases_trim` itself is left untouched.
# Rows with a missing `opname` are left out by groupby's default NaN handling.
proc_by_region = (
    cases_trim.assign(opname=cases_trim["opname"].str.strip())
              .groupby(["region","opname"], as_index=False)
              .agg(case_count = ("caseid","count"),
                   mort_rate  = ("death_inhosp","mean"),
                   mean_ebl   = ("intraop_ebl","mean"),
                   mean_los   = ("los_postop","mean"))
              .round(2)
)
proc_by_region.head()


Unnamed: 0,region,opname,case_count,mort_rate,mean_ebl,mean_los
0,abdomen,Anterior resection,247,0.0,157.9,9.42
1,abdomen,Appendectomy,56,0.0,56.67,7.38
2,abdomen,Cholecystectomy,503,0.0,101.77,3.52
3,abdomen,Distal gastrectomy,342,0.01,181.26,12.22
4,abdomen,Donor hepatectomy,70,0.0,408.64,8.63


In [8]:
import numpy as np

def sanitize(records):
    """Replace non-finite floats in a list of record dicts with ``None``.

    ``json.dumps`` writes NaN and +/-infinity as the bare tokens ``NaN`` /
    ``Infinity`` / ``-Infinity``, which are not valid JSON and are rejected by
    strict parsers. Mapping them to ``None`` makes them serialise as ``null``.

    Parameters
    ----------
    records : iterable of dict
        Row dicts, e.g. the result of ``DataFrame.to_dict(orient="records")``.

    Returns
    -------
    list of dict
        New dicts with the same keys; the input dicts are not modified.
        Values that are not floats (str, int, None, ...) pass through as-is.
    """
    def _fix(val):
        # Covers Python floats (and np.float64, which subclasses float) as
        # well as the other NumPy float widths.
        if isinstance(val, (float, np.floating)) and not np.isfinite(val):
            return None
        return val
    return [{k: _fix(v) for k, v in rec.items()} for rec in records]

# Bundle both summary tables as plain lists of row dicts, passed through
# sanitize() so that NaN becomes None (JSON null).
payload = {
    "region_summary": sanitize(region_summary.to_dict(orient="records")),
    "proc_by_region": sanitize(proc_by_region.to_dict(orient="records")),
}

out_path = Path("jwp.json")
# allow_nan=False makes json.dumps raise ValueError if a NaN/Infinity value is
# still present, instead of silently writing a token that is not valid JSON.
# The cell's displayed value is write_text()'s return: the character count.
out_path.write_text(
    json.dumps(payload, indent=2, allow_nan=False),
    encoding="utf-8",
)

43179