In [1]:
import pandas as pd
from pathlib import Path

In [2]:
import pandas as pd
from pathlib import Path

DATA_PATH = Path("data/cases.csv")   # adjust if your path differs
cases_df  = pd.read_csv(DATA_PATH)


In [3]:
cases_df.columns

Index(['caseid', 'subjectid', 'casestart', 'caseend', 'anestart', 'aneend',
       'opstart', 'opend', 'adm', 'dis', 'icu_days', 'death_inhosp', 'age',
       'sex', 'height', 'weight', 'bmi', 'asa', 'emop', 'department', 'optype',
       'dx', 'opname', 'approach', 'position', 'ane_type', 'preop_htn',
       'preop_dm', 'preop_ecg', 'preop_pft', 'preop_hb', 'preop_plt',
       'preop_pt', 'preop_aptt', 'preop_na', 'preop_k', 'preop_gluc',
       'preop_alb', 'preop_ast', 'preop_alt', 'preop_bun', 'preop_cr',
       'preop_ph', 'preop_hco3', 'preop_be', 'preop_pao2', 'preop_paco2',
       'preop_sao2', 'cormack', 'airway', 'tubesize', 'dltubesize', 'lmasize',
       'iv1', 'iv2', 'aline1', 'aline2', 'cline1', 'cline2', 'intraop_ebl',
       'intraop_uo', 'intraop_rbc', 'intraop_ffp', 'intraop_crystalloid',
       'intraop_colloid', 'intraop_ppf', 'intraop_mdz', 'intraop_ftn',
       'intraop_rocu', 'intraop_vecu', 'intraop_eph', 'intraop_phe',
       'intraop_epi', 'intraop_ca'],
     

In [4]:
# %% ------------------------------------------------------------
# 1  Basic cleaning  (drop columns you don't visualise)
# ---------------------------------------------------------------
DROP_COLS = [
    "subjectid", "casestart", "caseend",            # redundant IDs/timestamps
    "height", "weight",                             # BMI kept
    # granular labs, airway lines, drug boluses...
    "preop_ecg","preop_pft","preop_hb","preop_plt","preop_pt","preop_aptt",
    "preop_na","preop_k","preop_gluc","preop_alb",
    "preop_ast","preop_alt","preop_bun","preop_cr",
    "preop_ph","preop_hco3","preop_be",
    "preop_pao2","preop_paco2","preop_sao2",
    "cormack","airway","tubesize","dltubesize","lmasize",
    "iv1","iv2","aline1","aline2","cline1","cline2",
    "intraop_ppf","intraop_mdz","intraop_ftn","intraop_rocu","intraop_vecu",
    "intraop_eph","intraop_phe","intraop_epi","intraop_ca"
]

cases_trim = cases_df.drop(columns=DROP_COLS, errors="ignore").copy()

In [5]:
# %% ------------------------------------------------------------
# 2  Map each opname → anatomical region
# ---------------------------------------------------------------
REGION_MAP = {
    # Abdomen / GI
    "Cholecystectomy"                       : "abdomen",
    "Distal gastrectomy"                    : "abdomen",
    "Anterior resection"                    : "abdomen",
    "Exploratory laparotomy"                : "abdomen",
    "Hemic colectomy"                       : "abdomen",
    "Low anterior resection"                : "abdomen",
    "Hernia repair"                         : "abdomen",
    "Pylorus preserving pancreaticoduodenectomy": "abdomen",
    "Ileostomy repair"                      : "abdomen",
    "Total gastrectomy"                     : "abdomen",
    "Liver segmentectomy"                   : "abdomen",
    "Metastasectomy"                        : "abdomen",
    "Incisional hernia repair"              : "abdomen",
    "Appendectomy"                          : "abdomen",

    # Pelvis / GU
    "Radical prostatectomy"                 : "pelvis",

    # Thorax / Breast
    "Lung lobectomy"                        : "thorax",
    "Lung wedge resection"                  : "thorax",
    "Lung segmentectomy"                    : "thorax",
    "Breast-conserving surgery"             : "thorax",
    "Simple mastectomy"                     : "thorax",

    # Head & Neck
    "Thyroid lobectomy"                     : "head_neck",
    "Total thyroidectomy"                   : "head_neck",

    # Transplant & donor
    "Kidney transplantation"                : "abdomen",
    "Liver transplantation"                 : "abdomen",
    "Donor nephrectomy"                     : "abdomen",
    "Donor hepatectomy"                     : "abdomen",
    "Hemihepatectomy"                       : "abdomen",
}

cases_trim["region"] = (
    cases_trim["opname"]
            .str.strip()
            .map(REGION_MAP)
            .fillna("other")
)

cases_trim["region"].value_counts()


region
abdomen      2777
other        2393
thorax        931
head_neck     229
pelvis         58
Name: count, dtype: int64

In [6]:
# %% ------------------------------------------------------------
# 3  Derive helper columns
# ---------------------------------------------------------------
# postoperative LOS in *days*
cases_trim["los_postop"] = (cases_trim["dis"] - cases_trim["adm"]) / 86_400

# calendar year (optional trend analysis later)
cases_trim["year"] = pd.to_datetime(cases_trim["opstart"], unit="s").dt.year


In [7]:
# %% ------------------------------------------------------------
# 4  Region-level summary  (for colouring the map)
# ---------------------------------------------------------------
region_summary = (
    cases_trim.groupby("region", as_index=False)
              .agg(
                  case_count     = ("caseid","count"),
                  mort_rate      = ("death_inhosp","mean"),
                  mean_ebl       = ("intraop_ebl","mean"),
                  mean_los       = ("los_postop","mean"),
                  pct_emergent   = ("emop","mean"),
                  pct_ASA3plus   = ("asa", lambda x: (x >= 3).mean()),
              )
              .round(2)
)
region_summary


Unnamed: 0,region,case_count,mort_rate,mean_ebl,mean_los,pct_emergent,pct_ASA3plus
0,abdomen,2777,0.01,465.2,11.08,0.14,0.13
1,head_neck,229,0.0,88.6,4.1,0.0,0.0
2,other,2393,0.01,339.25,11.72,0.16,0.15
3,pelvis,58,0.02,425.44,5.03,0.0,0.02
4,thorax,931,0.0,143.69,7.38,0.02,0.06


In [None]:
# %% ------------------------------------------------------------
# 6  Write JSON files
# ---------------------------------------------------------------
out_dir = Path(".")
region_summary.to_json(out_dir / "jwp.json", orient="records", indent=2)
proc_by_region.to_json(out_dir / "jwp.json", orient="records", indent=2)