In [1]:
import numpy as np
import pandas as pd

pd.set_option("display.max_columns", None)

In [3]:
!find . -name "mds_ed-Copy1.csv"

./mds_ed-Copy1.csv


In [2]:
import datetime as dt

import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.cluster import KMeans
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.feature_selection import VarianceThreshold
from sklearn.impute import SimpleImputer
from sklearn.metrics import silhouette_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, RobustScaler, StandardScaler

In [11]:
# Code below eliminates diagnosis columns and lab columns that will not be used
# without reducing the file size, it was too big to load

df_full = pd.read_csv("./mds_ed-Copy1.csv", low_memory=False, nrows=10)
cols = df_full.columns

# identify columns that can be eliminated to make the file usable
remove_vals_ = [
    "diagnoses_",
    "general_file_name",
    "Unnamed: 0",
    "subject_id",
    "stay_id",
    "row_id",
]
reduced_cols = [col for col in cols if not any(val in col for val in remove_vals_)]  #
df_reducedx = pd.read_csv("./mds_ed-Copy1.csv", low_memory=False, usecols=reduced_cols)

# drop all extraneous lab values
drop_cols = [
    c
    for c in df_reducedx.columns
    if c.startswith("labvalues_") and not c.endswith("_first")
]
df_reducedx.drop(columns=drop_cols, inplace=True)

# --- 1. Drop columns with >90% missing ---
col_thresh = len(df_reducedx) * 0.1
df = df_reducedx.dropna(axis=1, thresh=col_thresh)

# --- 2. Drop rows with >30% missing ---
row_thresh = df.shape[1] * 0.40
df = df.dropna(axis=0, thresh=row_thresh)

# --- 3. Convert hadm_id -> admit (0/1)
df["admit"] = df["general_ed_hadm_id"].notna().astype(int)

# --- 4. Drop hadm_id
df = df.drop(columns=["general_ed_hadm_id"])

# replace -999 with na
df = df.replace(-999, np.nan)

In [12]:
print("before dropping empty:", df_reducedx.shape)
print("after:", df.shape)

dropped_cols = [col for col in df_reducedx.columns if col not in df.columns]
print("Dropped columns:", dropped_cols)

before dropping empty: (129057, 144)
after: (116995, 132)
Dropped columns: ['general_ed_hadm_id', 'general_hosp_dischtime', 'labvalues_bands_first', 'labvalues_base_excess_first', 'labvalues_bilirubin,_direct_first', 'labvalues_c-reactive_protein_first', 'labvalues_carboxyhemoglobin_first', 'labvalues_creatine_kinase_(ck)_first', 'labvalues_creatine_kinase,_mb_isoenzyme_first', 'labvalues_fibrinogen,_functional_first', 'labvalues_free_calcium_first', 'labvalues_oxygen_saturation_first', 'labvalues_pco2_first']


In [19]:
# Replace sentinel values like -999 with NaN so we can check death times properly
df["general_mortality_days"] = df["general_mortality_days"].replace(-999, np.nan)



# 1. Any mortality (died at any time)
df["mortality_any"] = df["general_mortality_days"].notna().astype(int)

d = df["general_mortality_days"]

# 2. Short-term mortality (died within 28 days)
df["mortality_28d"] = ((d.notna()) & (d <= 28)).astype(int)

# 3. Medium-term mortality (died within 365 days)
df["mortality_365d"] = ((d.notna()) & (d > 28) & (d <= 365)).astype(int)

# 4. Long-term mortality (died, but after 365 days)
df["mortality_gt365d"] = ((d.notna()) & (d > 365)).astype(int)
print(
    df[
        [
            "general_mortality_days",
            "mortality_any",
            "mortality_28d",
            "mortality_365d",
            "mortality_gt365d",
        ]
    ].head(20)
)

    general_mortality_days  mortality_any  mortality_28d  mortality_365d  \
1                      NaN              0              0               0   
2                    645.0              1              0               0   
3                      NaN              0              0               0   
4                   1879.0              1              0               0   
5                    738.0              1              0               0   
6                      NaN              0              0               0   
7                      NaN              0              0               0   
8                      NaN              0              0               0   
9                      NaN              0              0               0   
10                     NaN              0              0               0   
11                     NaN              0              0               0   
12                     NaN              0              0               0   
13          

In [5]:
# 1) Treat sentinel as "no death observed"
df["general_mortality_days"] = pd.to_numeric(
    df["general_mortality_days"], errors="coerce"
)
df["general_mortality_days"] = df["general_mortality_days"].replace(-999, np.nan)

gmd = df["general_mortality_days"]

# 2) Build mutually exclusive categories
conds = [
    gmd.notna() & (gmd <= 28),
    gmd.notna() & (gmd > 28) & (gmd <= 365),
    gmd.notna() & (gmd > 365),
]
choices = ["Short-term", "Medium-term", "Long-term"]

df["mortality_category"] = np.select(conds, choices, default="Alive")

# 3)  make it an ordered categorical 
cat_order = ["Alive", "Short-term", "Medium-term", "Long-term"]
df["mortality_category"] = pd.Categorical(
    df["mortality_category"], categories=cat_order, ordered=True
)


df["mortality_category"]

1               Alive
2           Long-term
3               Alive
4           Long-term
5           Long-term
             ...     
129050      Long-term
129052          Alive
129053      Long-term
129054    Medium-term
129055          Alive
Name: mortality_category, Length: 116995, dtype: category
Categories (4, object): ['Alive' < 'Short-term' < 'Medium-term' < 'Long-term']

In [11]:
# determining if there are some mistakes in the entry of deterioration moratality columns (some reduntant entries)
mortality_cols = [c for c in df.columns if "mortality" in c.lower()]
df_mortality = df[mortality_cols]
df_mortality.columns

# identify rows where more than one deterioration_mortality has a 1
mask = (
    df_mortality[
        [
            "deterioration_mortality_1d",
            "deterioration_mortality_7d",
            "deterioration_mortality_28d",
            "deterioration_mortality_90d",
            "deterioration_mortality_180d",
            "deterioration_mortality_365d",
        ]
    ].sum(axis=1)
    > 1
)

df_mortality_duplicates = df_mortality.loc[mask]

print("df shape", df_mortality.shape)
print("duplicates count", df_mortality_duplicates.shape)
df[df["deterioration_mortality_stay"] == 0]

df shape (116995, 14)
duplicates count (12188, 14)


Unnamed: 0,general_study_id,general_ecg_time,general_ed_diag_ed,general_ed_diag_hosp,demographics_gender,demographics_age,general_anchor_year,general_anchor_age,general_dod,general_ecg_no_within_stay,general_strat_fold,general_intime,general_outtime,general_race,general_90min,general_mortality_hours,general_mortality_days,general_icu_time_hours,demographics_ethnicity_asian,demographics_ethnicity_black/african,demographics_ethnicity_hispanic/latino,demographics_ethnicity_other,demographics_ethnicity_white,biometrics_bmi,biometrics_weight,biometrics_height,vitals_temperature_mean,vitals_temperature_median,vitals_temperature_min,vitals_temperature_max,vitals_temperature_std,vitals_temperature_first,vitals_temperature_last,vitals_temperature_rate_change,vitals_temperature_coeff,vitals_heartrate_mean,vitals_heartrate_median,vitals_heartrate_min,vitals_heartrate_max,vitals_heartrate_std,vitals_heartrate_first,vitals_heartrate_last,vitals_heartrate_rate_change,vitals_heartrate_coeff,vitals_resprate_mean,vitals_resprate_median,vitals_resprate_min,vitals_resprate_max,vitals_resprate_std,vitals_resprate_first,vitals_resprate_last,vitals_resprate_rate_change,vitals_resprate_coeff,vitals_o2sat_mean,vitals_o2sat_median,vitals_o2sat_min,vitals_o2sat_max,vitals_o2sat_std,vitals_o2sat_first,vitals_o2sat_last,vitals_o2sat_rate_change,vitals_o2sat_coeff,vitals_sbp_mean,vitals_sbp_median,vitals_sbp_min,vitals_sbp_max,vitals_sbp_std,vitals_sbp_first,vitals_sbp_last,vitals_sbp_rate_change,vitals_sbp_coeff,vitals_dbp_mean,vitals_dbp_median,vitals_dbp_min,vitals_dbp_max,vitals_dbp_std,vitals_dbp_first,vitals_dbp_last,vitals_dbp_rate_change,vitals_dbp_coeff,vitals_acuity,labvalues_absolute_basophil_count_first,labvalues_absolute_eosinophil_count_first,labvalues_absolute_lymphocyte_count_first,labvalues_alanine_aminotransferase_(alt)_first,labvalues_albumin_first,labvalues_alkaline_phosphatase_first,labvalues_asparate_aminotransferase_(ast)_first,labvalues_basophils_first,labvalues_bicarbonate_first,"labvalues_bilirubin,_total_first","labvalues_calcium,_total_first",labvalues_chloride_first,labvalues_creatinine_first,labvalues_eosinophils_first,labvalues_glucose_first,labvalues_hematocrit_first,labvalues_hemoglobin_first,labvalues_inr(pt)_first,labvalues_lactate_first,labvalues_lymphocytes_first,labvalues_magnesium_first,labvalues_neutrophils_first,labvalues_pt_first,labvalues_ptt_first,labvalues_phosphate_first,labvalues_platelet_count_first,labvalues_potassium_first,labvalues_rdw_first,labvalues_red_blood_cells_first,labvalues_sodium_first,labvalues_troponin_t_first,labvalues_urea_nitrogen_first,labvalues_white_blood_cells_first,labvalues_ph_first,deterioration_severe_hypoxemia,deterioration_ecmo,deterioration_vasopressors,deterioration_inotropes,deterioration_mechanical_ventilation,deterioration_cardiac_arrest,deterioration_icu_24h,deterioration_icu_stay,deterioration_mortality_1d,deterioration_mortality_7d,deterioration_mortality_28d,deterioration_mortality_90d,deterioration_mortality_180d,deterioration_mortality_365d,deterioration_mortality_stay,general_data,admit,mortality_any,mortality_28d,mortality_365d,mortality_gt365d,mortality_category
1,40000084,2179-08-30 11:58:00,"['R112', 'K920']",[],1,27.0,2178.0,26.0,,0,9,2179-08-30 11:49:00,2179-08-30 18:35:00,HISPANIC/LATINO - DOMINICAN,2179-08-30 13:19:00,,,,0,0,1,0,0,,,,37.277778,37.277778,37.277778,37.277778,0.0,37.277778,37.277778,0.0,0.0,60.5,60.5,57.0,64.0,3.5,64.0,57.0,-0.109375,-0.205882,16.5,16.5,15.0,18.0,1.5,18.0,15.0,-0.166667,-0.088235,98.5,98.5,98.0,99.0,0.5,99.0,98.0,-0.010101,-0.029412,112.5,112.5,103.0,122.0,9.5,122.0,103.0,-0.155738,-0.558824,68.0,68.0,66.0,70.0,2.0,70.0,66.0,-0.057143,-0.117647,2.0,0.04,0.27,1.32,,4.4,72.0,24.0,1.3,25.0,1.0,,102.0,0.7,8.8,92.0,46.8,15.8,1.0,,42.9,,35.3,11.4,32.0,,251.0,4.2,12.5,4.84,136.0,,7.0,3.1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,670359,0,0,0,0,0,Alive
2,40000115,2163-04-17 16:45:00,"['Z950', 'R001']",[],0,93.0,2157.0,87.0,2165-01-22,0,5,2163-04-17 16:32:00,2163-04-17 21:46:00,BLACK/AFRICAN AMERICAN,2163-04-17 18:02:00,15487.466667,645.0,,0,1,0,0,0,21.3,47.853956,,36.722222,36.722222,36.722222,36.722222,0.0,36.722222,36.722222,0.0,0.0,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,184.0,184.0,184.0,184.0,0.0,184.0,184.0,0.000000,0.000000,46.0,46.0,46.0,46.0,0.0,46.0,46.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,205343,0,1,0,0,1,Long-term
3,40000143,2175-07-21 01:01:00,"['Y0889XA', 'S066X0A']","['K219', 'Y048XXA', 'G43909', 'S066X0A', 'E785...",0,60.0,2175.0,60.0,,0,19,2175-07-21 00:37:00,2175-07-21 06:20:00,WHITE,2175-07-21 02:07:00,,,,0,0,0,0,1,25.3,53.070264,144.78,,,,,,,,,,92.0,92.0,90.0,94.0,2.0,94.0,90.0,-0.042553,-0.056338,16.0,16.0,14.0,18.0,2.0,14.0,18.0,0.285714,0.056338,99.0,99.0,99.0,99.0,0.0,99.0,99.0,0.000000,0.000000,158.5,158.5,146.0,171.0,12.5,171.0,146.0,-0.146199,-0.352113,102.0,102.0,100.0,104.0,2.0,104.0,100.0,-0.038462,-0.056338,1.0,0.14,0.27,2.38,,,,,0.7,26.0,,,101.0,0.8,1.4,101.0,41.3,13.6,0.9,,12.1,,79.2,9.8,27.1,,282.0,6.8,13.2,4.40,138.0,,11.0,19.6,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,375664,1,0,0,0,0,Alive
4,40000175,2147-02-03 01:28:00,['K922'],"['K766', 'B182', 'I8511', 'F17200', 'E119', 'K...",1,67.0,2147.0,67.0,2152-03-28,0,14,2147-02-03 01:13:00,2147-02-03 05:08:00,UNKNOWN,2147-02-03 02:43:00,45118.783333,1879.0,3.916667,0,0,0,1,0,,,,36.333333,36.333333,36.333333,36.333333,0.0,36.333333,36.333333,0.0,0.0,78.0,78.0,77.0,79.0,1.0,77.0,79.0,0.025974,0.054054,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,95.5,95.5,95.0,96.0,0.5,95.0,96.0,0.010526,0.027027,97.5,97.5,96.0,99.0,1.5,99.0,96.0,-0.030303,-0.081081,62.0,62.0,61.0,63.0,1.0,61.0,63.0,0.032787,0.054054,2.0,,,,25.0,2.8,73.0,39.0,0.5,25.0,0.7,,111.0,0.6,2.1,214.0,30.1,10.1,,,27.1,,63.5,,,,77.0,4.6,15.2,3.22,141.0,,22.0,4.8,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,287789,1,1,0,0,1,Long-term
5,40000243,2125-03-15 20:21:00,"['W1830XA', 'S0090XA']","['F419', 'D649', 'E039', 'E860', 'Y92009', 'R1...",0,78.0,2120.0,73.0,2127-03-24,0,14,2125-03-15 20:04:00,2125-03-16 02:29:21,WHITE,2125-03-15 21:34:00,17715.933333,738.0,,0,0,0,0,1,26.4,65.398895,,36.555556,36.555556,36.555556,36.555556,0.0,36.555556,36.555556,0.0,0.0,72.0,72.0,72.0,72.0,0.0,72.0,72.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,183.0,183.0,183.0,183.0,0.0,183.0,183.0,0.000000,0.000000,74.0,74.0,74.0,74.0,0.0,74.0,74.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61364,1,1,0,0,1,Long-term
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129050,49999768,2185-07-26 15:29:00,['R42'],"['H4010X4', 'G609', 'K7689', 'R0789', 'G4733',...",0,72.0,2183.0,70.0,2191-11-27,0,14,2185-07-26 15:15:00,2185-07-27 12:40:00,BLACK/CAPE VERDEAN,2185-07-26 16:45:00,55544.750000,2314.0,,0,1,0,0,0,37.5,92.986360,,,,,,,,,,,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,96.0,96.0,96.0,96.0,0.0,96.0,96.0,0.000000,0.000000,114.0,114.0,114.0,114.0,0.0,114.0,114.0,0.000000,0.000000,101.0,101.0,101.0,101.0,0.0,101.0,101.0,0.000000,0.000000,2.0,,,,15.0,4.1,84.0,18.0,0.9,28.0,0.2,9.4,86.0,2.1,1.5,265.0,32.8,10.2,,3.9,24.4,2.1,66.7,,,3.7,307.0,3.3,14.9,3.68,134.0,,64.0,8.9,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,591478,1,1,0,0,1,Long-term
129052,49999842,2162-09-09 15:45:00,"['Z98890', 'M79632', 'Z7901', 'M25562', 'M549'...",[],1,60.0,2156.0,54.0,,0,6,2162-09-09 15:27:00,2162-09-09 21:47:00,WHITE,2162-09-09 16:57:00,,,,0,0,0,0,1,,,,36.500000,36.500000,36.500000,36.500000,0.0,36.500000,36.500000,0.0,0.0,64.0,64.0,64.0,64.0,0.0,64.0,64.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,156.0,156.0,156.0,156.0,0.0,156.0,156.0,0.000000,0.000000,75.0,75.0,75.0,75.0,0.0,75.0,75.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,764408,0,0,0,0,0,Alive
129053,49999850,2170-09-30 10:31:00,"['N186', 'E119', 'R55', 'K859']","['N186', 'K219', 'Z7901', 'I739', 'D3500', 'M8...",0,71.0,2166.0,67.0,2176-11-23,0,11,2170-09-30 10:19:00,2170-09-30 16:38:00,HISPANIC/LATINO - MEXICAN,2170-09-30 11:49:00,53893.683333,2245.0,,0,0,1,0,0,34.9,89.357624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,21.0,4.1,124.0,64.0,0.4,24.0,0.5,,102.0,6.7,0.5,159.0,39.7,12.4,1.3,3.3,6.6,,89.9,13.9,21.7,,183.0,5.3,13.4,3.82,141.0,,50.0,10.5,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,350992,1,1,0,0,1,Long-term
129054,49999893,2161-12-08 18:13:00,['R42'],"['J341', 'Z9079', 'M47896', 'Z87891', 'M109', ...",1,73.0,2155.0,67.0,2162-05-25,0,9,2161-12-08 18:00:00,2161-12-09 02:33:00,BLACK/AFRICAN AMERICAN,2161-12-08 19:30:00,4014.000000,167.0,,0,1,0,0,0,26.0,75.400598,170.18,35.944444,35.944444,35.944444,35.944444,0.0,35.944444,35.944444,0.0,0.0,78.0,78.0,78.0,78.0,0.0,78.0,78.0,0.000000,0.000000,20.0,20.0,20.0,20.0,0.0,20.0,20.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,134.0,134.0,134.0,134.0,0.0,134.0,134.0,0.000000,0.000000,76.0,76.0,76.0,76.0,0.0,76.0,76.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,296265,1,1,0,1,0,Medium-term


In [20]:
# create clnical deterioration target, seperate from mortality target along with a death or deterioration combined target

deterioration_cols = [
    c
    for c in df.columns
    if "deterioration" in c.lower() and "mortality" not in c.lower()
]
df["clinical_deterioration_any"] = df[deterioration_cols].any(axis=1).astype(int)
df["death_or_deterioration_any"] = (
    df[["mortality_any", "clinical_deterioration_any"]].any(axis=1).astype(int)
)
df

Unnamed: 0,general_study_id,general_ecg_time,general_ed_diag_ed,general_ed_diag_hosp,demographics_gender,demographics_age,general_anchor_year,general_anchor_age,general_dod,general_ecg_no_within_stay,general_strat_fold,general_intime,general_outtime,general_race,general_90min,general_mortality_hours,general_mortality_days,general_icu_time_hours,demographics_ethnicity_asian,demographics_ethnicity_black/african,demographics_ethnicity_hispanic/latino,demographics_ethnicity_other,demographics_ethnicity_white,biometrics_bmi,biometrics_weight,biometrics_height,vitals_temperature_mean,vitals_temperature_median,vitals_temperature_min,vitals_temperature_max,vitals_temperature_std,vitals_temperature_first,vitals_temperature_last,vitals_temperature_rate_change,vitals_temperature_coeff,vitals_heartrate_mean,vitals_heartrate_median,vitals_heartrate_min,vitals_heartrate_max,vitals_heartrate_std,vitals_heartrate_first,vitals_heartrate_last,vitals_heartrate_rate_change,vitals_heartrate_coeff,vitals_resprate_mean,vitals_resprate_median,vitals_resprate_min,vitals_resprate_max,vitals_resprate_std,vitals_resprate_first,vitals_resprate_last,vitals_resprate_rate_change,vitals_resprate_coeff,vitals_o2sat_mean,vitals_o2sat_median,vitals_o2sat_min,vitals_o2sat_max,vitals_o2sat_std,vitals_o2sat_first,vitals_o2sat_last,vitals_o2sat_rate_change,vitals_o2sat_coeff,vitals_sbp_mean,vitals_sbp_median,vitals_sbp_min,vitals_sbp_max,vitals_sbp_std,vitals_sbp_first,vitals_sbp_last,vitals_sbp_rate_change,vitals_sbp_coeff,vitals_dbp_mean,vitals_dbp_median,vitals_dbp_min,vitals_dbp_max,vitals_dbp_std,vitals_dbp_first,vitals_dbp_last,vitals_dbp_rate_change,vitals_dbp_coeff,vitals_acuity,labvalues_absolute_basophil_count_first,labvalues_absolute_eosinophil_count_first,labvalues_absolute_lymphocyte_count_first,labvalues_alanine_aminotransferase_(alt)_first,labvalues_albumin_first,labvalues_alkaline_phosphatase_first,labvalues_asparate_aminotransferase_(ast)_first,labvalues_basophils_first,labvalues_bicarbonate_first,"labvalues_bilirubin,_total_first","labvalues_calcium,_total_first",labvalues_chloride_first,labvalues_creatinine_first,labvalues_eosinophils_first,labvalues_glucose_first,labvalues_hematocrit_first,labvalues_hemoglobin_first,labvalues_inr(pt)_first,labvalues_lactate_first,labvalues_lymphocytes_first,labvalues_magnesium_first,labvalues_neutrophils_first,labvalues_pt_first,labvalues_ptt_first,labvalues_phosphate_first,labvalues_platelet_count_first,labvalues_potassium_first,labvalues_rdw_first,labvalues_red_blood_cells_first,labvalues_sodium_first,labvalues_troponin_t_first,labvalues_urea_nitrogen_first,labvalues_white_blood_cells_first,labvalues_ph_first,deterioration_severe_hypoxemia,deterioration_ecmo,deterioration_vasopressors,deterioration_inotropes,deterioration_mechanical_ventilation,deterioration_cardiac_arrest,deterioration_icu_24h,deterioration_icu_stay,deterioration_mortality_1d,deterioration_mortality_7d,deterioration_mortality_28d,deterioration_mortality_90d,deterioration_mortality_180d,deterioration_mortality_365d,deterioration_mortality_stay,general_data,admit,mortality_any,mortality_28d,mortality_365d,mortality_gt365d,mortality_category,clinical_deterioration_any,death_or_deterioration_any
1,40000084,2179-08-30 11:58:00,"['R112', 'K920']",[],1,27.0,2178.0,26.0,,0,9,2179-08-30 11:49:00,2179-08-30 18:35:00,HISPANIC/LATINO - DOMINICAN,2179-08-30 13:19:00,,,,0,0,1,0,0,,,,37.277778,37.277778,37.277778,37.277778,0.0,37.277778,37.277778,0.0,0.0,60.5,60.5,57.0,64.0,3.5,64.0,57.0,-0.109375,-0.205882,16.5,16.5,15.0,18.0,1.5,18.0,15.0,-0.166667,-0.088235,98.5,98.5,98.0,99.0,0.5,99.0,98.0,-0.010101,-0.029412,112.5,112.5,103.0,122.0,9.5,122.0,103.0,-0.155738,-0.558824,68.0,68.0,66.0,70.0,2.0,70.0,66.0,-0.057143,-0.117647,2.0,0.04,0.27,1.32,,4.4,72.0,24.0,1.3,25.0,1.0,,102.0,0.7,8.8,92.0,46.8,15.8,1.0,,42.9,,35.3,11.4,32.0,,251.0,4.2,12.5,4.84,136.0,,7.0,3.1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,670359,0,0,0,0,0,Alive,0,0
2,40000115,2163-04-17 16:45:00,"['Z950', 'R001']",[],0,93.0,2157.0,87.0,2165-01-22,0,5,2163-04-17 16:32:00,2163-04-17 21:46:00,BLACK/AFRICAN AMERICAN,2163-04-17 18:02:00,15487.466667,645.0,,0,1,0,0,0,21.3,47.853956,,36.722222,36.722222,36.722222,36.722222,0.0,36.722222,36.722222,0.0,0.0,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,184.0,184.0,184.0,184.0,0.0,184.0,184.0,0.000000,0.000000,46.0,46.0,46.0,46.0,0.0,46.0,46.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,205343,0,1,0,0,1,Long-term,0,1
3,40000143,2175-07-21 01:01:00,"['Y0889XA', 'S066X0A']","['K219', 'Y048XXA', 'G43909', 'S066X0A', 'E785...",0,60.0,2175.0,60.0,,0,19,2175-07-21 00:37:00,2175-07-21 06:20:00,WHITE,2175-07-21 02:07:00,,,,0,0,0,0,1,25.3,53.070264,144.78,,,,,,,,,,92.0,92.0,90.0,94.0,2.0,94.0,90.0,-0.042553,-0.056338,16.0,16.0,14.0,18.0,2.0,14.0,18.0,0.285714,0.056338,99.0,99.0,99.0,99.0,0.0,99.0,99.0,0.000000,0.000000,158.5,158.5,146.0,171.0,12.5,171.0,146.0,-0.146199,-0.352113,102.0,102.0,100.0,104.0,2.0,104.0,100.0,-0.038462,-0.056338,1.0,0.14,0.27,2.38,,,,,0.7,26.0,,,101.0,0.8,1.4,101.0,41.3,13.6,0.9,,12.1,,79.2,9.8,27.1,,282.0,6.8,13.2,4.40,138.0,,11.0,19.6,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,375664,1,0,0,0,0,Alive,0,0
4,40000175,2147-02-03 01:28:00,['K922'],"['K766', 'B182', 'I8511', 'F17200', 'E119', 'K...",1,67.0,2147.0,67.0,2152-03-28,0,14,2147-02-03 01:13:00,2147-02-03 05:08:00,UNKNOWN,2147-02-03 02:43:00,45118.783333,1879.0,3.916667,0,0,0,1,0,,,,36.333333,36.333333,36.333333,36.333333,0.0,36.333333,36.333333,0.0,0.0,78.0,78.0,77.0,79.0,1.0,77.0,79.0,0.025974,0.054054,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,95.5,95.5,95.0,96.0,0.5,95.0,96.0,0.010526,0.027027,97.5,97.5,96.0,99.0,1.5,99.0,96.0,-0.030303,-0.081081,62.0,62.0,61.0,63.0,1.0,61.0,63.0,0.032787,0.054054,2.0,,,,25.0,2.8,73.0,39.0,0.5,25.0,0.7,,111.0,0.6,2.1,214.0,30.1,10.1,,,27.1,,63.5,,,,77.0,4.6,15.2,3.22,141.0,,22.0,4.8,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,287789,1,1,0,0,1,Long-term,1,1
5,40000243,2125-03-15 20:21:00,"['W1830XA', 'S0090XA']","['F419', 'D649', 'E039', 'E860', 'Y92009', 'R1...",0,78.0,2120.0,73.0,2127-03-24,0,14,2125-03-15 20:04:00,2125-03-16 02:29:21,WHITE,2125-03-15 21:34:00,17715.933333,738.0,,0,0,0,0,1,26.4,65.398895,,36.555556,36.555556,36.555556,36.555556,0.0,36.555556,36.555556,0.0,0.0,72.0,72.0,72.0,72.0,0.0,72.0,72.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,183.0,183.0,183.0,183.0,0.0,183.0,183.0,0.000000,0.000000,74.0,74.0,74.0,74.0,0.0,74.0,74.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61364,1,1,0,0,1,Long-term,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129050,49999768,2185-07-26 15:29:00,['R42'],"['H4010X4', 'G609', 'K7689', 'R0789', 'G4733',...",0,72.0,2183.0,70.0,2191-11-27,0,14,2185-07-26 15:15:00,2185-07-27 12:40:00,BLACK/CAPE VERDEAN,2185-07-26 16:45:00,55544.750000,2314.0,,0,1,0,0,0,37.5,92.986360,,,,,,,,,,,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,96.0,96.0,96.0,96.0,0.0,96.0,96.0,0.000000,0.000000,114.0,114.0,114.0,114.0,0.0,114.0,114.0,0.000000,0.000000,101.0,101.0,101.0,101.0,0.0,101.0,101.0,0.000000,0.000000,2.0,,,,15.0,4.1,84.0,18.0,0.9,28.0,0.2,9.4,86.0,2.1,1.5,265.0,32.8,10.2,,3.9,24.4,2.1,66.7,,,3.7,307.0,3.3,14.9,3.68,134.0,,64.0,8.9,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,591478,1,1,0,0,1,Long-term,0,1
129052,49999842,2162-09-09 15:45:00,"['Z98890', 'M79632', 'Z7901', 'M25562', 'M549'...",[],1,60.0,2156.0,54.0,,0,6,2162-09-09 15:27:00,2162-09-09 21:47:00,WHITE,2162-09-09 16:57:00,,,,0,0,0,0,1,,,,36.500000,36.500000,36.500000,36.500000,0.0,36.500000,36.500000,0.0,0.0,64.0,64.0,64.0,64.0,0.0,64.0,64.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,156.0,156.0,156.0,156.0,0.0,156.0,156.0,0.000000,0.000000,75.0,75.0,75.0,75.0,0.0,75.0,75.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,764408,0,0,0,0,0,Alive,0,0
129053,49999850,2170-09-30 10:31:00,"['N186', 'E119', 'R55', 'K859']","['N186', 'K219', 'Z7901', 'I739', 'D3500', 'M8...",0,71.0,2166.0,67.0,2176-11-23,0,11,2170-09-30 10:19:00,2170-09-30 16:38:00,HISPANIC/LATINO - MEXICAN,2170-09-30 11:49:00,53893.683333,2245.0,,0,0,1,0,0,34.9,89.357624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,21.0,4.1,124.0,64.0,0.4,24.0,0.5,,102.0,6.7,0.5,159.0,39.7,12.4,1.3,3.3,6.6,,89.9,13.9,21.7,,183.0,5.3,13.4,3.82,141.0,,50.0,10.5,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,350992,1,1,0,0,1,Long-term,0,1
129054,49999893,2161-12-08 18:13:00,['R42'],"['J341', 'Z9079', 'M47896', 'Z87891', 'M109', ...",1,73.0,2155.0,67.0,2162-05-25,0,9,2161-12-08 18:00:00,2161-12-09 02:33:00,BLACK/AFRICAN AMERICAN,2161-12-08 19:30:00,4014.000000,167.0,,0,1,0,0,0,26.0,75.400598,170.18,35.944444,35.944444,35.944444,35.944444,0.0,35.944444,35.944444,0.0,0.0,78.0,78.0,78.0,78.0,0.0,78.0,78.0,0.000000,0.000000,20.0,20.0,20.0,20.0,0.0,20.0,20.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,134.0,134.0,134.0,134.0,0.0,134.0,134.0,0.000000,0.000000,76.0,76.0,76.0,76.0,0.0,76.0,76.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,296265,1,1,0,1,0,Medium-term,0,1


In [26]:
# Create a df with discharged patients
# initial plan was to evaluate subset of patients who were discharged and died
# THIS WAS DONE BEFORE FEATURE ENGINIEERING AND DATA CLEANUP

# Identify mortality columns
mortality_cols = [c for c in df.columns if "mortality" in c.lower()]

# Subset to discharged patients
df_discharged = df[df["admit"] == 0].copy()

# Replace -999 with 0 in mortality columns
df_discharged[mortality_cols] = df_discharged[mortality_cols].replace(-999, 0)

# Count how many patients had a death flag (>0)
mortality_counts = (
    (df_discharged[mortality_cols] > 0).sum().sort_values(ascending=False)
)

print("Deaths among discharged patients:")
print(mortality_counts)

Deaths among discharged patients:
general_mortality_hours         4158
general_mortality_days          4133
deterioration_mortality_365d    1709
deterioration_mortality_180d    1026
deterioration_mortality_90d      585
deterioration_mortality_28d      214
deterioration_mortality_7d        77
deterioration_mortality_1d        29
deterioration_mortality_stay      10
dtype: int64


In [23]:
# datetime adjustments
s = pd.to_datetime(df["general_90min"], errors="coerce")

df["month_time"] = s.dt.month  # 1..12
df["tod_time"] = s.dt.hour.astype("Int64")
df[["month_time", "tod_time"]]

Unnamed: 0,month_time,tod_time
1,8,13
2,4,18
3,7,2
4,2,2
5,3,21
...,...,...
129050,7,16
129052,9,16
129053,9,11
129054,12,19


In [24]:
to_remove = [
    "general_dod",
    "general_ecg_no_within_stay",
    "general_strat_fold",
    "general_intime",
    "general_outtime",
    "general_ecg_time",
    "AAbificant_Digits",
    "general_icu_time_hours",
    "target",
    "Unnamed: 0",
    "general_study_id",
    "general_subject_id",
    "general_ed_stay_id",
    "general_ed_hadm_id",
    "general_data",
    "general_anchor_year",
    "general_anchor_age",
    "general_90min",
]

df = df.drop(columns=[c for c in to_remove if c in df.columns])

In [21]:
df.to_pickle("df_clean.pkl")

In [35]:
df['support_mortality_combo'].value_counts()

support_mortality_combo
noSupport_alive            85723
noSupport_death_>1y        11659
noSupport_death_29–365d    11207
noSupport_death_≤28d        3605
support_alive               2083
support_death_≤28d          1600
support_death_29–365d        678
support_death_>1y            440
Name: count, dtype: int64

# Original start location to load PKL with clean DF, but now using best.pkl with new features, further into the notebook

In [22]:
###################################
# Can start from here and load pkl

df = pd.read_pickle("./df_clean.pkl")

In [20]:
df.to_csv(
    "mds_10_15.csv",
    index=False,  # don't write the pandas index
    encoding="utf-8",  # good default
    na_rep="",  # how to write missing values
)

## Below we add more targets to our DF

In [7]:
organ_support = (
    (df["deterioration_ecmo"] == 1)
    | (df["deterioration_vasopressors"] == 1)
    | (df["deterioration_inotropes"] == 1)
    | (df["deterioration_mechanical_ventilation"] == 1)
    | (df["deterioration_cardiac_arrest"] == 1)
)

df["organ_support"] = organ_support.astype(int)

In [23]:
df

Unnamed: 0,general_ed_diag_ed,general_ed_diag_hosp,demographics_gender,demographics_age,general_race,general_mortality_hours,general_mortality_days,demographics_ethnicity_asian,demographics_ethnicity_black/african,demographics_ethnicity_hispanic/latino,demographics_ethnicity_other,demographics_ethnicity_white,biometrics_bmi,biometrics_weight,biometrics_height,vitals_temperature_mean,vitals_temperature_median,vitals_temperature_min,vitals_temperature_max,vitals_temperature_std,vitals_temperature_first,vitals_temperature_last,vitals_temperature_rate_change,vitals_temperature_coeff,vitals_heartrate_mean,vitals_heartrate_median,vitals_heartrate_min,vitals_heartrate_max,vitals_heartrate_std,vitals_heartrate_first,vitals_heartrate_last,vitals_heartrate_rate_change,vitals_heartrate_coeff,vitals_resprate_mean,vitals_resprate_median,vitals_resprate_min,vitals_resprate_max,vitals_resprate_std,vitals_resprate_first,vitals_resprate_last,vitals_resprate_rate_change,vitals_resprate_coeff,vitals_o2sat_mean,vitals_o2sat_median,vitals_o2sat_min,vitals_o2sat_max,vitals_o2sat_std,vitals_o2sat_first,vitals_o2sat_last,vitals_o2sat_rate_change,vitals_o2sat_coeff,vitals_sbp_mean,vitals_sbp_median,vitals_sbp_min,vitals_sbp_max,vitals_sbp_std,vitals_sbp_first,vitals_sbp_last,vitals_sbp_rate_change,vitals_sbp_coeff,vitals_dbp_mean,vitals_dbp_median,vitals_dbp_min,vitals_dbp_max,vitals_dbp_std,vitals_dbp_first,vitals_dbp_last,vitals_dbp_rate_change,vitals_dbp_coeff,vitals_acuity,labvalues_absolute_basophil_count_first,labvalues_absolute_eosinophil_count_first,labvalues_absolute_lymphocyte_count_first,labvalues_alanine_aminotransferase_(alt)_first,labvalues_albumin_first,labvalues_alkaline_phosphatase_first,labvalues_asparate_aminotransferase_(ast)_first,labvalues_basophils_first,labvalues_bicarbonate_first,"labvalues_bilirubin,_total_first","labvalues_calcium,_total_first",labvalues_chloride_first,labvalues_creatinine_first,labvalues_eosinophils_first,labvalues_glucose_first,labvalues_hematocrit_first,labvalues_hemoglobin_first,labvalues_inr(pt)_first,labvalues_lactate_first,labvalues_lymphocytes_first,labvalues_magnesium_first,labvalues_neutrophils_first,labvalues_pt_first,labvalues_ptt_first,labvalues_phosphate_first,labvalues_platelet_count_first,labvalues_potassium_first,labvalues_rdw_first,labvalues_red_blood_cells_first,labvalues_sodium_first,labvalues_troponin_t_first,labvalues_urea_nitrogen_first,labvalues_white_blood_cells_first,labvalues_ph_first,deterioration_severe_hypoxemia,deterioration_ecmo,deterioration_vasopressors,deterioration_inotropes,deterioration_mechanical_ventilation,deterioration_cardiac_arrest,deterioration_icu_24h,deterioration_icu_stay,deterioration_mortality_1d,deterioration_mortality_7d,deterioration_mortality_28d,deterioration_mortality_90d,deterioration_mortality_180d,deterioration_mortality_365d,deterioration_mortality_stay,admit,mortality_any,mortality_28d,mortality_365d,mortality_gt365d,mortality_category,clinical_deterioration_any,death_or_deterioration_any,month_time,tod_time,organ_support
1,"['R112', 'K920']",[],1,27.0,HISPANIC/LATINO - DOMINICAN,,,0,0,1,0,0,,,,37.277778,37.277778,37.277778,37.277778,0.0,37.277778,37.277778,0.0,0.0,60.5,60.5,57.0,64.0,3.5,64.0,57.0,-0.109375,-0.205882,16.5,16.5,15.0,18.0,1.5,18.0,15.0,-0.166667,-0.088235,98.5,98.5,98.0,99.0,0.5,99.0,98.0,-0.010101,-0.029412,112.5,112.5,103.0,122.0,9.5,122.0,103.0,-0.155738,-0.558824,68.0,68.0,66.0,70.0,2.0,70.0,66.0,-0.057143,-0.117647,2.0,0.04,0.27,1.32,,4.4,72.0,24.0,1.3,25.0,1.0,,102.0,0.7,8.8,92.0,46.8,15.8,1.0,,42.9,,35.3,11.4,32.0,,251.0,4.2,12.5,4.84,136.0,,7.0,3.1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,Alive,0,0,8,13,0
2,"['Z950', 'R001']",[],0,93.0,BLACK/AFRICAN AMERICAN,15487.466667,645.0,0,1,0,0,0,21.3,47.853956,,36.722222,36.722222,36.722222,36.722222,0.0,36.722222,36.722222,0.0,0.0,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,184.0,184.0,184.0,184.0,0.0,184.0,184.0,0.000000,0.000000,46.0,46.0,46.0,46.0,0.0,46.0,46.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,1,Long-term,0,1,4,18,0
3,"['Y0889XA', 'S066X0A']","['K219', 'Y048XXA', 'G43909', 'S066X0A', 'E785...",0,60.0,WHITE,,,0,0,0,0,1,25.3,53.070264,144.78,,,,,,,,,,92.0,92.0,90.0,94.0,2.0,94.0,90.0,-0.042553,-0.056338,16.0,16.0,14.0,18.0,2.0,14.0,18.0,0.285714,0.056338,99.0,99.0,99.0,99.0,0.0,99.0,99.0,0.000000,0.000000,158.5,158.5,146.0,171.0,12.5,171.0,146.0,-0.146199,-0.352113,102.0,102.0,100.0,104.0,2.0,104.0,100.0,-0.038462,-0.056338,1.0,0.14,0.27,2.38,,,,,0.7,26.0,,,101.0,0.8,1.4,101.0,41.3,13.6,0.9,,12.1,,79.2,9.8,27.1,,282.0,6.8,13.2,4.40,138.0,,11.0,19.6,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,Alive,0,0,7,2,0
4,['K922'],"['K766', 'B182', 'I8511', 'F17200', 'E119', 'K...",1,67.0,UNKNOWN,45118.783333,1879.0,0,0,0,1,0,,,,36.333333,36.333333,36.333333,36.333333,0.0,36.333333,36.333333,0.0,0.0,78.0,78.0,77.0,79.0,1.0,77.0,79.0,0.025974,0.054054,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,95.5,95.5,95.0,96.0,0.5,95.0,96.0,0.010526,0.027027,97.5,97.5,96.0,99.0,1.5,99.0,96.0,-0.030303,-0.081081,62.0,62.0,61.0,63.0,1.0,61.0,63.0,0.032787,0.054054,2.0,,,,25.0,2.8,73.0,39.0,0.5,25.0,0.7,,111.0,0.6,2.1,214.0,30.1,10.1,,,27.1,,63.5,,,,77.0,4.6,15.2,3.22,141.0,,22.0,4.8,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,1,1,2,2,0
5,"['W1830XA', 'S0090XA']","['F419', 'D649', 'E039', 'E860', 'Y92009', 'R1...",0,78.0,WHITE,17715.933333,738.0,0,0,0,0,1,26.4,65.398895,,36.555556,36.555556,36.555556,36.555556,0.0,36.555556,36.555556,0.0,0.0,72.0,72.0,72.0,72.0,0.0,72.0,72.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,183.0,183.0,183.0,183.0,0.0,183.0,183.0,0.000000,0.000000,74.0,74.0,74.0,74.0,0.0,74.0,74.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,3,21,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129050,['R42'],"['H4010X4', 'G609', 'K7689', 'R0789', 'G4733',...",0,72.0,BLACK/CAPE VERDEAN,55544.750000,2314.0,0,1,0,0,0,37.5,92.986360,,,,,,,,,,,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,96.0,96.0,96.0,96.0,0.0,96.0,96.0,0.000000,0.000000,114.0,114.0,114.0,114.0,0.0,114.0,114.0,0.000000,0.000000,101.0,101.0,101.0,101.0,0.0,101.0,101.0,0.000000,0.000000,2.0,,,,15.0,4.1,84.0,18.0,0.9,28.0,0.2,9.4,86.0,2.1,1.5,265.0,32.8,10.2,,3.9,24.4,2.1,66.7,,,3.7,307.0,3.3,14.9,3.68,134.0,,64.0,8.9,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,7,16,0
129052,"['Z98890', 'M79632', 'Z7901', 'M25562', 'M549'...",[],1,60.0,WHITE,,,0,0,0,0,1,,,,36.500000,36.500000,36.500000,36.500000,0.0,36.500000,36.500000,0.0,0.0,64.0,64.0,64.0,64.0,0.0,64.0,64.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,156.0,156.0,156.0,156.0,0.0,156.0,156.0,0.000000,0.000000,75.0,75.0,75.0,75.0,0.0,75.0,75.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,Alive,0,0,9,16,0
129053,"['N186', 'E119', 'R55', 'K859']","['N186', 'K219', 'Z7901', 'I739', 'D3500', 'M8...",0,71.0,HISPANIC/LATINO - MEXICAN,53893.683333,2245.0,0,0,1,0,0,34.9,89.357624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,21.0,4.1,124.0,64.0,0.4,24.0,0.5,,102.0,6.7,0.5,159.0,39.7,12.4,1.3,3.3,6.6,,89.9,13.9,21.7,,183.0,5.3,13.4,3.82,141.0,,50.0,10.5,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,9,11,0
129054,['R42'],"['J341', 'Z9079', 'M47896', 'Z87891', 'M109', ...",1,73.0,BLACK/AFRICAN AMERICAN,4014.000000,167.0,0,1,0,0,0,26.0,75.400598,170.18,35.944444,35.944444,35.944444,35.944444,0.0,35.944444,35.944444,0.0,0.0,78.0,78.0,78.0,78.0,0.0,78.0,78.0,0.000000,0.000000,20.0,20.0,20.0,20.0,0.0,20.0,20.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,134.0,134.0,134.0,134.0,0.0,134.0,134.0,0.000000,0.000000,76.0,76.0,76.0,76.0,0.0,76.0,76.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1,1,0,1,0,Medium-term,0,1,12,19,0


In [18]:

mortality_any = (df["mortality_any"] == 1)
death_28d     = (df["mortality_28d"] == 1)


# Organ support AND any death
organ_support_and_any_death = df[organ_support & mortality_any]
print("organ support AND any death:", organ_support_and_any_death.shape[0])

# Organ support AND died within 28 days
organ_support_and_death_28d = df[organ_support & death_28d]
print("organ support AND death ≤28d:", organ_support_and_death_28d.shape[0])

# Organ support AND NO death
organ_support_and_no_death = df[organ_support & ~mortality_any]
print("organ support AND no death:", organ_support_and_no_death.shape[0])

# Organ support AND NO death
no_organ_support_and_death = df[~organ_support & mortality_any]
print("no organ support AND death:", no_organ_support_and_death.shape[0])



print("total on organ support:", organ_support.sum())
print("total patients:", len(df))
print("total who died (any time):", mortality_any.sum())
print("total ECMO patients:", (df["deterioration_ecmo"] == 1).sum())
print("total dead by category", df['mortality_category'].value_counts())

organ support AND any death: 2718
organ support AND death ≤28d: 1600
organ support AND no death: 2083
no organ support AND death: 26471
total on organ support: 4801
total patients: 116995
total who died (any time): 29189
total ECMO patients: 159
total dead by category mortality_category
Alive          87806
Long-term      12099
Medium-term    11885
Short-term      5205
Name: count, dtype: int64


In [34]:
mort_28d    = (df["mortality_28d"] == 1)
mort_365d   = (df["mortality_365d"] == 1)   
mort_gt365d = (df["mortality_gt365d"] == 1)

mortality_class = np.select(
    condlist=[
        mort_28d,
        (mort_365d & ~mort_28d),   
        mort_gt365d
    ],
    choicelist=[1, 2, 3],
    default=0
).astype(int)

df["mortality_class"] = mortality_class

#create classes that represent multiple permutations of death time frame and need for organ support

support_label = np.where(df["organ_support"] == 1, "support", "noSupport")
mort_label = np.select(
    [df["mortality_class"].eq(0), df["mortality_class"].eq(1), df["mortality_class"].eq(2), df["mortality_class"].eq(3)],
    ["alive", "death_28d", "death_29_365d", "death_1y"]
)

combo = (
    pd.Series(support_label, index=df.index)
      .str.cat(pd.Series(mort_label, index=df.index), sep="_")
)

df["support_mortality_combo"] = pd.Categorical(
    combo,
    categories=[
        "noSupport_alive",
        "noSupport_death_28d",
        "noSupport_death_29_365d",
        "noSupport_death_1y",
        "support_alive",
        "support_death_28d",
        "support_death_29_365d",
        "support_death_1y",
    ]
)



Unnamed: 0,general_ed_diag_ed,general_ed_diag_hosp,demographics_gender,demographics_age,general_race,general_mortality_hours,general_mortality_days,demographics_ethnicity_asian,demographics_ethnicity_black/african,demographics_ethnicity_hispanic/latino,demographics_ethnicity_other,demographics_ethnicity_white,biometrics_bmi,biometrics_weight,biometrics_height,vitals_temperature_mean,vitals_temperature_median,vitals_temperature_min,vitals_temperature_max,vitals_temperature_std,vitals_temperature_first,vitals_temperature_last,vitals_temperature_rate_change,vitals_temperature_coeff,vitals_heartrate_mean,vitals_heartrate_median,vitals_heartrate_min,vitals_heartrate_max,vitals_heartrate_std,vitals_heartrate_first,vitals_heartrate_last,vitals_heartrate_rate_change,vitals_heartrate_coeff,vitals_resprate_mean,vitals_resprate_median,vitals_resprate_min,vitals_resprate_max,vitals_resprate_std,vitals_resprate_first,vitals_resprate_last,vitals_resprate_rate_change,vitals_resprate_coeff,vitals_o2sat_mean,vitals_o2sat_median,vitals_o2sat_min,vitals_o2sat_max,vitals_o2sat_std,vitals_o2sat_first,vitals_o2sat_last,vitals_o2sat_rate_change,vitals_o2sat_coeff,vitals_sbp_mean,vitals_sbp_median,vitals_sbp_min,vitals_sbp_max,vitals_sbp_std,vitals_sbp_first,vitals_sbp_last,vitals_sbp_rate_change,vitals_sbp_coeff,vitals_dbp_mean,vitals_dbp_median,vitals_dbp_min,vitals_dbp_max,vitals_dbp_std,vitals_dbp_first,vitals_dbp_last,vitals_dbp_rate_change,vitals_dbp_coeff,vitals_acuity,labvalues_absolute_basophil_count_first,labvalues_absolute_eosinophil_count_first,labvalues_absolute_lymphocyte_count_first,labvalues_alanine_aminotransferase_(alt)_first,labvalues_albumin_first,labvalues_alkaline_phosphatase_first,labvalues_asparate_aminotransferase_(ast)_first,labvalues_basophils_first,labvalues_bicarbonate_first,"labvalues_bilirubin,_total_first","labvalues_calcium,_total_first",labvalues_chloride_first,labvalues_creatinine_first,labvalues_eosinophils_first,labvalues_glucose_first,labvalues_hematocrit_first,labvalues_hemoglobin_first,labvalues_inr(pt)_first,labvalues_lactate_first,labvalues_lymphocytes_first,labvalues_magnesium_first,labvalues_neutrophils_first,labvalues_pt_first,labvalues_ptt_first,labvalues_phosphate_first,labvalues_platelet_count_first,labvalues_potassium_first,labvalues_rdw_first,labvalues_red_blood_cells_first,labvalues_sodium_first,labvalues_troponin_t_first,labvalues_urea_nitrogen_first,labvalues_white_blood_cells_first,labvalues_ph_first,deterioration_severe_hypoxemia,deterioration_ecmo,deterioration_vasopressors,deterioration_inotropes,deterioration_mechanical_ventilation,deterioration_cardiac_arrest,deterioration_icu_24h,deterioration_icu_stay,deterioration_mortality_1d,deterioration_mortality_7d,deterioration_mortality_28d,deterioration_mortality_90d,deterioration_mortality_180d,deterioration_mortality_365d,deterioration_mortality_stay,admit,mortality_any,mortality_28d,mortality_365d,mortality_gt365d,mortality_category,clinical_deterioration_any,death_or_deterioration_any,month_time,tod_time,organ_support,mortality_class,support_mortality_combo,shortterm_death_or_deterioration
1,"['R112', 'K920']",[],1,27.0,HISPANIC/LATINO - DOMINICAN,,,0,0,1,0,0,,,,37.277778,37.277778,37.277778,37.277778,0.0,37.277778,37.277778,0.0,0.0,60.5,60.5,57.0,64.0,3.5,64.0,57.0,-0.109375,-0.205882,16.5,16.5,15.0,18.0,1.5,18.0,15.0,-0.166667,-0.088235,98.5,98.5,98.0,99.0,0.5,99.0,98.0,-0.010101,-0.029412,112.5,112.5,103.0,122.0,9.5,122.0,103.0,-0.155738,-0.558824,68.0,68.0,66.0,70.0,2.0,70.0,66.0,-0.057143,-0.117647,2.0,0.04,0.27,1.32,,4.4,72.0,24.0,1.3,25.0,1.0,,102.0,0.7,8.8,92.0,46.8,15.8,1.0,,42.9,,35.3,11.4,32.0,,251.0,4.2,12.5,4.84,136.0,,7.0,3.1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,Alive,0,0,8,13,0,0,noSupport_alive,0
2,"['Z950', 'R001']",[],0,93.0,BLACK/AFRICAN AMERICAN,15487.466667,645.0,0,1,0,0,0,21.3,47.853956,,36.722222,36.722222,36.722222,36.722222,0.0,36.722222,36.722222,0.0,0.0,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,184.0,184.0,184.0,184.0,0.0,184.0,184.0,0.000000,0.000000,46.0,46.0,46.0,46.0,0.0,46.0,46.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,1,Long-term,0,1,4,18,0,3,noSupport_death_1y,0
3,"['Y0889XA', 'S066X0A']","['K219', 'Y048XXA', 'G43909', 'S066X0A', 'E785...",0,60.0,WHITE,,,0,0,0,0,1,25.3,53.070264,144.78,,,,,,,,,,92.0,92.0,90.0,94.0,2.0,94.0,90.0,-0.042553,-0.056338,16.0,16.0,14.0,18.0,2.0,14.0,18.0,0.285714,0.056338,99.0,99.0,99.0,99.0,0.0,99.0,99.0,0.000000,0.000000,158.5,158.5,146.0,171.0,12.5,171.0,146.0,-0.146199,-0.352113,102.0,102.0,100.0,104.0,2.0,104.0,100.0,-0.038462,-0.056338,1.0,0.14,0.27,2.38,,,,,0.7,26.0,,,101.0,0.8,1.4,101.0,41.3,13.6,0.9,,12.1,,79.2,9.8,27.1,,282.0,6.8,13.2,4.40,138.0,,11.0,19.6,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,Alive,0,0,7,2,0,0,noSupport_alive,0
4,['K922'],"['K766', 'B182', 'I8511', 'F17200', 'E119', 'K...",1,67.0,UNKNOWN,45118.783333,1879.0,0,0,0,1,0,,,,36.333333,36.333333,36.333333,36.333333,0.0,36.333333,36.333333,0.0,0.0,78.0,78.0,77.0,79.0,1.0,77.0,79.0,0.025974,0.054054,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,95.5,95.5,95.0,96.0,0.5,95.0,96.0,0.010526,0.027027,97.5,97.5,96.0,99.0,1.5,99.0,96.0,-0.030303,-0.081081,62.0,62.0,61.0,63.0,1.0,61.0,63.0,0.032787,0.054054,2.0,,,,25.0,2.8,73.0,39.0,0.5,25.0,0.7,,111.0,0.6,2.1,214.0,30.1,10.1,,,27.1,,63.5,,,,77.0,4.6,15.2,3.22,141.0,,22.0,4.8,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,1,1,2,2,0,3,noSupport_death_1y,0
5,"['W1830XA', 'S0090XA']","['F419', 'D649', 'E039', 'E860', 'Y92009', 'R1...",0,78.0,WHITE,17715.933333,738.0,0,0,0,0,1,26.4,65.398895,,36.555556,36.555556,36.555556,36.555556,0.0,36.555556,36.555556,0.0,0.0,72.0,72.0,72.0,72.0,0.0,72.0,72.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,183.0,183.0,183.0,183.0,0.0,183.0,183.0,0.000000,0.000000,74.0,74.0,74.0,74.0,0.0,74.0,74.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,3,21,0,3,noSupport_death_1y,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129050,['R42'],"['H4010X4', 'G609', 'K7689', 'R0789', 'G4733',...",0,72.0,BLACK/CAPE VERDEAN,55544.750000,2314.0,0,1,0,0,0,37.5,92.986360,,,,,,,,,,,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,96.0,96.0,96.0,96.0,0.0,96.0,96.0,0.000000,0.000000,114.0,114.0,114.0,114.0,0.0,114.0,114.0,0.000000,0.000000,101.0,101.0,101.0,101.0,0.0,101.0,101.0,0.000000,0.000000,2.0,,,,15.0,4.1,84.0,18.0,0.9,28.0,0.2,9.4,86.0,2.1,1.5,265.0,32.8,10.2,,3.9,24.4,2.1,66.7,,,3.7,307.0,3.3,14.9,3.68,134.0,,64.0,8.9,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,7,16,0,3,noSupport_death_1y,0
129052,"['Z98890', 'M79632', 'Z7901', 'M25562', 'M549'...",[],1,60.0,WHITE,,,0,0,0,0,1,,,,36.500000,36.500000,36.500000,36.500000,0.0,36.500000,36.500000,0.0,0.0,64.0,64.0,64.0,64.0,0.0,64.0,64.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,156.0,156.0,156.0,156.0,0.0,156.0,156.0,0.000000,0.000000,75.0,75.0,75.0,75.0,0.0,75.0,75.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,Alive,0,0,9,16,0,0,noSupport_alive,0
129053,"['N186', 'E119', 'R55', 'K859']","['N186', 'K219', 'Z7901', 'I739', 'D3500', 'M8...",0,71.0,HISPANIC/LATINO - MEXICAN,53893.683333,2245.0,0,0,1,0,0,34.9,89.357624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,21.0,4.1,124.0,64.0,0.4,24.0,0.5,,102.0,6.7,0.5,159.0,39.7,12.4,1.3,3.3,6.6,,89.9,13.9,21.7,,183.0,5.3,13.4,3.82,141.0,,50.0,10.5,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,9,11,0,3,noSupport_death_1y,0
129054,['R42'],"['J341', 'Z9079', 'M47896', 'Z87891', 'M109', ...",1,73.0,BLACK/AFRICAN AMERICAN,4014.000000,167.0,0,1,0,0,0,26.0,75.400598,170.18,35.944444,35.944444,35.944444,35.944444,0.0,35.944444,35.944444,0.0,0.0,78.0,78.0,78.0,78.0,0.0,78.0,78.0,0.000000,0.000000,20.0,20.0,20.0,20.0,0.0,20.0,20.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,134.0,134.0,134.0,134.0,0.0,134.0,134.0,0.000000,0.000000,76.0,76.0,76.0,76.0,0.0,76.0,76.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1,1,0,1,0,Medium-term,0,1,12,19,0,2,noSupport_death_29_365d,0


In [6]:
mapper = {
    'noSupport_alive': 0,
    'noSupport_death_28d': 1,
    'noSupport_death_29_365d': 2,
    'noSupport_death_1y': 3,
    'support_alive': 4,
    'support_death_28d': 5,
    'support_death_29_365d': 6,
    'support_death_1y': 7
}

df['support_mortality_combo_id'] = df['support_mortality_combo'].map(mapper).astype('Int64')

In [None]:
death_28d = (df["mortality_28d"] == 1)

# New binary label that will serve as our target
df["shortterm_death_or_deterioration"] = (organ_support | death_28d).astype(int)

In [39]:
df

Unnamed: 0,general_ed_diag_ed,general_ed_diag_hosp,demographics_gender,demographics_age,general_race,general_mortality_hours,general_mortality_days,demographics_ethnicity_asian,demographics_ethnicity_black/african,demographics_ethnicity_hispanic/latino,demographics_ethnicity_other,demographics_ethnicity_white,biometrics_bmi,biometrics_weight,biometrics_height,vitals_temperature_mean,vitals_temperature_median,vitals_temperature_min,vitals_temperature_max,vitals_temperature_std,vitals_temperature_first,vitals_temperature_last,vitals_temperature_rate_change,vitals_temperature_coeff,vitals_heartrate_mean,vitals_heartrate_median,vitals_heartrate_min,vitals_heartrate_max,vitals_heartrate_std,vitals_heartrate_first,vitals_heartrate_last,vitals_heartrate_rate_change,vitals_heartrate_coeff,vitals_resprate_mean,vitals_resprate_median,vitals_resprate_min,vitals_resprate_max,vitals_resprate_std,vitals_resprate_first,vitals_resprate_last,vitals_resprate_rate_change,vitals_resprate_coeff,vitals_o2sat_mean,vitals_o2sat_median,vitals_o2sat_min,vitals_o2sat_max,vitals_o2sat_std,vitals_o2sat_first,vitals_o2sat_last,vitals_o2sat_rate_change,vitals_o2sat_coeff,vitals_sbp_mean,vitals_sbp_median,vitals_sbp_min,vitals_sbp_max,vitals_sbp_std,vitals_sbp_first,vitals_sbp_last,vitals_sbp_rate_change,vitals_sbp_coeff,vitals_dbp_mean,vitals_dbp_median,vitals_dbp_min,vitals_dbp_max,vitals_dbp_std,vitals_dbp_first,vitals_dbp_last,vitals_dbp_rate_change,vitals_dbp_coeff,vitals_acuity,labvalues_absolute_basophil_count_first,labvalues_absolute_eosinophil_count_first,labvalues_absolute_lymphocyte_count_first,labvalues_alanine_aminotransferase_(alt)_first,labvalues_albumin_first,labvalues_alkaline_phosphatase_first,labvalues_asparate_aminotransferase_(ast)_first,labvalues_basophils_first,labvalues_bicarbonate_first,"labvalues_bilirubin,_total_first","labvalues_calcium,_total_first",labvalues_chloride_first,labvalues_creatinine_first,labvalues_eosinophils_first,labvalues_glucose_first,labvalues_hematocrit_first,labvalues_hemoglobin_first,labvalues_inr(pt)_first,labvalues_lactate_first,labvalues_lymphocytes_first,labvalues_magnesium_first,labvalues_neutrophils_first,labvalues_pt_first,labvalues_ptt_first,labvalues_phosphate_first,labvalues_platelet_count_first,labvalues_potassium_first,labvalues_rdw_first,labvalues_red_blood_cells_first,labvalues_sodium_first,labvalues_troponin_t_first,labvalues_urea_nitrogen_first,labvalues_white_blood_cells_first,labvalues_ph_first,deterioration_severe_hypoxemia,deterioration_ecmo,deterioration_vasopressors,deterioration_inotropes,deterioration_mechanical_ventilation,deterioration_cardiac_arrest,deterioration_icu_24h,deterioration_icu_stay,deterioration_mortality_1d,deterioration_mortality_7d,deterioration_mortality_28d,deterioration_mortality_90d,deterioration_mortality_180d,deterioration_mortality_365d,deterioration_mortality_stay,admit,mortality_any,mortality_28d,mortality_365d,mortality_gt365d,mortality_category,clinical_deterioration_any,death_or_deterioration_any,month_time,tod_time,organ_support,mortality_class,support_mortality_combo,shortterm_death_or_deterioration,support_mortality_combo_id
1,"['R112', 'K920']",[],1,27.0,HISPANIC/LATINO - DOMINICAN,,,0,0,1,0,0,,,,37.277778,37.277778,37.277778,37.277778,0.0,37.277778,37.277778,0.0,0.0,60.5,60.5,57.0,64.0,3.5,64.0,57.0,-0.109375,-0.205882,16.5,16.5,15.0,18.0,1.5,18.0,15.0,-0.166667,-0.088235,98.5,98.5,98.0,99.0,0.5,99.0,98.0,-0.010101,-0.029412,112.5,112.5,103.0,122.0,9.5,122.0,103.0,-0.155738,-0.558824,68.0,68.0,66.0,70.0,2.0,70.0,66.0,-0.057143,-0.117647,2.0,0.04,0.27,1.32,,4.4,72.0,24.0,1.3,25.0,1.0,,102.0,0.7,8.8,92.0,46.8,15.8,1.0,,42.9,,35.3,11.4,32.0,,251.0,4.2,12.5,4.84,136.0,,7.0,3.1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,Alive,0,0,8,13,0,0,noSupport_alive,0,0
2,"['Z950', 'R001']",[],0,93.0,BLACK/AFRICAN AMERICAN,15487.466667,645.0,0,1,0,0,0,21.3,47.853956,,36.722222,36.722222,36.722222,36.722222,0.0,36.722222,36.722222,0.0,0.0,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,184.0,184.0,184.0,184.0,0.0,184.0,184.0,0.000000,0.000000,46.0,46.0,46.0,46.0,0.0,46.0,46.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,1,Long-term,0,1,4,18,0,3,noSupport_death_1y,0,3
3,"['Y0889XA', 'S066X0A']","['K219', 'Y048XXA', 'G43909', 'S066X0A', 'E785...",0,60.0,WHITE,,,0,0,0,0,1,25.3,53.070264,144.78,,,,,,,,,,92.0,92.0,90.0,94.0,2.0,94.0,90.0,-0.042553,-0.056338,16.0,16.0,14.0,18.0,2.0,14.0,18.0,0.285714,0.056338,99.0,99.0,99.0,99.0,0.0,99.0,99.0,0.000000,0.000000,158.5,158.5,146.0,171.0,12.5,171.0,146.0,-0.146199,-0.352113,102.0,102.0,100.0,104.0,2.0,104.0,100.0,-0.038462,-0.056338,1.0,0.14,0.27,2.38,,,,,0.7,26.0,,,101.0,0.8,1.4,101.0,41.3,13.6,0.9,,12.1,,79.2,9.8,27.1,,282.0,6.8,13.2,4.40,138.0,,11.0,19.6,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,Alive,0,0,7,2,0,0,noSupport_alive,0,0
4,['K922'],"['K766', 'B182', 'I8511', 'F17200', 'E119', 'K...",1,67.0,UNKNOWN,45118.783333,1879.0,0,0,0,1,0,,,,36.333333,36.333333,36.333333,36.333333,0.0,36.333333,36.333333,0.0,0.0,78.0,78.0,77.0,79.0,1.0,77.0,79.0,0.025974,0.054054,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,95.5,95.5,95.0,96.0,0.5,95.0,96.0,0.010526,0.027027,97.5,97.5,96.0,99.0,1.5,99.0,96.0,-0.030303,-0.081081,62.0,62.0,61.0,63.0,1.0,61.0,63.0,0.032787,0.054054,2.0,,,,25.0,2.8,73.0,39.0,0.5,25.0,0.7,,111.0,0.6,2.1,214.0,30.1,10.1,,,27.1,,63.5,,,,77.0,4.6,15.2,3.22,141.0,,22.0,4.8,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,1,1,2,2,0,3,noSupport_death_1y,0,3
5,"['W1830XA', 'S0090XA']","['F419', 'D649', 'E039', 'E860', 'Y92009', 'R1...",0,78.0,WHITE,17715.933333,738.0,0,0,0,0,1,26.4,65.398895,,36.555556,36.555556,36.555556,36.555556,0.0,36.555556,36.555556,0.0,0.0,72.0,72.0,72.0,72.0,0.0,72.0,72.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,183.0,183.0,183.0,183.0,0.0,183.0,183.0,0.000000,0.000000,74.0,74.0,74.0,74.0,0.0,74.0,74.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,3,21,0,3,noSupport_death_1y,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129050,['R42'],"['H4010X4', 'G609', 'K7689', 'R0789', 'G4733',...",0,72.0,BLACK/CAPE VERDEAN,55544.750000,2314.0,0,1,0,0,0,37.5,92.986360,,,,,,,,,,,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,96.0,96.0,96.0,96.0,0.0,96.0,96.0,0.000000,0.000000,114.0,114.0,114.0,114.0,0.0,114.0,114.0,0.000000,0.000000,101.0,101.0,101.0,101.0,0.0,101.0,101.0,0.000000,0.000000,2.0,,,,15.0,4.1,84.0,18.0,0.9,28.0,0.2,9.4,86.0,2.1,1.5,265.0,32.8,10.2,,3.9,24.4,2.1,66.7,,,3.7,307.0,3.3,14.9,3.68,134.0,,64.0,8.9,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,7,16,0,3,noSupport_death_1y,0,3
129052,"['Z98890', 'M79632', 'Z7901', 'M25562', 'M549'...",[],1,60.0,WHITE,,,0,0,0,0,1,,,,36.500000,36.500000,36.500000,36.500000,0.0,36.500000,36.500000,0.0,0.0,64.0,64.0,64.0,64.0,0.0,64.0,64.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,156.0,156.0,156.0,156.0,0.0,156.0,156.0,0.000000,0.000000,75.0,75.0,75.0,75.0,0.0,75.0,75.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,Alive,0,0,9,16,0,0,noSupport_alive,0,0
129053,"['N186', 'E119', 'R55', 'K859']","['N186', 'K219', 'Z7901', 'I739', 'D3500', 'M8...",0,71.0,HISPANIC/LATINO - MEXICAN,53893.683333,2245.0,0,0,1,0,0,34.9,89.357624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,21.0,4.1,124.0,64.0,0.4,24.0,0.5,,102.0,6.7,0.5,159.0,39.7,12.4,1.3,3.3,6.6,,89.9,13.9,21.7,,183.0,5.3,13.4,3.82,141.0,,50.0,10.5,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,9,11,0,3,noSupport_death_1y,0,3
129054,['R42'],"['J341', 'Z9079', 'M47896', 'Z87891', 'M109', ...",1,73.0,BLACK/AFRICAN AMERICAN,4014.000000,167.0,0,1,0,0,0,26.0,75.400598,170.18,35.944444,35.944444,35.944444,35.944444,0.0,35.944444,35.944444,0.0,0.0,78.0,78.0,78.0,78.0,0.0,78.0,78.0,0.000000,0.000000,20.0,20.0,20.0,20.0,0.0,20.0,20.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,134.0,134.0,134.0,134.0,0.0,134.0,134.0,0.000000,0.000000,76.0,76.0,76.0,76.0,0.0,76.0,76.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1,1,0,1,0,Medium-term,0,1,12,19,0,2,noSupport_death_29_365d,0,2


In [7]:
df.to_pickle("df_best.pkl")

# Starting place after new features added, best pkl created

In [22]:
df = pd.read_pickle("./df_best.pkl")

In [23]:
df.to_csv(
    "mds_best.csv",
    index=False,  # don't write the pandas index
    encoding="utf-8",  # good default
    na_rep="",  # how to write missing values
)

In [24]:
df

Unnamed: 0,general_ed_diag_ed,general_ed_diag_hosp,demographics_gender,demographics_age,general_race,general_mortality_hours,general_mortality_days,demographics_ethnicity_asian,demographics_ethnicity_black/african,demographics_ethnicity_hispanic/latino,demographics_ethnicity_other,demographics_ethnicity_white,biometrics_bmi,biometrics_weight,biometrics_height,vitals_temperature_mean,vitals_temperature_median,vitals_temperature_min,vitals_temperature_max,vitals_temperature_std,vitals_temperature_first,vitals_temperature_last,vitals_temperature_rate_change,vitals_temperature_coeff,vitals_heartrate_mean,vitals_heartrate_median,vitals_heartrate_min,vitals_heartrate_max,vitals_heartrate_std,vitals_heartrate_first,vitals_heartrate_last,vitals_heartrate_rate_change,vitals_heartrate_coeff,vitals_resprate_mean,vitals_resprate_median,vitals_resprate_min,vitals_resprate_max,vitals_resprate_std,vitals_resprate_first,vitals_resprate_last,vitals_resprate_rate_change,vitals_resprate_coeff,vitals_o2sat_mean,vitals_o2sat_median,vitals_o2sat_min,vitals_o2sat_max,vitals_o2sat_std,vitals_o2sat_first,vitals_o2sat_last,vitals_o2sat_rate_change,vitals_o2sat_coeff,vitals_sbp_mean,vitals_sbp_median,vitals_sbp_min,vitals_sbp_max,vitals_sbp_std,vitals_sbp_first,vitals_sbp_last,vitals_sbp_rate_change,vitals_sbp_coeff,vitals_dbp_mean,vitals_dbp_median,vitals_dbp_min,vitals_dbp_max,vitals_dbp_std,vitals_dbp_first,vitals_dbp_last,vitals_dbp_rate_change,vitals_dbp_coeff,vitals_acuity,labvalues_absolute_basophil_count_first,labvalues_absolute_eosinophil_count_first,labvalues_absolute_lymphocyte_count_first,labvalues_alanine_aminotransferase_(alt)_first,labvalues_albumin_first,labvalues_alkaline_phosphatase_first,labvalues_asparate_aminotransferase_(ast)_first,labvalues_basophils_first,labvalues_bicarbonate_first,"labvalues_bilirubin,_total_first","labvalues_calcium,_total_first",labvalues_chloride_first,labvalues_creatinine_first,labvalues_eosinophils_first,labvalues_glucose_first,labvalues_hematocrit_first,labvalues_hemoglobin_first,labvalues_inr(pt)_first,labvalues_lactate_first,labvalues_lymphocytes_first,labvalues_magnesium_first,labvalues_neutrophils_first,labvalues_pt_first,labvalues_ptt_first,labvalues_phosphate_first,labvalues_platelet_count_first,labvalues_potassium_first,labvalues_rdw_first,labvalues_red_blood_cells_first,labvalues_sodium_first,labvalues_troponin_t_first,labvalues_urea_nitrogen_first,labvalues_white_blood_cells_first,labvalues_ph_first,deterioration_severe_hypoxemia,deterioration_ecmo,deterioration_vasopressors,deterioration_inotropes,deterioration_mechanical_ventilation,deterioration_cardiac_arrest,deterioration_icu_24h,deterioration_icu_stay,deterioration_mortality_1d,deterioration_mortality_7d,deterioration_mortality_28d,deterioration_mortality_90d,deterioration_mortality_180d,deterioration_mortality_365d,deterioration_mortality_stay,admit,mortality_any,mortality_28d,mortality_365d,mortality_gt365d,mortality_category,clinical_deterioration_any,death_or_deterioration_any,month_time,tod_time,organ_support,mortality_class,support_mortality_combo,shortterm_death_or_deterioration,support_mortality_combo_id
1,"['R112', 'K920']",[],1,27.0,HISPANIC/LATINO - DOMINICAN,,,0,0,1,0,0,,,,37.277778,37.277778,37.277778,37.277778,0.0,37.277778,37.277778,0.0,0.0,60.5,60.5,57.0,64.0,3.5,64.0,57.0,-0.109375,-0.205882,16.5,16.5,15.0,18.0,1.5,18.0,15.0,-0.166667,-0.088235,98.5,98.5,98.0,99.0,0.5,99.0,98.0,-0.010101,-0.029412,112.5,112.5,103.0,122.0,9.5,122.0,103.0,-0.155738,-0.558824,68.0,68.0,66.0,70.0,2.0,70.0,66.0,-0.057143,-0.117647,2.0,0.04,0.27,1.32,,4.4,72.0,24.0,1.3,25.0,1.0,,102.0,0.7,8.8,92.0,46.8,15.8,1.0,,42.9,,35.3,11.4,32.0,,251.0,4.2,12.5,4.84,136.0,,7.0,3.1,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,Alive,0,0,8,13,0,0,noSupport_alive,0,0
2,"['Z950', 'R001']",[],0,93.0,BLACK/AFRICAN AMERICAN,15487.466667,645.0,0,1,0,0,0,21.3,47.853956,,36.722222,36.722222,36.722222,36.722222,0.0,36.722222,36.722222,0.0,0.0,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,184.0,184.0,184.0,184.0,0.0,184.0,184.0,0.000000,0.000000,46.0,46.0,46.0,46.0,0.0,46.0,46.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,0,1,Long-term,0,1,4,18,0,3,noSupport_death_1y,0,3
3,"['Y0889XA', 'S066X0A']","['K219', 'Y048XXA', 'G43909', 'S066X0A', 'E785...",0,60.0,WHITE,,,0,0,0,0,1,25.3,53.070264,144.78,,,,,,,,,,92.0,92.0,90.0,94.0,2.0,94.0,90.0,-0.042553,-0.056338,16.0,16.0,14.0,18.0,2.0,14.0,18.0,0.285714,0.056338,99.0,99.0,99.0,99.0,0.0,99.0,99.0,0.000000,0.000000,158.5,158.5,146.0,171.0,12.5,171.0,146.0,-0.146199,-0.352113,102.0,102.0,100.0,104.0,2.0,104.0,100.0,-0.038462,-0.056338,1.0,0.14,0.27,2.38,,,,,0.7,26.0,,,101.0,0.8,1.4,101.0,41.3,13.6,0.9,,12.1,,79.2,9.8,27.1,,282.0,6.8,13.2,4.40,138.0,,11.0,19.6,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0,Alive,0,0,7,2,0,0,noSupport_alive,0,0
4,['K922'],"['K766', 'B182', 'I8511', 'F17200', 'E119', 'K...",1,67.0,UNKNOWN,45118.783333,1879.0,0,0,0,1,0,,,,36.333333,36.333333,36.333333,36.333333,0.0,36.333333,36.333333,0.0,0.0,78.0,78.0,77.0,79.0,1.0,77.0,79.0,0.025974,0.054054,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,95.5,95.5,95.0,96.0,0.5,95.0,96.0,0.010526,0.027027,97.5,97.5,96.0,99.0,1.5,99.0,96.0,-0.030303,-0.081081,62.0,62.0,61.0,63.0,1.0,61.0,63.0,0.032787,0.054054,2.0,,,,25.0,2.8,73.0,39.0,0.5,25.0,0.7,,111.0,0.6,2.1,214.0,30.1,10.1,,,27.1,,63.5,,,,77.0,4.6,15.2,3.22,141.0,,22.0,4.8,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,1,1,2,2,0,3,noSupport_death_1y,0,3
5,"['W1830XA', 'S0090XA']","['F419', 'D649', 'E039', 'E860', 'Y92009', 'R1...",0,78.0,WHITE,17715.933333,738.0,0,0,0,0,1,26.4,65.398895,,36.555556,36.555556,36.555556,36.555556,0.0,36.555556,36.555556,0.0,0.0,72.0,72.0,72.0,72.0,0.0,72.0,72.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,183.0,183.0,183.0,183.0,0.0,183.0,183.0,0.000000,0.000000,74.0,74.0,74.0,74.0,0.0,74.0,74.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,3,21,0,3,noSupport_death_1y,0,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129050,['R42'],"['H4010X4', 'G609', 'K7689', 'R0789', 'G4733',...",0,72.0,BLACK/CAPE VERDEAN,55544.750000,2314.0,0,1,0,0,0,37.5,92.986360,,,,,,,,,,,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,96.0,96.0,96.0,96.0,0.0,96.0,96.0,0.000000,0.000000,114.0,114.0,114.0,114.0,0.0,114.0,114.0,0.000000,0.000000,101.0,101.0,101.0,101.0,0.0,101.0,101.0,0.000000,0.000000,2.0,,,,15.0,4.1,84.0,18.0,0.9,28.0,0.2,9.4,86.0,2.1,1.5,265.0,32.8,10.2,,3.9,24.4,2.1,66.7,,,3.7,307.0,3.3,14.9,3.68,134.0,,64.0,8.9,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,7,16,0,3,noSupport_death_1y,0,3
129052,"['Z98890', 'M79632', 'Z7901', 'M25562', 'M549'...",[],1,60.0,WHITE,,,0,0,0,0,1,,,,36.500000,36.500000,36.500000,36.500000,0.0,36.500000,36.500000,0.0,0.0,64.0,64.0,64.0,64.0,0.0,64.0,64.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,156.0,156.0,156.0,156.0,0.0,156.0,156.0,0.000000,0.000000,75.0,75.0,75.0,75.0,0.0,75.0,75.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,Alive,0,0,9,16,0,0,noSupport_alive,0,0
129053,"['N186', 'E119', 'R55', 'K859']","['N186', 'K219', 'Z7901', 'I739', 'D3500', 'M8...",0,71.0,HISPANIC/LATINO - MEXICAN,53893.683333,2245.0,0,0,1,0,0,34.9,89.357624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,21.0,4.1,124.0,64.0,0.4,24.0,0.5,,102.0,6.7,0.5,159.0,39.7,12.4,1.3,3.3,6.6,,89.9,13.9,21.7,,183.0,5.3,13.4,3.82,141.0,,50.0,10.5,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1,0,0,1,Long-term,0,1,9,11,0,3,noSupport_death_1y,0,3
129054,['R42'],"['J341', 'Z9079', 'M47896', 'Z87891', 'M109', ...",1,73.0,BLACK/AFRICAN AMERICAN,4014.000000,167.0,0,1,0,0,0,26.0,75.400598,170.18,35.944444,35.944444,35.944444,35.944444,0.0,35.944444,35.944444,0.0,0.0,78.0,78.0,78.0,78.0,0.0,78.0,78.0,0.000000,0.000000,20.0,20.0,20.0,20.0,0.0,20.0,20.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,134.0,134.0,134.0,134.0,0.0,134.0,134.0,0.000000,0.000000,76.0,76.0,76.0,76.0,0.0,76.0,76.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1,1,0,1,0,Medium-term,0,1,12,19,0,2,noSupport_death_29_365d,0,2


In [29]:
def make_model_ready(df,targets_to_remove,target_to_add):
    """
    Prepares the dataframe by selecting model input columns.
    Excludes mortality, outcome, target, and general metadata columns.
    excluding targets with target_to_remvoe: 'clinical_deterioration_any','death_or_deterioration_any'
    """
    all_cols = df.columns.tolist()
    mortality_cols = [c for c in all_cols if c.startswith('mortality_')]
    outcome_cols = [c for c in all_cols if c.startswith('deterioration_')]
    general_cols = [c for c in all_cols if c.startswith('general_')]
    target_cols = ['target'] if 'target' in all_cols else []
    to_remove = [
        'target','Unnamed:0','general_subject_id','mortality_category','Alive',
        'Short-term','Medium-term','Long-term','admit'
    ]
    to_remove =to_remove + targets_to_remove    
    fit_cols = [c for c in all_cols if c not in set(
        outcome_cols + target_cols + to_remove + mortality_cols + general_cols
    )]
    
    print(f"Columns included for modeling ({len(fit_cols)}): {fit_cols[-10:]} ...")
    fit_cols.append(target_to_add) 
    return fit_cols


In [42]:
targets_to_remove = ['organ_support','mortality_class'	,'support_mortality_combo','shortterm_death_or_deterioration',
                     'clinical_deterioration_any','death_or_deterioration_any','mortality_category','suppoert_mortality_combo', 
                    'support_mortality_combo_id']
target_to_add='support_mortality_combo_id'
#define Targets HERE 
colz = make_model_ready(df,targets_to_remove, target_to_add )
print(len(colz)) 

Columns included for modeling (101): ['labvalues_potassium_first', 'labvalues_rdw_first', 'labvalues_red_blood_cells_first', 'labvalues_sodium_first', 'labvalues_troponin_t_first', 'labvalues_urea_nitrogen_first', 'labvalues_white_blood_cells_first', 'labvalues_ph_first', 'month_time', 'tod_time'] ...
102


In [55]:
X=df[colz]
X

Unnamed: 0,demographics_gender,demographics_age,demographics_ethnicity_asian,demographics_ethnicity_black/african,demographics_ethnicity_hispanic/latino,demographics_ethnicity_other,demographics_ethnicity_white,biometrics_bmi,biometrics_weight,biometrics_height,vitals_temperature_mean,vitals_temperature_median,vitals_temperature_min,vitals_temperature_max,vitals_temperature_std,vitals_temperature_first,vitals_temperature_last,vitals_temperature_rate_change,vitals_temperature_coeff,vitals_heartrate_mean,vitals_heartrate_median,vitals_heartrate_min,vitals_heartrate_max,vitals_heartrate_std,vitals_heartrate_first,vitals_heartrate_last,vitals_heartrate_rate_change,vitals_heartrate_coeff,vitals_resprate_mean,vitals_resprate_median,vitals_resprate_min,vitals_resprate_max,vitals_resprate_std,vitals_resprate_first,vitals_resprate_last,vitals_resprate_rate_change,vitals_resprate_coeff,vitals_o2sat_mean,vitals_o2sat_median,vitals_o2sat_min,vitals_o2sat_max,vitals_o2sat_std,vitals_o2sat_first,vitals_o2sat_last,vitals_o2sat_rate_change,vitals_o2sat_coeff,vitals_sbp_mean,vitals_sbp_median,vitals_sbp_min,vitals_sbp_max,vitals_sbp_std,vitals_sbp_first,vitals_sbp_last,vitals_sbp_rate_change,vitals_sbp_coeff,vitals_dbp_mean,vitals_dbp_median,vitals_dbp_min,vitals_dbp_max,vitals_dbp_std,vitals_dbp_first,vitals_dbp_last,vitals_dbp_rate_change,vitals_dbp_coeff,vitals_acuity,labvalues_absolute_basophil_count_first,labvalues_absolute_eosinophil_count_first,labvalues_absolute_lymphocyte_count_first,labvalues_alanine_aminotransferase_(alt)_first,labvalues_albumin_first,labvalues_alkaline_phosphatase_first,labvalues_asparate_aminotransferase_(ast)_first,labvalues_basophils_first,labvalues_bicarbonate_first,"labvalues_bilirubin,_total_first","labvalues_calcium,_total_first",labvalues_chloride_first,labvalues_creatinine_first,labvalues_eosinophils_first,labvalues_glucose_first,labvalues_hematocrit_first,labvalues_hemoglobin_first,labvalues_inr(pt)_first,labvalues_lactate_first,labvalues_lymphocytes_first,labvalues_magnesium_first,labvalues_neutrophils_first,labvalues_pt_first,labvalues_ptt_first,labvalues_phosphate_first,labvalues_platelet_count_first,labvalues_potassium_first,labvalues_rdw_first,labvalues_red_blood_cells_first,labvalues_sodium_first,labvalues_troponin_t_first,labvalues_urea_nitrogen_first,labvalues_white_blood_cells_first,labvalues_ph_first,month_time,tod_time,support_mortality_combo_id
1,1,27.0,0,0,1,0,0,,,,37.277778,37.277778,37.277778,37.277778,0.0,37.277778,37.277778,0.0,0.0,60.5,60.5,57.0,64.0,3.5,64.0,57.0,-0.109375,-0.205882,16.5,16.5,15.0,18.0,1.5,18.0,15.0,-0.166667,-0.088235,98.5,98.5,98.0,99.0,0.5,99.0,98.0,-0.010101,-0.029412,112.5,112.5,103.0,122.0,9.5,122.0,103.0,-0.155738,-0.558824,68.0,68.0,66.0,70.0,2.0,70.0,66.0,-0.057143,-0.117647,2.0,0.04,0.27,1.32,,4.4,72.0,24.0,1.3,25.0,1.0,,102.0,0.7,8.8,92.0,46.8,15.8,1.0,,42.9,,35.3,11.4,32.0,,251.0,4.2,12.5,4.84,136.0,,7.0,3.1,,8,13,0
2,0,93.0,0,1,0,0,0,21.3,47.853956,,36.722222,36.722222,36.722222,36.722222,0.0,36.722222,36.722222,0.0,0.0,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,184.0,184.0,184.0,184.0,0.0,184.0,184.0,0.000000,0.000000,46.0,46.0,46.0,46.0,0.0,46.0,46.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4,18,3
3,0,60.0,0,0,0,0,1,25.3,53.070264,144.78,,,,,,,,,,92.0,92.0,90.0,94.0,2.0,94.0,90.0,-0.042553,-0.056338,16.0,16.0,14.0,18.0,2.0,14.0,18.0,0.285714,0.056338,99.0,99.0,99.0,99.0,0.0,99.0,99.0,0.000000,0.000000,158.5,158.5,146.0,171.0,12.5,171.0,146.0,-0.146199,-0.352113,102.0,102.0,100.0,104.0,2.0,104.0,100.0,-0.038462,-0.056338,1.0,0.14,0.27,2.38,,,,,0.7,26.0,,,101.0,0.8,1.4,101.0,41.3,13.6,0.9,,12.1,,79.2,9.8,27.1,,282.0,6.8,13.2,4.40,138.0,,11.0,19.6,,7,2,0
4,1,67.0,0,0,0,1,0,,,,36.333333,36.333333,36.333333,36.333333,0.0,36.333333,36.333333,0.0,0.0,78.0,78.0,77.0,79.0,1.0,77.0,79.0,0.025974,0.054054,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,95.5,95.5,95.0,96.0,0.5,95.0,96.0,0.010526,0.027027,97.5,97.5,96.0,99.0,1.5,99.0,96.0,-0.030303,-0.081081,62.0,62.0,61.0,63.0,1.0,61.0,63.0,0.032787,0.054054,2.0,,,,25.0,2.8,73.0,39.0,0.5,25.0,0.7,,111.0,0.6,2.1,214.0,30.1,10.1,,,27.1,,63.5,,,,77.0,4.6,15.2,3.22,141.0,,22.0,4.8,,2,2,3
5,0,78.0,0,0,0,0,1,26.4,65.398895,,36.555556,36.555556,36.555556,36.555556,0.0,36.555556,36.555556,0.0,0.0,72.0,72.0,72.0,72.0,0.0,72.0,72.0,0.000000,0.000000,18.0,18.0,18.0,18.0,0.0,18.0,18.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,183.0,183.0,183.0,183.0,0.0,183.0,183.0,0.000000,0.000000,74.0,74.0,74.0,74.0,0.0,74.0,74.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,21,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129050,0,72.0,0,1,0,0,0,37.5,92.986360,,,,,,,,,,,70.0,70.0,70.0,70.0,0.0,70.0,70.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,96.0,96.0,96.0,96.0,0.0,96.0,96.0,0.000000,0.000000,114.0,114.0,114.0,114.0,0.0,114.0,114.0,0.000000,0.000000,101.0,101.0,101.0,101.0,0.0,101.0,101.0,0.000000,0.000000,2.0,,,,15.0,4.1,84.0,18.0,0.9,28.0,0.2,9.4,86.0,2.1,1.5,265.0,32.8,10.2,,3.9,24.4,2.1,66.7,,,3.7,307.0,3.3,14.9,3.68,134.0,,64.0,8.9,,7,16,3
129052,1,60.0,0,0,0,0,1,,,,36.500000,36.500000,36.500000,36.500000,0.0,36.500000,36.500000,0.0,0.0,64.0,64.0,64.0,64.0,0.0,64.0,64.0,0.000000,0.000000,16.0,16.0,16.0,16.0,0.0,16.0,16.0,0.000000,0.000000,97.0,97.0,97.0,97.0,0.0,97.0,97.0,0.000000,0.000000,156.0,156.0,156.0,156.0,0.0,156.0,156.0,0.000000,0.000000,75.0,75.0,75.0,75.0,0.0,75.0,75.0,0.000000,0.000000,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9,16,0
129053,0,71.0,0,0,1,0,0,34.9,89.357624,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,,,,21.0,4.1,124.0,64.0,0.4,24.0,0.5,,102.0,6.7,0.5,159.0,39.7,12.4,1.3,3.3,6.6,,89.9,13.9,21.7,,183.0,5.3,13.4,3.82,141.0,,50.0,10.5,,9,11,3
129054,1,73.0,0,1,0,0,0,26.0,75.400598,170.18,35.944444,35.944444,35.944444,35.944444,0.0,35.944444,35.944444,0.0,0.0,78.0,78.0,78.0,78.0,0.0,78.0,78.0,0.000000,0.000000,20.0,20.0,20.0,20.0,0.0,20.0,20.0,0.000000,0.000000,100.0,100.0,100.0,100.0,0.0,100.0,100.0,0.000000,0.000000,134.0,134.0,134.0,134.0,0.0,134.0,134.0,0.000000,0.000000,76.0,76.0,76.0,76.0,0.0,76.0,76.0,0.000000,0.000000,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,12,19,2
