In [1]:
EXTERNAL_FOLDER = "../data/external"
INTERIM_DATA_FOLDER = "../data/interim"
PROCESSED_DATA_FOLDER = "../data/processed"

In [2]:
import pandas as pd
import plotly.graph_objects as go

In [None]:
glucose_norm = pd.read_csv(f"{PROCESSED_DATA_FOLDER}/glucose_norm.csv")
insulin_inputs_in_glucose_pacients = pd.read_csv(f"{INTERIM_DATA_FOLDER}/insulin_inputs_in_glucose_pacients.csv")
bmi_glucose = pd.read_csv(f"{PROCESSED_DATA_FOLDER}/bmi_glucose.csv")
d_items = pd.read_csv(f"{EXTERNAL_FOLDER}/d_items.csv")
patients = pd.read_csv(f"{INTERIM_DATA_FOLDER}/patients.csv")

bmi_norm = pd.read_csv(f"{PROCESSED_DATA_FOLDER}/bmi_norm.csv")
ck_norm = pd.read_csv(f"{PROCESSED_DATA_FOLDER}/ck_norm.csv")
insuline_in_ck_patients = pd.read_csv(f"{INTERIM_DATA_FOLDER}/insulin_inputs_in_ck_pacients.csv")

In [None]:
glucose_norm = (
    glucose_norm
    .merge(d_items, on="itemid", how="left")
    [["subject_id", "hadm_id", "stay_id", "charttime", "valuenum", "valueuom", "itemid", "label", "category"]]
)

glucose_norm['chartdate'] = pd.to_datetime(glucose_norm['charttime'])
glucose_norm['chartdate'] = glucose_norm['chartdate'].dt.strftime('%Y-%m-%d')

bmi_glucose = bmi_glucose[["subject_id", "chartdate", "result_name", "result_value"]]
bmi_glucose = bmi_glucose.groupby("subject_id", as_index=False).first()

glucose = (
    glucose_norm
    .merge(bmi_glucose, on=["subject_id"], how="left")  # Aqui o mais correto deveria ser on=["subject_id", "chartdate"], mas o join dessa forma não funciona, possivelmente por haver inconsistências na definição das datas.
    .dropna(subset=["result_value"])
)
glucose["charttime"] = pd.to_datetime(glucose["charttime"]) 
glucose = glucose.sort_values(['stay_id', 'charttime'])

insulin = (
    insulin_inputs_in_glucose_pacients[["subject_id", "hadm_id", "stay_id", "starttime", "endtime", "itemid", "amount", "amountuom", "rate", "rateuom", "ordercategoryname", "ordercategorydescription", "totalamount", "totalamountuom"]]
)
insulin["charttime"] = pd.to_datetime(insulin["starttime"]) 
insulin = insulin.sort_values(['stay_id', 'charttime'])

patients["died"] = patients["dod"].isna()
patients = patients[["subject_id", "died"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


* Rule 1: A glucose reading should precede a regular insulin administration by up to 90 minutes. Tis basis
for this time window was derived from the diabetic ketoacidosis guidelines which recommend measuring
glucose values every 60minutes while receiving an insulin infusion10. An additional 30minutes were added,
90minutes in total, to this interval to account for the time it may take for providers to register the event. Tese
time intervals are within the recommendations11.

* Rule 2: When a regular insulin event was not preceded, but instead followed, by a blood glucose measurement, this glucose reading was paired with the regular insulin administration if they were recorded within
90minutes of each other.

* Rule 3: Sometimes a regular insulin infusion/bolus appeared between 2 blood glucose measurements. In this
case, the higher glucose value was paired with the regular insulin entry as long as they were entered within
90minutes of each other.

* Rule 4: When a regular insulin bolus occurred very close to a regular insulin infusion rate, it was assumed
that the patient was given a bolus and then commenced on an infusion. Both regular insulin entries were
paired with the preceding blood glucose measurement, or the posterior glucose reading in case its value was
higher than the preceding blood glucose and was entered within 90minutes of the insulin dose.

* Rule 5: No glucose values below 90mg/dL were paired with a subsequent regular insulin bolus or infusion.
No clinician will treat this low of a blood glucose value with a regular insulin bolus or infusion.

In [None]:
glucose_insulin = (
    glucose
    .merge(insulin, on="stay_id", how="left")
)

glucose_insulin = glucose_insulin[
    (glucose_insulin["charttime_x"] - glucose_insulin["charttime_y"] >= pd.Timedelta('-90 minutes')) &
    (glucose_insulin["charttime_x"] - glucose_insulin["charttime_y"] < pd.Timedelta('90 minutes'))
]

In [None]:
glucose_insulin = glucose_insulin[glucose_insulin['valuenum'] >= 90]

In [None]:
def is_intravenous(row):
    if "Non IV" in row['insulin_ordercategoryname']:
        return 0
    return 1

glucose_insulin = glucose_insulin[
    [
        "subject_id_x", 
        "hadm_id_x", 
        "stay_id", 
        "charttime_x", 
        "valuenum", 
        "valueuom", 
        "label", 
        "category", 
        "result_value", 
        "starttime", 
        "endtime", 
        "itemid_y", 
        "amount", 
        "amountuom", 
        "rate", 
        "rateuom", 
        "ordercategoryname", 
        "ordercategorydescription", 
        "totalamount", 
        "totalamountuom"
    ]
]

glucose_insulin = glucose_insulin.rename(
    columns={
        "subject_id_x": "subject_id", 
        "hadm_id_x": "hadm_id",
        "charttime_x": "glucose_charttime",
        "valuenum": "glucose_value", 
        "valueuom": "glucose_valueuom", 
        "result_value": "BMI (km/m2)", 
        "starttime": "insulin_starttime",
        "endtime": "insulin_endtime",
        "itemid_y": "insulin_itemid",
        "amount": "insulin_amount", 
        "amountuom": "insulin_amountuom", 
        "rate": "insulin_rate", 
        "rateuom": "insulin_rateuom", 
        "ordercategoryname": "insulin_ordercategoryname", 
        "ordercategorydescription": "insulin_ordercategorydescription", 
        "totalamount": "insuline_totalamount", 
        "totalamountuom": "insuline_totalamountuom",
    }
)
glucose_insulin["is_overweight"] = (glucose_insulin["BMI (km/m2)"] >= 30).astype(int)
glucose_insulin['is_intravenous'] = glucose_insulin.apply(is_intravenous, axis=1)

In [None]:
glucose_insulin.to_csv(f"{PROCESSED_DATA_FOLDER}/paired_glucose_insulin.csv", index=False)

In [None]:
ck_norm = ck_norm.rename(
    columns={
        "charttime": "ck_charttime", 
        "valuenum": "CK (IU/L)",
    }
)[["stay_id", "ck_charttime", "CK (IU/L)"]]

ck_norm["ck_charttime"] = pd.to_datetime(ck_norm["ck_charttime"]) 

In [None]:
glucose_insulin = (
    glucose_insulin.merge(
        ck_norm, 
        on="stay_id", how="left"
    )
)

glucose_insulin = glucose_insulin[
    (glucose_insulin["glucose_charttime"] - glucose_insulin["ck_charttime"] >= pd.Timedelta('-4 hours')) &
    (glucose_insulin["glucose_charttime"] - glucose_insulin["ck_charttime"] < pd.Timedelta('4 hours'))
]

In [None]:
pd.set_option('display.max_columns', None)
glucose_insulin = glucose_insulin.merge(patients, on="subject_id", how="left")

Unnamed: 0,subject_id,hadm_id,stay_id,glucose_charttime,glucose_value,glucose_valueuom,label,category,BMI (km/m2),insulin_starttime,insulin_endtime,insulin_itemid,insulin_amount,insulin_amountuom,insulin_rate,insulin_rateuom,insulin_ordercategoryname,insulin_ordercategorydescription,insuline_totalamount,insuline_totalamountuom,is_overweight,is_intravenous,ck_charttime,CK (IU/L),died
0,16235911,28956560,30003306,2188-06-06 17:25:00,160.0,mg/dL,Glucose (serum),Labs,24.4,2188-06-06 18:35:00,2188-06-06 18:36:00,223262.0,1.000000,units,,,06-Insulin (Non IV),Drug Push,,,0,0,2188-06-06 17:25:00,422.0,False
1,19714547,25371645,30012243,2183-03-30 12:16:00,257.0,mg/dL,Glucose (serum),Labs,29.7,2183-03-30 11:20:00,2183-03-30 11:21:00,223262.0,6.000000,units,,,06-Insulin (Non IV),Drug Push,,,0,0,2183-03-30 12:16:00,76.0,True
2,19714547,25371645,30012243,2183-03-30 12:16:00,257.0,mg/dL,Glucose (serum),Labs,29.7,2183-03-30 13:35:00,2183-03-30 13:36:00,223262.0,14.000000,units,,,06-Insulin (Non IV),Drug Push,,,0,0,2183-03-30 12:16:00,76.0,True
3,12791590,25733250,30012600,2110-12-05 11:07:00,251.0,mg/dL,Glucose (serum),Labs,37.3,2110-12-05 12:12:00,2110-12-05 12:13:00,223258.0,8.000000,units,,,06-Insulin (Non IV),Drug Push,,,1,0,2110-12-05 11:07:00,5143.0,True
4,19635799,25167602,30024161,2185-08-30 02:08:00,163.0,mg/dL,Glucose (serum),Labs,22.4,2185-08-30 03:27:00,2185-08-30 03:28:00,223262.0,4.000000,units,,,06-Insulin (Non IV),Drug Push,,,0,0,2185-08-30 02:08:00,47.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6489,14890100,24216473,39978664,2129-02-20 20:00:00,101.0,mg/dL,Glucose (whole blood),Labs,37.8,2129-02-20 20:13:00,2129-02-20 22:01:00,223258.0,1.800013,units,1.000007,units/hour,01-Drips,Continuous Med,100.0,ml,1,1,2129-02-20 22:00:00,2251.0,True
6490,14890100,24216473,39978664,2129-02-22 00:20:00,133.0,mg/dL,Glucose (whole blood),Labs,37.8,2129-02-22 00:34:00,2129-02-22 01:45:00,223258.0,1.183333,units,1.000000,units/hour,01-Drips,Continuous Med,100.0,ml,1,1,2129-02-21 23:57:00,2484.0,True
6491,14890100,24216473,39978664,2129-02-22 00:20:00,133.0,mg/dL,Glucose (whole blood),Labs,37.8,2129-02-22 01:45:00,2129-02-22 02:56:00,223258.0,2.367066,units,2.000337,units/hour,01-Drips,Continuous Med,100.0,ml,1,1,2129-02-21 23:57:00,2484.0,True
6492,14890100,24216473,39978664,2129-02-22 05:04:00,102.0,mg/dL,Glucose (serum),Labs,37.8,2129-02-22 05:31:00,2129-02-22 12:42:00,223258.0,7.183303,units,0.999996,units/hour,01-Drips,Continuous Med,100.0,ml,1,1,2129-02-22 05:04:00,2305.0,True


In [None]:
glucose_insulin.to_csv(f"{PROCESSED_DATA_FOLDER}/paired_glucose_insulin_ck_death.csv", index=False)