## Utils

In [15]:
%load_ext autoreload

In [20]:
%autoreload 2

In [25]:
from src.parsers import mimic, hirid
from src.modeling import discovery, plots, querier
from src.utils import constants
from scipy import stats
import numpy as np

In [2]:
import sys
# import win32com.client
import os
import pandas as pd

def setup_io_config(root_path):
    """
    Input - Output config. Resolve the dataset and result paths under the repo root.

    :param root_path: Repo path. It must contain either a 'data' folder or a
        Windows 'data.lnk' shortcut pointing at it; results go to 'results'.
    :return: tuple ``(mimic_data, mimic_path, hirid_data, hirid_path)`` where
        ``mimic_data`` is '<root_path>/data' (or the shortcut's target),
        ``mimic_path`` is '<root_path>/results', ``hirid_data`` is the HiRID
        'raw_stage/' folder and ``hirid_path`` is the HiRID dataset folder,
        both under '<root_path>/data'.
    :raises ImportError: if 'data.lnk' is present but ``win32com`` cannot be imported.
    """

    # MIMIC
    is_shortcut = "data.lnk" in os.listdir(root_path)

    if is_shortcut:
        # Imported lazily: the module-level import is commented out, so without
        # this line the shortcut branch would fail with NameError. Keeping the
        # import here also means it is only required when a shortcut is used.
        import win32com.client

        path_shortcut = os.path.join(root_path, "data.lnk")
        shell = win32com.client.Dispatch("WScript.Shell")
        mimic_data = shell.CreateShortCut(path_shortcut).Targetpath
    else:
        mimic_data = os.path.join(f"{root_path}", "data")
    mimic_path = os.path.join(f"{root_path}", "results")

    # HIRID
    # NOTE(review): these are always built from '<root_path>/data', even when
    # 'data.lnk' redirects the MIMIC data elsewhere - confirm this is intended.
    hirid_data = f'{root_path}/data/hirid-a-high-time-resolution-icu-dataset-1.1.1/raw_stage/'
    hirid_path = f'{root_path}/data/hirid-a-high-time-resolution-icu-dataset-1.1.1'

    return mimic_data, mimic_path, hirid_data, hirid_path

def setup_stratification_config():
    """
    Stratification config shared by every analysis section.

    :return: tuple ``(gender, age_a, age_b, ethnicity, lab_mapping,
        before_windows, after_windows)``. Note the order: ``age_a`` (80) comes
        before ``age_b`` (40). The windows are lists of ``(start, end)`` tuples;
        the result tables label them as hours.
    """
    gender = "MF"
    age_a, age_b = 80, 40
    ethnicity = "WHITE"
    lab_mapping = constants.LAB_MAPPING
    before_windows = [(0,6), (6,12)]
    after_windows = [(0,4), (4,8), (8,12)]
    return (
        gender,
        age_a,
        age_b,
        ethnicity,
        lab_mapping,
        before_windows,
        after_windows,
    )

## Systolic Blood Pressure

In [3]:
# IO Config
# root_path ="C:\\Users\\danco\\My Drive\\Master\\Datasets\\MIMIC iii"
root_path = "/Users/pavan/Library/CloudStorage/GoogleDrive-f20190038@hyderabad.bits-pilani.ac.in/My Drive/TAU/Code/DrugLab"
data, res, raw_path, res_path = setup_io_config(root_path=root_path)

# Stratification Config
gender, age_a, age_b, ethnicity, lab_mapping, before_windows, after_windows = setup_stratification_config()

In [36]:
vital_name = "Systolic blood pressure"

In [21]:
# med_lab_pairs = pd.read_csv(os.path.join(data, "mimiciii/1.4", "preprocessed", "Systolic blood pressure_lab_patient_data_mimic_extract_2.csv")).drop(columns=["Unnamed: 0"])
med_lab_pairs = pd.read_csv(os.path.join(res, f"med_lab_pairs_vital_signs_{vital_name}.csv")).drop(columns=["Unnamed: 0"])

In [27]:
# Add one "ratio_<before>_<after>" column per window pairing: the after-window
# value divided by the before-window value (1.0 means no change).
# A zero "before" value produces inf (or NaN for 0/0); infinite ratios are
# replaced with NaN before the t-test further down.
for b_w in before_windows:
    for a_w in after_windows:
        print(b_w, a_w)
        med_lab_pairs[f"ratio_{b_w}_{a_w}"] = med_lab_pairs[f"after_abs_{a_w}_sp"] / med_lab_pairs[f"before_abs_{b_w}_sp"]

(0, 6) (0, 4)
(0, 6) (4, 8)
(0, 6) (8, 12)
(6, 12) (0, 4)
(6, 12) (4, 8)
(6, 12) (8, 12)


In [30]:
med_lab_pairs.dropna(subset=["ratio_(0, 6)_(0, 4)"])[["ratio_(0, 6)_(0, 4)"]]

Unnamed: 0,"ratio_(0, 6)_(0, 4)"
0,1.050000
1,1.050000
2,1.050000
5,1.142857
6,0.946237
...,...
136151,1.247191
136152,0.954955
136153,0.936000
136154,1.034188


In [None]:
# # Original size of med lab pairs - 1826730
# for a_w in after_windows:
#     for b_w in before_windows:
#         med_lab_pairs = pd.concat([ med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]<1) & (med_lab_pairs[f"ratio_{b_w}_{a_w}"]!=1)], med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]>=1) | (med_lab_pairs[f"after_time_{a_w}_sp"].isna())] ])

In [31]:
med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,HADM_ID
MED_NAME,LAB_NAME,Unnamed: 2_level_1
ACD-A Citrate (1000ml),Systolic blood pressure,147
ACD-A Citrate (500ml),Systolic blood pressure,5
Abciximab (Reopro),Systolic blood pressure,2
Acetaminophen-IV,Systolic blood pressure,400
Acyclovir,Systolic blood pressure,210
...,...,...
Verapamil,Systolic blood pressure,9
Vitamin K (Phytonadione),Systolic blood pressure,93
Vivonex (1/2),Systolic blood pressure,1
Vivonex (Full),Systolic blood pressure,42


In [32]:
# For every (medication, vital) pair with more than 100 non-null HADM_ID entries,
# run a one-sample t-test of the after/before ratio against 1 ("no change")
# for each (before window, after window) pairing.
pairs_df = med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]
pairs = pairs_df[pairs_df["HADM_ID"]>100].index
discovery_res1 = []
for med_name, lab_name in pairs:
    # Select this pair's rows once; the selection does not depend on the windows.
    pair_rows = med_lab_pairs[(med_lab_pairs["LAB_NAME"]==lab_name) & (med_lab_pairs["MED_NAME"]==med_name)]
    for a_w in after_windows:
        for b_w in before_windows:
            # Infinite ratios (zero "before" value) and missing ratios are excluded.
            vals = pair_rows[f"ratio_{b_w}_{a_w}"].replace([np.inf, -np.inf], np.nan).dropna()
            if vals.shape[0]>0:
                # Named ttest_res (not `res`) so the results-directory path that
                # `res` holds from the IO config cell is not overwritten.
                ttest_res = stats.ttest_1samp(vals.to_numpy(), popmean=1)
                # Append only when a test was actually run: appending outside this
                # branch would re-add the previous window's row (or fail on the
                # very first iteration) whenever `vals` is empty.
                discovery_res1.append({
                    "Med Name": med_name,
                    "Lab Name": lab_name,
                    "Before Window (in Hours)": b_w,
                    "After Window (in Hours)": a_w,
                    "No. of Patients": vals.shape[0],
                    "1-Sampled Ttest" : ttest_res.pvalue
                })
res_df1 = pd.DataFrame(discovery_res1)

In [33]:
res_df = res_df1.copy()

In [34]:
res_df[res_df["No. of Patients"]>100]

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest
6,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(0, 4)",328,4.413339e-08
7,Acetaminophen-IV,Systolic blood pressure,"(6, 12)","(0, 4)",233,9.361079e-01
8,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(4, 8)",311,1.695153e-04
9,Acetaminophen-IV,Systolic blood pressure,"(6, 12)","(4, 8)",217,2.629712e-01
10,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(8, 12)",213,4.635270e-01
...,...,...,...,...,...,...
742,Vasopressin,Systolic blood pressure,"(0, 6)","(8, 12)",102,8.988038e-01
744,Vecuronium,Systolic blood pressure,"(0, 6)","(0, 4)",129,1.616918e-03
745,Vecuronium,Systolic blood pressure,"(6, 12)","(0, 4)",102,1.866225e-03
746,Vecuronium,Systolic blood pressure,"(0, 6)","(4, 8)",111,1.091981e-03


In [37]:
res_df[res_df["No. of Patients"]>100].to_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv"))

In [None]:
res_df = pd.read_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv")).drop(columns=["Unnamed: 0"]).drop_duplicates()

In [38]:
dis_analyzer = discovery.ClinicalDiscoveryAnalysis([])

In [39]:
test_pval_data, significant_hard_thres, significant_bonferroni, significant_fdr = dis_analyzer.generate_significant(res_df[res_df["No. of Patients"]>100].dropna(), statistical_test="1-Sampled Ttest")

In [42]:
test_pval_data

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(0, 4)",328,4.413339e-08,0.000024,5.655232e-07
1,Acetaminophen-IV,Systolic blood pressure,"(6, 12)","(0, 4)",233,9.361079e-01,1.000000,9.571344e-01
2,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(4, 8)",311,1.695153e-04,0.093403,8.193241e-04
3,Acetaminophen-IV,Systolic blood pressure,"(6, 12)","(4, 8)",217,2.629712e-01,1.000000,3.932012e-01
4,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(8, 12)",213,4.635270e-01,1.000000,5.995385e-01
...,...,...,...,...,...,...,...,...
546,Vasopressin,Systolic blood pressure,"(0, 6)","(8, 12)",102,8.988038e-01,1.000000,9.417096e-01
547,Vecuronium,Systolic blood pressure,"(0, 6)","(0, 4)",129,1.616918e-03,0.890922,6.060693e-03
548,Vecuronium,Systolic blood pressure,"(6, 12)","(0, 4)",102,1.866225e-03,1.000000,6.821420e-03
549,Vecuronium,Systolic blood pressure,"(0, 6)","(4, 8)",111,1.091981e-03,0.601682,4.328645e-03


In [41]:
significant_hard_thres.shape, significant_bonferroni.shape, significant_fdr.shape

((195, 8), (88, 8), (162, 8))

In [43]:
merged_sig_pairs = pd.merge(pd.merge(significant_fdr, significant_bonferroni, how="inner"), significant_hard_thres, how="inner")

In [44]:
merged_sig_pairs

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(0, 4)",328,4.413339e-08,2.431750e-05,5.655232e-07
1,Albumin 5%,Systolic blood pressure,"(0, 6)","(0, 4)",991,1.172105e-05,6.458300e-03,7.781085e-05
2,Amiodarone,Systolic blood pressure,"(0, 6)","(4, 8)",327,1.622566e-05,8.940337e-03,1.039574e-04
3,Calcium Gluconate,Systolic blood pressure,"(0, 6)","(0, 4)",2798,3.399097e-13,1.872902e-10,9.364511e-12
4,Calcium Gluconate,Systolic blood pressure,"(6, 12)","(0, 4)",1838,7.193575e-17,3.963660e-14,3.048969e-15
...,...,...,...,...,...,...,...,...
83,Platelets,Systolic blood pressure,"(6, 12)","(0, 4)",290,2.088772e-07,1.150914e-04,2.397737e-06
84,Potassium Chloride,Systolic blood pressure,"(0, 6)","(4, 8)",1768,3.559338e-07,1.961195e-04,3.700368e-06
85,Pre-Admission Intake,Systolic blood pressure,"(0, 6)","(0, 4)",1245,2.833562e-10,1.561293e-07,5.036429e-09
86,Pre-Admission Intake,Systolic blood pressure,"(0, 6)","(4, 8)",1202,1.357516e-08,7.479915e-06,1.869979e-07


In [45]:
merged_sig_pairs.to_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"sig_pairs_intersection_mimic_vital_sign_{vital_name}.csv"))

## Heart Rate

In [48]:
# IO Config
# root_path ="C:\\Users\\danco\\My Drive\\Master\\Datasets\\MIMIC iii"
root_path = "/Users/pavan/Library/CloudStorage/GoogleDrive-f20190038@hyderabad.bits-pilani.ac.in/My Drive/TAU/Code/DrugLab"
data, res, raw_path, res_path = setup_io_config(root_path=root_path)

# Stratification Config
gender, age_a, age_b, ethnicity, lab_mapping, before_windows, after_windows = setup_stratification_config()

In [46]:
vital_name = "Heart Rate"

In [49]:
# med_lab_pairs = pd.read_csv(os.path.join(data, "mimiciii/1.4", "preprocessed", "Systolic blood pressure_lab_patient_data_mimic_extract_2.csv")).drop(columns=["Unnamed: 0"])
med_lab_pairs = pd.read_csv(os.path.join(res, f"med_lab_pairs_vital_signs_{vital_name}.csv")).drop(columns=["Unnamed: 0"])

In [50]:
# Add one "ratio_<before>_<after>" column per window pairing: the after-window
# value divided by the before-window value (1.0 means no change).
# A zero "before" value produces inf (or NaN for 0/0); infinite ratios are
# replaced with NaN before the t-test further down.
for b_w in before_windows:
    for a_w in after_windows:
        print(b_w, a_w)
        med_lab_pairs[f"ratio_{b_w}_{a_w}"] = med_lab_pairs[f"after_abs_{a_w}_sp"] / med_lab_pairs[f"before_abs_{b_w}_sp"]

(0, 6) (0, 4)
(0, 6) (4, 8)
(0, 6) (8, 12)
(6, 12) (0, 4)
(6, 12) (4, 8)
(6, 12) (8, 12)


In [52]:
med_lab_pairs.dropna(subset=["ratio_(0, 6)_(0, 4)"])[["ratio_(0, 6)_(0, 4)"]].describe()

Unnamed: 0,"ratio_(0, 6)_(0, 4)"
count,116276.0
mean,inf
std,
min,0.0
25%,0.9545455
50%,1.0
75%,1.037975
max,inf


In [None]:
# # Original size of med lab pairs - 1826730
# for a_w in after_windows:
#     for b_w in before_windows:
#         med_lab_pairs = pd.concat([ med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]<1) & (med_lab_pairs[f"ratio_{b_w}_{a_w}"]!=1)], med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]>=1) | (med_lab_pairs[f"after_time_{a_w}_sp"].isna())] ])

In [53]:
med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,HADM_ID
MED_NAME,LAB_NAME,Unnamed: 2_level_1
ACD-A Citrate (1000ml),Heart Rate,147
ACD-A Citrate (500ml),Heart Rate,5
Abciximab (Reopro),Heart Rate,2
Acetaminophen-IV,Heart Rate,400
Acyclovir,Heart Rate,210
...,...,...
Verapamil,Heart Rate,9
Vitamin K (Phytonadione),Heart Rate,93
Vivonex (1/2),Heart Rate,1
Vivonex (Full),Heart Rate,42


In [54]:
# For every (medication, vital) pair with more than 100 non-null HADM_ID entries,
# run a one-sample t-test of the after/before ratio against 1 ("no change")
# for each (before window, after window) pairing.
pairs_df = med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]
pairs = pairs_df[pairs_df["HADM_ID"]>100].index
discovery_res1 = []
for med_name, lab_name in pairs:
    # Select this pair's rows once; the selection does not depend on the windows.
    pair_rows = med_lab_pairs[(med_lab_pairs["LAB_NAME"]==lab_name) & (med_lab_pairs["MED_NAME"]==med_name)]
    for a_w in after_windows:
        for b_w in before_windows:
            # Infinite ratios (zero "before" value) and missing ratios are excluded.
            vals = pair_rows[f"ratio_{b_w}_{a_w}"].replace([np.inf, -np.inf], np.nan).dropna()
            if vals.shape[0]>0:
                # Named ttest_res (not `res`) so the results-directory path that
                # `res` holds from the IO config cell is not overwritten.
                ttest_res = stats.ttest_1samp(vals.to_numpy(), popmean=1)
                # Append only when a test was actually run: appending outside this
                # branch would re-add the previous window's row (or fail on the
                # very first iteration) whenever `vals` is empty.
                discovery_res1.append({
                    "Med Name": med_name,
                    "Lab Name": lab_name,
                    "Before Window (in Hours)": b_w,
                    "After Window (in Hours)": a_w,
                    "No. of Patients": vals.shape[0],
                    "1-Sampled Ttest" : ttest_res.pvalue
                })
res_df1 = pd.DataFrame(discovery_res1)

In [55]:
res_df = res_df1.copy()

In [56]:
res_df[res_df["No. of Patients"]>100]

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest
6,Acetaminophen-IV,Heart Rate,"(0, 6)","(0, 4)",328,8.586671e-06
7,Acetaminophen-IV,Heart Rate,"(6, 12)","(0, 4)",234,1.782672e-01
8,Acetaminophen-IV,Heart Rate,"(0, 6)","(4, 8)",313,2.521195e-11
9,Acetaminophen-IV,Heart Rate,"(6, 12)","(4, 8)",220,2.763394e-02
10,Acetaminophen-IV,Heart Rate,"(0, 6)","(8, 12)",212,1.942681e-07
...,...,...,...,...,...,...
742,Vasopressin,Heart Rate,"(0, 6)","(8, 12)",103,8.314304e-01
744,Vecuronium,Heart Rate,"(0, 6)","(0, 4)",130,1.644481e-01
745,Vecuronium,Heart Rate,"(6, 12)","(0, 4)",102,9.960966e-01
746,Vecuronium,Heart Rate,"(0, 6)","(4, 8)",112,1.272466e-01


In [57]:
res_df[res_df["No. of Patients"]>100].to_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv"))

In [58]:
# res_df = pd.read_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv")).drop(columns=["Unnamed: 0"]).drop_duplicates()

In [59]:
dis_analyzer = discovery.ClinicalDiscoveryAnalysis([])

In [60]:
test_pval_data, significant_hard_thres, significant_bonferroni, significant_fdr = dis_analyzer.generate_significant(res_df[res_df["No. of Patients"]>100].dropna(), statistical_test="1-Sampled Ttest")

In [61]:
test_pval_data

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Heart Rate,"(0, 6)","(0, 4)",328,8.586671e-06,4.731256e-03,7.392587e-05
1,Acetaminophen-IV,Heart Rate,"(6, 12)","(0, 4)",234,1.782672e-01,1.000000e+00,3.571827e-01
2,Acetaminophen-IV,Heart Rate,"(0, 6)","(4, 8)",313,2.521195e-11,1.389178e-08,5.342993e-10
3,Acetaminophen-IV,Heart Rate,"(6, 12)","(4, 8)",220,2.763394e-02,1.000000e+00,8.459055e-02
4,Acetaminophen-IV,Heart Rate,"(0, 6)","(8, 12)",212,1.942681e-07,1.070417e-04,2.277483e-06
...,...,...,...,...,...,...,...,...
546,Vasopressin,Heart Rate,"(0, 6)","(8, 12)",103,8.314304e-01,1.000000e+00,9.035861e-01
547,Vecuronium,Heart Rate,"(0, 6)","(0, 4)",130,1.644481e-01,1.000000e+00,3.419280e-01
548,Vecuronium,Heart Rate,"(6, 12)","(0, 4)",102,9.960966e-01,1.000000e+00,9.962970e-01
549,Vecuronium,Heart Rate,"(0, 6)","(4, 8)",112,1.272466e-01,1.000000e+00,2.818406e-01


In [62]:
significant_hard_thres.shape, significant_bonferroni.shape, significant_fdr.shape

((148, 8), (66, 8), (106, 8))

In [63]:
merged_sig_pairs = pd.merge(pd.merge(significant_fdr, significant_bonferroni, how="inner"), significant_hard_thres, how="inner")

In [64]:
merged_sig_pairs

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Heart Rate,"(0, 6)","(0, 4)",328,8.586671e-06,4.731256e-03,7.392587e-05
1,Acetaminophen-IV,Heart Rate,"(0, 6)","(4, 8)",313,2.521195e-11,1.389178e-08,5.342993e-10
2,Acetaminophen-IV,Heart Rate,"(0, 6)","(8, 12)",212,1.942681e-07,1.070417e-04,2.277483e-06
3,Amiodarone,Heart Rate,"(6, 12)","(0, 4)",344,6.573602e-08,3.622055e-05,8.423383e-07
4,Amiodarone,Heart Rate,"(0, 6)","(4, 8)",328,6.882087e-18,3.792030e-15,3.415409e-16
...,...,...,...,...,...,...,...,...
61,Potassium Chloride,Heart Rate,"(0, 6)","(4, 8)",1775,1.033733e-09,5.695868e-07,1.627391e-08
62,Potassium Chloride,Heart Rate,"(0, 6)","(8, 12)",1399,8.541527e-07,4.706381e-04,9.032774e-06
63,Pre-Admission Intake,Heart Rate,"(0, 6)","(4, 8)",1262,4.911977e-07,2.706499e-04,5.412998e-06
64,Pre-Admission Intake,Heart Rate,"(0, 6)","(8, 12)",1245,3.183580e-11,1.754152e-08,6.264830e-10


In [65]:
merged_sig_pairs.to_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"sig_pairs_intersection_mimic_vital_sign_{vital_name}.csv"))

## Diastolic blood pressure

In [66]:
# IO Config
# root_path ="C:\\Users\\danco\\My Drive\\Master\\Datasets\\MIMIC iii"
root_path = "/Users/pavan/Library/CloudStorage/GoogleDrive-f20190038@hyderabad.bits-pilani.ac.in/My Drive/TAU/Code/DrugLab"
data, res, raw_path, res_path = setup_io_config(root_path=root_path)

# Stratification Config
gender, age_a, age_b, ethnicity, lab_mapping, before_windows, after_windows = setup_stratification_config()

In [67]:
vital_name = "Diastolic blood pressure"

In [68]:
# med_lab_pairs = pd.read_csv(os.path.join(data, "mimiciii/1.4", "preprocessed", "Systolic blood pressure_lab_patient_data_mimic_extract_2.csv")).drop(columns=["Unnamed: 0"])
med_lab_pairs = pd.read_csv(os.path.join(res, f"med_lab_pairs_vital_signs_{vital_name}.csv")).drop(columns=["Unnamed: 0"])

In [69]:
# Add one "ratio_<before>_<after>" column per window pairing: the after-window
# value divided by the before-window value (1.0 means no change).
# A zero "before" value produces inf (or NaN for 0/0); infinite ratios are
# replaced with NaN before the t-test further down.
for b_w in before_windows:
    for a_w in after_windows:
        print(b_w, a_w)
        med_lab_pairs[f"ratio_{b_w}_{a_w}"] = med_lab_pairs[f"after_abs_{a_w}_sp"] / med_lab_pairs[f"before_abs_{b_w}_sp"]

(0, 6) (0, 4)
(0, 6) (4, 8)
(0, 6) (8, 12)
(6, 12) (0, 4)
(6, 12) (4, 8)
(6, 12) (8, 12)


In [70]:
med_lab_pairs.dropna(subset=["ratio_(0, 6)_(0, 4)"])[["ratio_(0, 6)_(0, 4)"]].describe()

Unnamed: 0,"ratio_(0, 6)_(0, 4)"
count,115904.0
mean,1.034539
std,5.44029
min,0.0
25%,0.90625
50%,1.0
75%,1.083333
max,1271.269231


In [71]:
# # Original size of med lab pairs - 1826730
# for a_w in after_windows:
#     for b_w in before_windows:
#         med_lab_pairs = pd.concat([ med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]<1) & (med_lab_pairs[f"ratio_{b_w}_{a_w}"]!=1)], med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]>=1) | (med_lab_pairs[f"after_time_{a_w}_sp"].isna())] ])

In [72]:
med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,HADM_ID
MED_NAME,LAB_NAME,Unnamed: 2_level_1
ACD-A Citrate (1000ml),Diastolic blood pressure,147
ACD-A Citrate (500ml),Diastolic blood pressure,5
Abciximab (Reopro),Diastolic blood pressure,2
Acetaminophen-IV,Diastolic blood pressure,400
Acyclovir,Diastolic blood pressure,210
...,...,...
Verapamil,Diastolic blood pressure,9
Vitamin K (Phytonadione),Diastolic blood pressure,93
Vivonex (1/2),Diastolic blood pressure,1
Vivonex (Full),Diastolic blood pressure,42


In [73]:
# For every (medication, vital) pair with more than 100 non-null HADM_ID entries,
# run a one-sample t-test of the after/before ratio against 1 ("no change")
# for each (before window, after window) pairing.
pairs_df = med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]
pairs = pairs_df[pairs_df["HADM_ID"]>100].index
discovery_res1 = []
for med_name, lab_name in pairs:
    # Select this pair's rows once; the selection does not depend on the windows.
    pair_rows = med_lab_pairs[(med_lab_pairs["LAB_NAME"]==lab_name) & (med_lab_pairs["MED_NAME"]==med_name)]
    for a_w in after_windows:
        for b_w in before_windows:
            # Infinite ratios (zero "before" value) and missing ratios are excluded.
            vals = pair_rows[f"ratio_{b_w}_{a_w}"].replace([np.inf, -np.inf], np.nan).dropna()
            if vals.shape[0]>0:
                # Named ttest_res (not `res`) so the results-directory path that
                # `res` holds from the IO config cell is not overwritten.
                ttest_res = stats.ttest_1samp(vals.to_numpy(), popmean=1)
                # Append only when a test was actually run: appending outside this
                # branch would re-add the previous window's row (or fail on the
                # very first iteration) whenever `vals` is empty.
                discovery_res1.append({
                    "Med Name": med_name,
                    "Lab Name": lab_name,
                    "Before Window (in Hours)": b_w,
                    "After Window (in Hours)": a_w,
                    "No. of Patients": vals.shape[0],
                    "1-Sampled Ttest" : ttest_res.pvalue
                })
res_df1 = pd.DataFrame(discovery_res1)

In [74]:
res_df = res_df1.copy()

In [75]:
res_df[res_df["No. of Patients"]>100]

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest
6,Acetaminophen-IV,Diastolic blood pressure,"(0, 6)","(0, 4)",328,0.000003
7,Acetaminophen-IV,Diastolic blood pressure,"(6, 12)","(0, 4)",233,0.524005
8,Acetaminophen-IV,Diastolic blood pressure,"(0, 6)","(4, 8)",311,0.000013
9,Acetaminophen-IV,Diastolic blood pressure,"(6, 12)","(4, 8)",217,0.959806
10,Acetaminophen-IV,Diastolic blood pressure,"(0, 6)","(8, 12)",213,0.071872
...,...,...,...,...,...,...
742,Vasopressin,Diastolic blood pressure,"(0, 6)","(8, 12)",102,0.452940
744,Vecuronium,Diastolic blood pressure,"(0, 6)","(0, 4)",129,0.003279
745,Vecuronium,Diastolic blood pressure,"(6, 12)","(0, 4)",102,0.000179
746,Vecuronium,Diastolic blood pressure,"(0, 6)","(4, 8)",111,0.027241


In [76]:
res_df[res_df["No. of Patients"]>100].to_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv"))

In [77]:
# res_df = pd.read_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv")).drop(columns=["Unnamed: 0"]).drop_duplicates()

In [78]:
dis_analyzer = discovery.ClinicalDiscoveryAnalysis([])

In [79]:
test_pval_data, significant_hard_thres, significant_bonferroni, significant_fdr = dis_analyzer.generate_significant(res_df[res_df["No. of Patients"]>100].dropna(), statistical_test="1-Sampled Ttest")

In [80]:
test_pval_data

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Diastolic blood pressure,"(0, 6)","(0, 4)",328,0.000003,0.001575,0.000043
1,Acetaminophen-IV,Diastolic blood pressure,"(6, 12)","(0, 4)",233,0.524005,1.000000,0.669352
2,Acetaminophen-IV,Diastolic blood pressure,"(0, 6)","(4, 8)",311,0.000013,0.007048,0.000160
3,Acetaminophen-IV,Diastolic blood pressure,"(6, 12)","(4, 8)",217,0.959806,1.000000,0.974489
4,Acetaminophen-IV,Diastolic blood pressure,"(0, 6)","(8, 12)",213,0.071872,1.000000,0.152314
...,...,...,...,...,...,...,...,...
546,Vasopressin,Diastolic blood pressure,"(0, 6)","(8, 12)",102,0.452940,1.000000,0.598489
547,Vecuronium,Diastolic blood pressure,"(0, 6)","(0, 4)",129,0.003279,1.000000,0.015310
548,Vecuronium,Diastolic blood pressure,"(6, 12)","(0, 4)",102,0.000179,0.098438,0.001348
549,Vecuronium,Diastolic blood pressure,"(0, 6)","(4, 8)",111,0.027241,1.000000,0.078587


In [81]:
significant_hard_thres.shape, significant_bonferroni.shape, significant_fdr.shape

((155, 8), (47, 8), (108, 8))

In [82]:
merged_sig_pairs = pd.merge(pd.merge(significant_fdr, significant_bonferroni, how="inner"), significant_hard_thres, how="inner")

In [83]:
merged_sig_pairs

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Diastolic blood pressure,"(0, 6)","(0, 4)",328,2.857927e-06,0.001574718,4.255994e-05
1,Acetaminophen-IV,Diastolic blood pressure,"(0, 6)","(4, 8)",311,1.279211e-05,0.007048455,0.0001601922
2,Albumin 25%,Diastolic blood pressure,"(0, 6)","(8, 12)",238,5.800048e-06,0.003195826,7.989566e-05
3,Calcium Gluconate,Diastolic blood pressure,"(0, 6)","(0, 4)",2798,1.47975e-09,8.153421e-07,4.529678e-08
4,Cath Lab Intake,Diastolic blood pressure,"(0, 6)","(8, 12)",263,2.660828e-08,1.466116e-05,6.374419e-07
5,Furosemide (Lasix),Diastolic blood pressure,"(0, 6)","(0, 4)",2658,9.015555e-08,4.967571e-05,1.910604e-06
6,Furosemide (Lasix),Diastolic blood pressure,"(6, 12)","(0, 4)",2362,1.1550589999999999e-20,6.364374e-18,1.591094e-18
7,Furosemide (Lasix),Diastolic blood pressure,"(6, 12)","(4, 8)",1922,3.860189e-07,0.0002126964,6.646763e-06
8,Furosemide (Lasix),Diastolic blood pressure,"(6, 12)","(8, 12)",1344,4.354707e-08,2.399443e-05,9.997681e-07
9,Gastric Meds,Diastolic blood pressure,"(6, 12)","(8, 12)",894,2.058511e-06,0.00113424,3.150666e-05


In [84]:
merged_sig_pairs.to_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"sig_pairs_intersection_mimic_vital_sign_{vital_name}.csv"))

## Respiratory rate

In [85]:
# IO Config
# root_path ="C:\\Users\\danco\\My Drive\\Master\\Datasets\\MIMIC iii"
root_path = "/Users/pavan/Library/CloudStorage/GoogleDrive-f20190038@hyderabad.bits-pilani.ac.in/My Drive/TAU/Code/DrugLab"
data, res, raw_path, res_path = setup_io_config(root_path=root_path)

# Stratification Config
gender, age_a, age_b, ethnicity, lab_mapping, before_windows, after_windows = setup_stratification_config()

In [86]:
vital_name = "Respiratory rate"

In [87]:
# med_lab_pairs = pd.read_csv(os.path.join(data, "mimiciii/1.4", "preprocessed", "Systolic blood pressure_lab_patient_data_mimic_extract_2.csv")).drop(columns=["Unnamed: 0"])
med_lab_pairs = pd.read_csv(os.path.join(res, f"med_lab_pairs_vital_signs_{vital_name}.csv")).drop(columns=["Unnamed: 0"])

In [88]:
# Add one "ratio_<before>_<after>" column per window pairing: the after-window
# value divided by the before-window value (1.0 means no change).
# A zero "before" value produces inf (or NaN for 0/0); infinite ratios are
# replaced with NaN before the t-test further down.
for b_w in before_windows:
    for a_w in after_windows:
        print(b_w, a_w)
        med_lab_pairs[f"ratio_{b_w}_{a_w}"] = med_lab_pairs[f"after_abs_{a_w}_sp"] / med_lab_pairs[f"before_abs_{b_w}_sp"]

(0, 6) (0, 4)
(0, 6) (4, 8)
(0, 6) (8, 12)
(6, 12) (0, 4)
(6, 12) (4, 8)
(6, 12) (8, 12)


In [89]:
med_lab_pairs.dropna(subset=["ratio_(0, 6)_(0, 4)"])[["ratio_(0, 6)_(0, 4)"]].describe()

Unnamed: 0,"ratio_(0, 6)_(0, 4)"
count,115490.0
mean,inf
std,
min,0.0
25%,0.8695652
50%,1.0
75%,1.166667
max,inf


In [90]:
# # Original size of med lab pairs - 1826730
# for a_w in after_windows:
#     for b_w in before_windows:
#         med_lab_pairs = pd.concat([ med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]<1) & (med_lab_pairs[f"ratio_{b_w}_{a_w}"]!=1)], med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]>=1) | (med_lab_pairs[f"after_time_{a_w}_sp"].isna())] ])

In [91]:
med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,HADM_ID
MED_NAME,LAB_NAME,Unnamed: 2_level_1
ACD-A Citrate (1000ml),Respiratory rate,147
ACD-A Citrate (500ml),Respiratory rate,5
Abciximab (Reopro),Respiratory rate,2
Acetaminophen-IV,Respiratory rate,399
Acyclovir,Respiratory rate,210
...,...,...
Verapamil,Respiratory rate,9
Vitamin K (Phytonadione),Respiratory rate,93
Vivonex (1/2),Respiratory rate,1
Vivonex (Full),Respiratory rate,42


In [92]:
# For every (medication, vital) pair with more than 100 non-null HADM_ID entries,
# run a one-sample t-test of the after/before ratio against 1 ("no change")
# for each (before window, after window) pairing.
pairs_df = med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]
pairs = pairs_df[pairs_df["HADM_ID"]>100].index
discovery_res1 = []
for med_name, lab_name in pairs:
    # Select this pair's rows once; the selection does not depend on the windows.
    pair_rows = med_lab_pairs[(med_lab_pairs["LAB_NAME"]==lab_name) & (med_lab_pairs["MED_NAME"]==med_name)]
    for a_w in after_windows:
        for b_w in before_windows:
            # Infinite ratios (zero "before" value) and missing ratios are excluded.
            vals = pair_rows[f"ratio_{b_w}_{a_w}"].replace([np.inf, -np.inf], np.nan).dropna()
            if vals.shape[0]>0:
                # Named ttest_res (not `res`) so the results-directory path that
                # `res` holds from the IO config cell is not overwritten.
                ttest_res = stats.ttest_1samp(vals.to_numpy(), popmean=1)
                # Append only when a test was actually run: appending outside this
                # branch would re-add the previous window's row (or fail on the
                # very first iteration) whenever `vals` is empty.
                discovery_res1.append({
                    "Med Name": med_name,
                    "Lab Name": lab_name,
                    "Before Window (in Hours)": b_w,
                    "After Window (in Hours)": a_w,
                    "No. of Patients": vals.shape[0],
                    "1-Sampled Ttest" : ttest_res.pvalue
                })
res_df1 = pd.DataFrame(discovery_res1)

In [93]:
res_df = res_df1.copy()

In [94]:
res_df[res_df["No. of Patients"]>100]

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest
6,Acetaminophen-IV,Respiratory rate,"(0, 6)","(0, 4)",313,0.565835
7,Acetaminophen-IV,Respiratory rate,"(6, 12)","(0, 4)",216,0.095535
8,Acetaminophen-IV,Respiratory rate,"(0, 6)","(4, 8)",300,0.050215
9,Acetaminophen-IV,Respiratory rate,"(6, 12)","(4, 8)",202,0.101074
10,Acetaminophen-IV,Respiratory rate,"(0, 6)","(8, 12)",203,0.590981
...,...,...,...,...,...,...
737,Vancomycin,Respiratory rate,"(6, 12)","(8, 12)",1556,0.002264
738,Vasopressin,Respiratory rate,"(0, 6)","(0, 4)",201,0.530840
739,Vasopressin,Respiratory rate,"(6, 12)","(0, 4)",173,0.603075
740,Vasopressin,Respiratory rate,"(0, 6)","(4, 8)",110,0.915826


In [95]:
res_df[res_df["No. of Patients"]>100].to_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv"))

In [96]:
# res_df = pd.read_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv")).drop(columns=["Unnamed: 0"]).drop_duplicates()

In [97]:
dis_analyzer = discovery.ClinicalDiscoveryAnalysis([])

In [98]:
test_pval_data, significant_hard_thres, significant_bonferroni, significant_fdr = dis_analyzer.generate_significant(res_df[res_df["No. of Patients"]>100].dropna(), statistical_test="1-Sampled Ttest")

In [99]:
test_pval_data

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Respiratory rate,"(0, 6)","(0, 4)",313,0.565835,1.0,0.687303
1,Acetaminophen-IV,Respiratory rate,"(6, 12)","(0, 4)",216,0.095535,1.0,0.224206
2,Acetaminophen-IV,Respiratory rate,"(0, 6)","(4, 8)",300,0.050215,1.0,0.142254
3,Acetaminophen-IV,Respiratory rate,"(6, 12)","(4, 8)",202,0.101074,1.0,0.235088
4,Acetaminophen-IV,Respiratory rate,"(0, 6)","(8, 12)",203,0.590981,1.0,0.704579
...,...,...,...,...,...,...,...,...
516,Vancomycin,Respiratory rate,"(6, 12)","(8, 12)",1556,0.002264,1.0,0.015527
517,Vasopressin,Respiratory rate,"(0, 6)","(0, 4)",201,0.530840,1.0,0.660065
518,Vasopressin,Respiratory rate,"(6, 12)","(0, 4)",173,0.603075,1.0,0.715723
519,Vasopressin,Respiratory rate,"(0, 6)","(4, 8)",110,0.915826,1.0,0.946717


In [100]:
significant_hard_thres.shape, significant_bonferroni.shape, significant_fdr.shape

((110, 8), (23, 8), (65, 8))

In [101]:
merged_sig_pairs = pd.merge(pd.merge(significant_fdr, significant_bonferroni, how="inner"), significant_hard_thres, how="inner")

In [102]:
merged_sig_pairs

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Calcium Gluconate,Respiratory rate,"(0, 6)","(8, 12)",1755,9.019575e-08,4.699199e-05,3.915999e-06
1,Calcium Gluconate,Respiratory rate,"(6, 12)","(8, 12)",1198,1.187049e-05,0.006184525,0.0002811148
2,Dopamine,Respiratory rate,"(0, 6)","(0, 4)",183,6.662744e-06,0.00347129,0.0001735645
3,Furosemide (Lasix),Respiratory rate,"(6, 12)","(0, 4)",2241,6.047381e-14,3.150685e-11,1.050228e-11
4,Furosemide (Lasix),Respiratory rate,"(6, 12)","(4, 8)",1827,9.925654e-10,5.171266e-07,8.618776e-08
5,Furosemide (Lasix),Respiratory rate,"(6, 12)","(8, 12)",1268,3.660026e-08,1.906874e-05,1.906874e-06
6,Hydralazine,Respiratory rate,"(6, 12)","(0, 4)",707,3.909062e-12,2.036621e-09,5.091554e-10
7,Insulin - Glargine,Respiratory rate,"(0, 6)","(4, 8)",1349,1.400057e-09,7.294297e-07,1.042042e-07
8,Insulin - Glargine,Respiratory rate,"(6, 12)","(4, 8)",1229,1.272572e-08,6.630098e-06,7.366776e-07
9,Insulin - Glargine,Respiratory rate,"(0, 6)","(8, 12)",1097,1.503962e-06,0.000783564,4.897275e-05


In [103]:
merged_sig_pairs.to_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"sig_pairs_intersection_mimic_vital_sign_{vital_name}.csv"))

## Temperature

In [164]:
# IO Config
# root_path ="C:\\Users\\danco\\My Drive\\Master\\Datasets\\MIMIC iii"
root_path = "/Users/pavan/Library/CloudStorage/GoogleDrive-f20190038@hyderabad.bits-pilani.ac.in/My Drive/TAU/Code/DrugLab"
data, res, raw_path, res_path = setup_io_config(root_path=root_path)

# Stratification Config
gender, age_a, age_b, ethnicity, lab_mapping, before_windows, after_windows = setup_stratification_config()

In [105]:
vital_name = "Temperature"

In [125]:
# med_lab_pairs_1 = pd.read_csv(os.path.join(res, f"med_lab_pairs_vital_signs_{vital_name} (C).csv")).drop(columns=["Unnamed: 0"])
# med_lab_pairs_2 = pd.read_csv(os.path.join(res, f"med_lab_pairs_vital_signs_{vital_name} (F).csv")).drop(columns=["Unnamed: 0"])

# Load the "(F)" temperature pairs file (presumably Fahrenheit readings - confirm).
# The statement must start at column 0: an indented first statement in a cell
# is rejected by Python as an unexpected indent.
med_lab_pairs_2 = pd.read_csv(os.path.join(res, f"med_lab_pairs_vital_signs_{vital_name} (F).csv")).drop(columns=["Unnamed: 0"])


In [127]:
# for a_w in after_windows:
#     med_lab_pairs_2[f"after_abs_{a_w}_sp"] = med_lab_pairs_2[f"after_abs_{a_w}_sp"].apply(lambda x : (x-32)*5/9)
# for b_w in before_windows:
#     med_lab_pairs_2[f"before_abs_{b_w}_sp"] = med_lab_pairs_2[f"before_abs_{b_w}_sp"].apply(lambda x : (x-32)*5/9)
        

In [134]:
# med_lab_pairs = pd.concat([med_lab_pairs_1, med_lab_pairs_2]).reset_index().drop(columns=["index"])

In [157]:
# med_lab_pairs["LAB_NAME"] = med_lab_pairs["LAB_NAME"].apply(lambda x : vital_name)

In [167]:
# Read the medication / vital-sign pair table for this vital from the results directory
# and discard the saved index column ("Unnamed: 0").
med_lab_pairs = pd.read_csv(
    os.path.join(res, f"med_lab_pairs_vital_signs_{vital_name}.csv")
).drop(columns=["Unnamed: 0"])

In [168]:
# Add one after/before ratio column per (before window, after window) combination.
for b_w in before_windows:
    before_col = f"before_abs_{b_w}_sp"
    for a_w in after_windows:
        print(b_w, a_w)
        after_col = f"after_abs_{a_w}_sp"
        med_lab_pairs[f"ratio_{b_w}_{a_w}"] = med_lab_pairs[after_col].div(med_lab_pairs[before_col])

(0, 6) (0, 4)
(0, 6) (4, 8)
(0, 6) (8, 12)
(6, 12) (0, 4)
(6, 12) (4, 8)
(6, 12) (8, 12)


In [169]:
# Summary statistics of the (0, 6) -> (0, 4) ratio over rows where it is present.
med_lab_pairs[["ratio_(0, 6)_(0, 4)"]].dropna().describe()

Unnamed: 0,"ratio_(0, 6)_(0, 4)"
count,83127.0
mean,inf
std,
min,-2.795652
25%,0.9930545
50%,1.0
75%,1.008403
max,inf


In [170]:
# # Original size of med lab pairs - 1826730
# for a_w in after_windows:
#     for b_w in before_windows:
#         med_lab_pairs = pd.concat([ med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]<1) & (med_lab_pairs[f"ratio_{b_w}_{a_w}"]!=1)], med_lab_pairs[(med_lab_pairs[f"after_time_{a_w}_sp"]>=1) | (med_lab_pairs[f"after_time_{a_w}_sp"].isna())] ])

In [171]:
# Number of non-null HADM_ID entries for every (medication, vital sign) pair.
med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"])[["HADM_ID"]].count()

Unnamed: 0_level_0,Unnamed: 1_level_0,HADM_ID
MED_NAME,LAB_NAME,Unnamed: 2_level_1
ACD-A Citrate (1000ml),Temperature,141
ACD-A Citrate (500ml),Temperature,4
Abciximab (Reopro),Temperature,2
Acetaminophen-IV,Temperature,387
Acyclovir,Temperature,218
...,...,...
Verapamil,Temperature,9
Vitamin K (Phytonadione),Temperature,94
Vivonex (1/2),Temperature,1
Vivonex (Full),Temperature,39


In [172]:
# Count non-null HADM_ID entries per (medication, vital sign) pair and keep pairs with more than 100.
pairs_df = med_lab_pairs.groupby(["MED_NAME", "LAB_NAME"]).count()[["HADM_ID"]]
pairs = pairs_df[pairs_df["HADM_ID"]>100].index

# Fixed schema so res_df1 has these columns even if no pair yields a test.
result_columns = [
    "Med Name",
    "Lab Name",
    "Before Window (in Hours)",
    "After Window (in Hours)",
    "No. of Patients",
    "1-Sampled Ttest",
]

discovery_res1 = []
for med_name, lab_name in pairs:
    # The rows of a pair do not depend on the window, so select them once per pair.
    pair_rows = med_lab_pairs[(med_lab_pairs["LAB_NAME"]==lab_name) & (med_lab_pairs["MED_NAME"]==med_name)]
    for a_w in after_windows:
        for b_w in before_windows:
            # Infinite ratios are treated as missing and dropped together with NaNs.
            vals = pair_rows[f"ratio_{b_w}_{a_w}"].replace([np.inf, -np.inf], np.nan).dropna()
            if vals.shape[0]==0:
                # Nothing to test for this window combination. Skipping here (instead of
                # appending unconditionally) avoids re-adding the previous iteration's row.
                continue
            # One-sample t-test against a mean ratio of 1.
            # Stored in `ttest_res`, not `res`, so the results-directory path `res`
            # from the IO config cell is not overwritten.
            ttest_res = stats.ttest_1samp(vals.to_numpy(), popmean=1)
            discovery_res1.append({
                "Med Name": med_name,
                "Lab Name": lab_name,
                "Before Window (in Hours)": b_w,
                "After Window (in Hours)": a_w,
                "No. of Patients": vals.shape[0],
                "1-Sampled Ttest" : ttest_res.pvalue
            })
res_df1 = pd.DataFrame(discovery_res1, columns=result_columns)

In [173]:
# Work on a copy so the t-test table in res_df1 is kept as computed.
res_df = res_df1.copy()

In [174]:
# Show only the tests that were run on more than 100 values.
res_df.loc[res_df["No. of Patients"].gt(100)]

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest
6,Acetaminophen-IV,Temperature,"(0, 6)","(0, 4)",265,0.013550
7,Acetaminophen-IV,Temperature,"(6, 12)","(0, 4)",179,0.000473
8,Acetaminophen-IV,Temperature,"(0, 6)","(4, 8)",234,0.969004
9,Acetaminophen-IV,Temperature,"(6, 12)","(4, 8)",159,0.295592
10,Acetaminophen-IV,Temperature,"(0, 6)","(8, 12)",168,0.385002
...,...,...,...,...,...,...
729,Vancomycin,Temperature,"(6, 12)","(4, 8)",1424,0.115612
730,Vancomycin,Temperature,"(0, 6)","(8, 12)",2037,0.072181
731,Vancomycin,Temperature,"(6, 12)","(8, 12)",1211,0.105769
732,Vasopressin,Temperature,"(0, 6)","(0, 4)",140,0.396719


In [175]:
# Echo the current vital-sign label; it determines the file name of the CSV written next.
vital_name

'Temperature'

In [176]:
# Save the p-values of tests with more than 100 values to the results directory.
res_df.loc[res_df["No. of Patients"].gt(100)].to_csv(
    os.path.join(
        setup_io_config(root_path=root_path)[1],
        f"pvals_mimic_vital_sign_{vital_name}.csv",
    )
)

In [177]:
# res_df = pd.read_csv(os.path.join(setup_io_config(root_path=root_path)[1], f"pvals_mimic_vital_sign_{vital_name}.csv")).drop(columns=["Unnamed: 0"]).drop_duplicates()

In [178]:
# Create the project's discovery analyzer, passing an empty list to its constructor.
# NOTE(review): what the constructor argument represents is not visible in this notebook — confirm in src.modeling.discovery.
dis_analyzer = discovery.ClinicalDiscoveryAnalysis([])

In [179]:
# Run the significance analysis on tests with more than 100 values and no missing fields.
# It returns the annotated p-value table plus three tables of significant rows.
(
    test_pval_data,
    significant_hard_thres,
    significant_bonferroni,
    significant_fdr,
) = dis_analyzer.generate_significant(
    res_df[res_df["No. of Patients"]>100].dropna(),
    statistical_test="1-Sampled Ttest",
)

In [180]:
# Show the p-value table returned by generate_significant, including its Bonferroni and FDR columns.
test_pval_data

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Temperature,"(0, 6)","(0, 4)",265,0.013550,1.000000,0.085701
1,Acetaminophen-IV,Temperature,"(6, 12)","(0, 4)",179,0.000473,0.230111,0.006768
2,Acetaminophen-IV,Temperature,"(0, 6)","(4, 8)",234,0.969004,1.000000,0.979056
3,Acetaminophen-IV,Temperature,"(6, 12)","(4, 8)",159,0.295592,1.000000,0.563963
4,Acetaminophen-IV,Temperature,"(0, 6)","(8, 12)",168,0.385002,1.000000,0.622911
...,...,...,...,...,...,...,...,...
482,Vancomycin,Temperature,"(6, 12)","(4, 8)",1424,0.115612,1.000000,0.383425
483,Vancomycin,Temperature,"(0, 6)","(8, 12)",2037,0.072181,1.000000,0.290894
484,Vancomycin,Temperature,"(6, 12)","(8, 12)",1211,0.105769,1.000000,0.365317
485,Vasopressin,Temperature,"(0, 6)","(0, 4)",140,0.396719,1.000000,0.633775


In [181]:
# (rows, columns) of each significance table: hard threshold, Bonferroni, FDR.
tuple(
    table.shape
    for table in (significant_hard_thres, significant_bonferroni, significant_fdr)
)

((64, 8), (24, 8), (37, 8))

In [182]:
# Keep only rows present in all three significance tables (inner joins on their shared columns).
merged_sig_pairs = (
    significant_fdr
    .merge(significant_bonferroni, how="inner")
    .merge(significant_hard_thres, how="inner")
)

In [183]:
# Show the rows that are significant under all three criteria.
merged_sig_pairs

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Albumin 5%,Temperature,"(6, 12)","(4, 8)",295,9.682158e-06,0.004715211,0.0002143278
1,Hydralazine,Temperature,"(0, 6)","(0, 4)",776,5.82491e-07,0.0002836731,1.772957e-05
2,Hydralazine,Temperature,"(6, 12)","(0, 4)",546,1.25449e-05,0.006109368,0.0002656247
3,Hydralazine,Temperature,"(0, 6)","(4, 8)",602,3.995885e-13,1.945996e-10,2.432495e-11
4,KCL (Bolus),Temperature,"(0, 6)","(4, 8)",821,6.897575e-06,0.003359119,0.000159958
5,LR,Temperature,"(0, 6)","(8, 12)",479,7.417275e-08,3.612213e-05,3.010178e-06
6,Labetalol,Temperature,"(0, 6)","(8, 12)",125,2.230092e-06,0.001086055,6.168628e-05
7,Metoprolol,Temperature,"(6, 12)","(0, 4)",867,2.110573e-08,1.027849e-05,9.344082e-07
8,Morphine Sulfate,Temperature,"(6, 12)","(0, 4)",571,1.092495e-07,5.320451e-05,3.800322e-06
9,Morphine Sulfate,Temperature,"(0, 6)","(4, 8)",588,2.279986e-06,0.001110353,6.168628e-05


In [184]:
# Save the intersection of significant pairs for this vital sign to the results directory.
merged_sig_pairs.to_csv(
    os.path.join(
        setup_io_config(root_path=root_path)[1],
        f"sig_pairs_intersection_mimic_vital_sign_{vital_name}.csv",
    )
)

## Combining

In [185]:
# Vital signs whose per-vital result files are combined below.
vitals = [
    "Systolic blood pressure",
    "Diastolic blood pressure",
    "Heart Rate",
    "Temperature",
    "Respiratory rate",
]

In [190]:
# Paths of the per-vital "significant pairs" CSVs inside the results directory.
sig_vital_files = [
    os.path.join(
        setup_io_config(root_path=root_path)[1],
        f"sig_pairs_intersection_mimic_vital_sign_{v}.csv",
    )
    for v in vitals
]

In [188]:
# Paths of the per-vital p-value CSVs inside the results directory.
pval_files = [
    os.path.join(
        setup_io_config(root_path=root_path)[1],
        f"pvals_mimic_vital_sign_{v}.csv",
    )
    for v in vitals
]

In [195]:
# Stack the per-vital tables into one frame each, dropping the saved index column
# ("Unnamed: 0") and renumbering the rows from 0.
merged_sig_pairs = pd.concat(
    [pd.read_csv(v).drop(columns=["Unnamed: 0"]) for v in sig_vital_files]
).reset_index(drop=True)
merged_pvals = pd.concat(
    [pd.read_csv(v).drop(columns=["Unnamed: 0"]) for v in pval_files]
).reset_index(drop=True)

In [199]:
# Show the significant pairs combined across all vital signs.
merged_sig_pairs

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(0, 4)",328,4.413339e-08,2.431750e-05,5.655232e-07
1,Albumin 5%,Systolic blood pressure,"(0, 6)","(0, 4)",991,1.172105e-05,6.458300e-03,7.781085e-05
2,Amiodarone,Systolic blood pressure,"(0, 6)","(4, 8)",327,1.622566e-05,8.940337e-03,1.039574e-04
3,Calcium Gluconate,Systolic blood pressure,"(0, 6)","(0, 4)",2798,3.399097e-13,1.872902e-10,9.364511e-12
4,Calcium Gluconate,Systolic blood pressure,"(6, 12)","(0, 4)",1838,7.193575e-17,3.963660e-14,3.048969e-15
...,...,...,...,...,...,...,...,...
243,PO Intake,Respiratory rate,"(0, 6)","(0, 4)",5728,2.022347e-14,1.053643e-11,5.268215e-12
244,PO Intake,Respiratory rate,"(6, 12)","(0, 4)",4138,2.030176e-26,1.057722e-23,1.057722e-23
245,PO Intake,Respiratory rate,"(6, 12)","(4, 8)",1548,4.393294e-10,2.288906e-07,4.577813e-08
246,Phenylephrine,Respiratory rate,"(0, 6)","(8, 12)",275,3.619673e-06,1.885850e-03,1.047694e-04


In [197]:
# Show the p-values combined across all vital signs.
merged_pvals

Unnamed: 0,Med Name,Lab Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest
0,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(0, 4)",328,4.413339e-08
1,Acetaminophen-IV,Systolic blood pressure,"(6, 12)","(0, 4)",233,9.361079e-01
2,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(4, 8)",311,1.695153e-04
3,Acetaminophen-IV,Systolic blood pressure,"(6, 12)","(4, 8)",217,2.629712e-01
4,Acetaminophen-IV,Systolic blood pressure,"(0, 6)","(8, 12)",213,4.635270e-01
...,...,...,...,...,...,...
2656,Vancomycin,Respiratory rate,"(6, 12)","(8, 12)",1556,2.264436e-03
2657,Vasopressin,Respiratory rate,"(0, 6)","(0, 4)",201,5.308395e-01
2658,Vasopressin,Respiratory rate,"(6, 12)","(0, 4)",173,6.030755e-01
2659,Vasopressin,Respiratory rate,"(0, 6)","(4, 8)",110,9.158257e-01


In [198]:
# Save the combined tables (all vital signs) to the results directory.
merged_sig_pairs.to_csv(
    os.path.join(
        setup_io_config(root_path=root_path)[1],
        "sig_pairs_intersection_mimic_vital_signs_all.csv",
    )
)
merged_pvals.to_csv(
    os.path.join(
        setup_io_config(root_path=root_path)[1],
        "pvals_mimic_vital_signs_all.csv",
    )
)