In [15]:
%load_ext autoreload

In [20]:
%autoreload 2

In [1]:
from src.parsers import mimic, hirid
from src.modeling import discovery, plots, querier
from src.utils import constants

In [2]:
import sys
# import win32com.client
import os
import pandas as pd

def setup_io_config(root_path):
    """
    Input - Output config. Builds the dataset/result paths used by the notebook.

    :param root_path: Repo path. It must exist (its entries are listed) and is
        expected to hold either a 'data' folder or a Windows 'data.lnk'
        shortcut, plus a 'results' folder.
    :return: tuple ``(mimic_data, mimic_path, hirid_data, hirid_path)`` where
        ``mimic_data`` is the shortcut target (if 'data.lnk' exists) or
        ``<root_path>/data``, ``mimic_path`` is ``<root_path>/results`` and the
        two HiRID paths live under ``<root_path>/data``.
    :raises ImportError: if 'data.lnk' is present but pywin32 is not installed.
    """

    # MIMIC
    shortcut_name = "data.lnk"

    if shortcut_name in os.listdir(root_path):
        # pywin32 is only needed (and only installable) on Windows, so import
        # it lazily here instead of at module level. Previously the module-level
        # import was commented out and this branch died with a NameError.
        try:
            import win32com.client
        except ImportError as exc:
            raise ImportError(
                "Found 'data.lnk' in root_path but pywin32 (win32com) is not "
                "installed; install pywin32 or replace the shortcut with a "
                "real 'data' folder."
            ) from exc
        path_shortcut = os.path.join(root_path, shortcut_name)
        shell = win32com.client.Dispatch("WScript.Shell")
        mimic_data = shell.CreateShortCut(path_shortcut).Targetpath
    else:
        mimic_data = os.path.join(f"{root_path}", "data")
    mimic_path = os.path.join(f"{root_path}", "results")

    # HIRID
    # NOTE(review): these paths always use <root_path>/data, even when the MIMIC
    # data location was resolved through the 'data.lnk' shortcut - confirm that
    # this is intended.
    hirid_data = f'{root_path}/data/hirid-a-high-time-resolution-icu-dataset-1.1.1/raw_stage/'
    hirid_path = f'{root_path}/data/hirid-a-high-time-resolution-icu-dataset-1.1.1'

    return mimic_data, mimic_path, hirid_data, hirid_path

def setup_stratification_config():
    """
    Stratification config: the fixed cohort filters and time windows.

    :return: tuple ``(gender, age_a, age_b, ethnicity, lab_mapping,
        before_windows, after_windows)``. Note that ``age_a`` (80) comes
        before ``age_b`` (40) in the returned tuple.
    """
    # Window boundaries as (start, end) pairs - presumably hours relative to
    # the medication time; confirm against the querier.
    before_windows = [(0, 6), (6, 12)]
    after_windows = [(0, 4), (4, 8), (8, 12)]

    return (
        "MF",                   # gender
        80,                     # age_a
        40,                     # age_b
        "WHITE",                # ethnicity
        constants.LAB_MAPPING,  # lab_mapping
        before_windows,
        after_windows,
    )

In [3]:
# IO Config
# The repo location is machine-specific. Set the DRUGLAB_ROOT environment
# variable to point at your checkout instead of editing this cell; the default
# below is kept only so existing setups keep working.
# Example of another (Windows) location used previously:
#   "C:\\Users\\danco\\My Drive\\Master\\Datasets\\MIMIC iii"
root_path = os.environ.get(
    "DRUGLAB_ROOT",
    "/Users/pavan/Library/CloudStorage/GoogleDrive-f20190038@hyderabad.bits-pilani.ac.in/My Drive/TAU/Code/DrugLab",
)
# data/res are the MIMIC input/output paths; raw_path/res_path are the HiRID ones.
data, res, raw_path, res_path = setup_io_config(root_path=root_path)

# Stratification Config
gender, age_a, age_b, ethnicity, lab_mapping, before_windows, after_windows = setup_stratification_config()

In [None]:
# MIMIC: parse -> query medication/lab pairs -> discovery analysis -> plots
mimic_parser = mimic.MIMICParser(
    data=data,
    res=res,
    gender=gender,
    age_b=age_b,
    age_a=age_a,
    ethnicity=ethnicity,
    load="AUTOMATIC_MAPPING_MIMIC",
)
m_med1, m_med2, m_labs = mimic_parser.parse(
    use_pairs=False,
    load_from_raw=False,
    load_raw_chartevents=False,
)

## Querier
mimic_data_querier = querier.DatasetQuerier(
    data=data, res=res,
    t_labs=m_labs, t_med1=m_med1, t_med2=m_med2,
    gender=gender, age_b=age_b, age_a=age_a, ethnicity=ethnicity,
    lab_mapping=lab_mapping,
)

# Pairs for every medication and lab test.
# NOTE(review): the HiRID cell further down unpacks TWO values from
# generate_med_lab_data (called there with lab_parts=...); confirm that this
# call really returns a single table.
m_final_lab_med_data = mimic_data_querier.generate_med_lab_data(before_windows, after_windows)

# Pairs for one medication and one lab test (same window values as the
# stratification config, restated here as b_w / a_w).
b_w, a_w = [(0, 6), (6, 12)], [(0, 4), (4, 8), (8, 12)]
med_lab_pair_1 = mimic_data_querier.query('Insulin - Regular', 'Glucose', b_w, a_w)

## Discovery analysis of the queried pairs over the chosen before/after windows
analyzer = discovery.ClinicalDiscoveryAnalysis(m_final_lab_med_data)
pvals_med_lab = analyzer.analyze(before_windows, after_windows)
# Rows without a paired t-test value are dropped before selecting significant pairs.
sig_med_lab = analyzer.generate_significant(
    pvals_med_lab.dropna(subset=["TTest Paired"])
)

## Plots
plotter = plots.ClinicalPlotAnalysis(
    data=data, res=res,
    gender=gender, age_b=age_b, age_a=age_a, ethnicity=ethnicity,
    lab_mapping=lab_mapping,
)
m_corrs_data_df = plotter.plot(
    m_final_lab_med_data,
    m_labs,
    before_windows=before_windows,
    after_windows=after_windows,
)

In [None]:
# HIRID: parse medications and labs
# hirid_mapping is only referenced by the commented-out lab filter in the next cell.
hirid_mapping = constants.HIRID_MAPPING
# lab_parts is forwarded to parse() and later to generate_med_lab_data();
# presumably a range of raw lab file parts - TODO confirm.
lab_parts = (0,10)
hirid_parser = hirid.HiRiDParser(
    data=raw_path,
    res=res_path,
    gender=gender,
    age_b=age_b,
    age_a=age_a,
    load="MANUAL_MAPPING_HIRID",
)
h_med1, h_med2, h_labs = hirid_parser.parse(lab_parts=lab_parts)

In [None]:
# lab_ids = [l for k in hirid_mapping.values() for l in k]
# h_labs = h_labs[h_labs.OldITEMID.isin(lab_ids)]

In [30]:
# Number of rows per lab/vital-sign ITEMID in the parsed HiRID labs.
# NOTE(review): the saved output lists 'Metronidazole tabl 200 mg' (a medication
# name) among these ITEMIDs - possibly a mapping collision; verify.
h_labs["ITEMID"].value_counts()

Respiratory rate                                                             2863687
Heart rate                                                                   2113052
Peripheral oxygen saturation                                                 2003155
Metronidazole tabl 200 mg                                                     786114
Core body temperature                                                         210708
Glucose [Moles/volume] in Serum or Plasma                                      19938
Sodium [Moles/volume] in Blood                                                 12201
Potassium [Moles/volume] in Blood                                              12108
Carboxyhemoglobin/Hemoglobin.total in Arterial blood                           10970
Methemoglobin/Hemoglobin.total in Arterial blood                               10966
Calcium.ionized [Moles/volume] in Blood                                        10807
Hemoglobin [Mass/volume] in Arterial blood                       

In [16]:
# Reload the HiRID labs from the local CSV cache; the stored index column
# ("Unnamed: 0") is discarded.
# The cache was written with: h_labs.to_csv("temp_hirid_labs.csv")
# NOTE(review): the saved output echoes this statement, which looks like a
# pandas warning (e.g. mixed column dtypes) - consider passing explicit dtypes.
h_labs = (
    pd.read_csv("temp_hirid_labs.csv")
    .drop(columns="Unnamed: 0")
)

  h_labs = pd.read_csv("temp_hirid_labs.csv").drop(columns=["Unnamed: 0"])


In [17]:
# Reload both HiRID medication tables from their local CSV caches, discarding
# the stored index column ("Unnamed: 0").
# The caches were written with:
#   h_med1.to_csv("temp_hirid_med1.csv")
#   h_med2.to_csv("temp_hirid_med2.csv")
h_med1 = (
    pd.read_csv("temp_hirid_med1.csv")
    .drop(columns="Unnamed: 0")
)
h_med2 = (
    pd.read_csv("temp_hirid_med2.csv")
    .drop(columns="Unnamed: 0")
)

In [19]:
# Querier over the HiRID tables.
# NOTE(review): unlike the MIMIC querier, no lab_mapping is passed here, and
# `ethnicity` is still the MIMIC stratification value even though the HiRID
# parser takes no ethnicity and the HiRID plotter passes "" - confirm intent.
hirid_data_querier = querier.DatasetQuerier(
    data=raw_path, res=res_path,
    t_labs=h_labs, t_med1=h_med1, t_med2=h_med2,
    gender=gender, age_b=age_b, age_a=age_a, ethnicity=ethnicity,
)

In [None]:
# Build the medication/lab pair tables for HiRID; the call yields two values,
# bound here as the "final" and "raw" tables.
final_h_final_lab_med_data, raw_h_final_lab_med_data = (
    hirid_data_querier.generate_med_lab_data(
        before_windows,
        after_windows,
        lab_parts=lab_parts,
    )
)

In [4]:
# Replace the pair table with a cached CSV, discarding the stored index column.
# NOTE(review): the file name carries "(0, 50)" while lab_parts above is (0,10);
# confirm this cache matches the configuration used elsewhere in the notebook.
final_h_final_lab_med_data = (
    pd.read_csv("temp_hirid_med_lab_pairs_with_vital_signs_(0, 50).csv")
    .drop(columns=["Unnamed: 0"])
)

In [5]:
# Display the HiRID medication/lab pair table (the notebook shows a truncated view).
final_h_final_lab_med_data

Unnamed: 0,HADM_ID,OldITEMID,STARTTIME,ENDTIME,givendose,cumulativedose,fluidamount_calc,cumulfluidamount_calc,doseunit,route,...,"after_time_(4, 8)_sp","after_abs_(4, 8)_sp","after_std_(4, 8)_sp","after_mean_(4, 8)_sp","after_trends_(8, 12)_sp","after_time_(8, 12)_sp","after_abs_(8, 12)_sp","after_std_(8, 12)_sp","after_mean_(8, 12)_sp",LAB_NAME
0,4,107,2149-01-09 22:18:00,2149-01-09 22:18:40.283,0.0,0.0,0.0,0.0,U,iv-inf,...,,,,,,,,,,Carboxyhemoglobin/Hemoglobin.total in Arterial...
1,4,107,2149-01-09 22:18:00,2149-01-09 22:18:40.283,0.0,0.0,0.0,0.0,U,iv-inf,...,,,,,,,,,,Glucose [Moles/volume] in Serum or Plasma
2,4,107,2149-01-09 22:18:00,2149-01-09 22:18:40.283,0.0,0.0,0.0,0.0,U,iv-inf,...,,,,,,,,,,Heart rate
3,4,107,2149-01-09 22:18:00,2149-01-09 22:18:40.283,0.0,0.0,0.0,0.0,U,iv-inf,...,,,,,,,,,,Hemoglobin [Mass/volume] in Arterial blood
4,4,107,2149-01-09 22:18:00,2149-01-09 22:18:40.283,0.0,0.0,0.0,0.0,U,iv-inf,...,,,,,,,,,,Hemoglobin [Mass/volume] in Blood
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208775,33741,225,2170-01-01 19:00:00,2170-01-01 19:05:13.690,1.0,1.0,0.0,0.0,g,p rectal,...,,,,,,10.912864,99.00,0.0,99.00,Creatinine [Moles/volume] in Blood
208776,33904,1000747,2162-03-20 18:51:00,2162-03-20 18:51:56.643,20.0,20.0,2.0,2.0,mg,cv-inj,...,,,,,,11.950933,78.00,0.0,78.00,Alanine aminotransferase [Enzymatic activity/v...
208777,33904,1000747,2162-03-20 18:51:00,2162-03-20 18:51:56.643,20.0,20.0,2.0,2.0,mg,cv-inj,...,,,,,,11.950933,63.00,0.0,63.00,Aspartate aminotransferase [Enzymatic activity...
208778,33904,1000747,2162-03-20 18:51:00,2162-03-20 18:51:56.643,20.0,20.0,2.0,2.0,mg,cv-inj,...,,,,,,11.950933,57.00,0.0,57.00,Creatinine [Moles/volume] in Blood


In [6]:
## Discovery analysis of the HiRID medication/lab pairs over the chosen windows.
## This rebinds `analyzer` and `pvals_med_lab`, which the MIMIC cell also assigns.
analyzer = discovery.ClinicalDiscoveryAnalysis(final_h_final_lab_med_data)
types_l = ["abs", "mean", "std", "trends"]
# NOTE(review): the saved outputs below show only the (0, 4) after-window and
# only Type == "abs", although three after-windows and four types are requested
# here - verify analyze() covers all of them.
pvals_med_lab = analyzer.analyze(
    before_windows,
    after_windows,
    min_patients=100,
    types_l=types_l,
)

In [None]:
# How many p-value rows exist per after-window.
pvals_med_lab["After Window (in Hours)"].value_counts()

(0, 4)    198
Name: After Window (in Hours), dtype: int64

In [None]:
# How many p-value rows exist per before-window.
pvals_med_lab["Before Window (in Hours)"].value_counts()

In [7]:
# How many p-value rows exist per value of the "Type" column.
pvals_med_lab["Type"].value_counts()

abs    198
Name: Type, dtype: int64

In [20]:
# pvals_med_lab.to_csv("temp_hirid_pvals_med_lab_pairs.csv")
# pvals_med_lab = pd.read_csv("temp_hirid_pvals_med_lab_pairs.csv").drop(columns=["Unnamed: 0"])

In [8]:
# Display the p-value table (one row per lab/medication/window combination).
pvals_med_lab

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),Mannwhitneyu Test,TTest Independent,TTest Paired,No of Patients,Type
0,Glucose [Moles/volume] in Serum or Plasma,Aspirin Tbl 100 mg,"(0, 6)","(0, 4)",0.821917,0.507556,2.718201e-01,195,abs
1,Heart rate,Aspirin Tbl 100 mg,"(0, 6)","(0, 4)",0.875718,0.693659,4.985509e-02,385,abs
2,Metronidazole tabl 200 mg,Aspirin Tbl 100 mg,"(0, 6)","(0, 4)",0.501696,0.543505,3.207701e-01,335,abs
3,Peripheral oxygen saturation,Aspirin Tbl 100 mg,"(0, 6)","(0, 4)",0.758049,0.892949,8.269229e-01,385,abs
4,Respiratory rate,Aspirin Tbl 100 mg,"(0, 6)","(0, 4)",0.323051,0.233550,5.070515e-02,338,abs
...,...,...,...,...,...,...,...,...,...
193,Metronidazole tabl 200 mg,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.653531,0.970078,9.487329e-01,396,abs
194,Peripheral oxygen saturation,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.981826,0.687517,4.906269e-01,404,abs
195,Potassium [Moles/volume] in Blood,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.002433,0.002195,3.260668e-06,202,abs
196,Respiratory rate,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.000001,0.051343,1.302714e-03,396,abs


In [21]:
# Select significant pairs; rows without a paired t-test value are dropped first.
# The call yields four tables, unpacked in this order.
pval, hard, bonferroni, fdr = analyzer.generate_significant(
    pvals_med_lab.dropna(subset=["TTest Paired"])
)

In [9]:
# Reload the four significance tables from their local CSV caches, discarding
# the stored index column ("Unnamed: 0").
# The caches were written from the generate_significant() result with:
#   fdr.to_csv("temp_hirid_sig_med_lab_pairs_fdr.csv")
#   bonferroni.to_csv("temp_hirid_sig_med_lab_pairs_bonferroni.csv")
#   hard.to_csv("temp_hirid_sig_med_lab_pairs_hard.csv")
#   pval.to_csv("temp_hirid_sig_med_lab_pairs_pval.csv")
# NOTE(review): the saved output of this cell is a NameError for 'fdr' -
# presumably left over from a run where the to_csv lines were active before
# `fdr` existed; re-run to refresh it.

fdr = (
    pd.read_csv("temp_hirid_sig_med_lab_pairs_fdr.csv")
    .drop(columns="Unnamed: 0")
)
bonferroni = (
    pd.read_csv("temp_hirid_sig_med_lab_pairs_bonferroni.csv")
    .drop(columns="Unnamed: 0")
)
hard = (
    pd.read_csv("temp_hirid_sig_med_lab_pairs_hard.csv")
    .drop(columns="Unnamed: 0")
)
pval = (
    pd.read_csv("temp_hirid_sig_med_lab_pairs_pval.csv")
    .drop(columns="Unnamed: 0")
)

NameError: name 'fdr' is not defined

In [26]:
# Rows of `fdr` backed by at least 100 patients, ordered by paired t-test p-value.
# NOTE(review): in the saved output the BonferroniPvals / FDR columns are 1.0 or
# NaN even for very small paired p-values - verify the correction columns.
fdr1 = fdr.loc[fdr["No of Patients"] >= 100]
fdr1.sort_values(by="TTest Paired")

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),Mannwhitneyu Test,TTest Independent,TTest Paired,No of Patients,BonferroniPvals,FDR Benjamini Corrected
1,Calcium.ionized [Moles/volume] in Blood,Benuron supp 1000 mg,"(0, 6)","(0, 4)",2.128423e-05,9.3e-05,2.114749e-14,164,1.0,1.0
4,Sodium [Moles/volume] in Blood,Benuron supp 1000 mg,"(0, 6)","(0, 4)",0.0005472394,0.001141,3.311539e-13,166,1.0,1.0
42,Sodium [Moles/volume] in Blood,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.000494871,0.004436,1.454019e-12,201,,
20,Respiratory rate,Fentanyl inj 50 µg/ml,"(0, 6)","(0, 4)",5.822183e-08,7e-06,1.549206e-11,700,1.0,1.0
39,Calcium.ionized [Moles/volume] in Blood,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.003788822,0.023479,2.332097e-07,197,,
2,Potassium [Moles/volume] in Blood,Benuron supp 1000 mg,"(0, 6)","(0, 4)",0.0006783857,0.000269,2.656801e-07,167,1.0,1.0
14,Respiratory rate,Disoprivan 2%,"(0, 6)","(0, 4)",9.491187e-08,0.705887,3.607533e-07,340,1.0,1.0
41,Potassium [Moles/volume] in Blood,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.002432545,0.002195,3.260668e-06,202,,
18,Metronidazole tabl 200 mg,Esmeron Inj Lsg 50 mg,"(0, 6)","(0, 4)",0.0001291785,0.001096,4.191782e-06,132,1.0,1.0
40,Glucose [Moles/volume] in Serum or Plasma,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.002367432,0.020415,2.031535e-05,292,,


In [28]:
# Rows of `bonferroni` backed by at least 100 patients, ordered by paired
# t-test p-value.
bonferroni1 = bonferroni.loc[bonferroni["No of Patients"] >= 100]
bonferroni1.sort_values(by="TTest Paired")

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),Mannwhitneyu Test,TTest Independent,TTest Paired,No of Patients,BonferroniPvals,FDR Benjamini Corrected
0,Calcium.ionized [Moles/volume] in Blood,Benuron supp 1000 mg,"(0, 6)","(0, 4)",2.128423e-05,9.3e-05,2.114749e-14,164,1.0,1.0
2,Sodium [Moles/volume] in Blood,Benuron supp 1000 mg,"(0, 6)","(0, 4)",0.0005472394,0.001141,3.311539e-13,166,1.0,1.0
36,Sodium [Moles/volume] in Blood,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.000494871,0.004436,1.454019e-12,201,,
17,Respiratory rate,Fentanyl inj 50 µg/ml,"(0, 6)","(0, 4)",5.822183e-08,7e-06,1.549206e-11,700,1.0,1.0
35,Calcium.ionized [Moles/volume] in Blood,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.003788822,0.023479,2.332097e-07,197,,
1,Potassium [Moles/volume] in Blood,Benuron supp 1000 mg,"(0, 6)","(0, 4)",0.0006783857,0.000269,2.656801e-07,167,1.0,1.0
12,Respiratory rate,Disoprivan 2%,"(0, 6)","(0, 4)",9.491187e-08,0.705887,3.607533e-07,340,1.0,1.0


In [25]:
# Display the `hard` significance table. It is not filtered by patient count:
# the saved output includes rows with as few as 2 patients.
hard

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),Mannwhitneyu Test,TTest Independent,TTest Paired,No of Patients,BonferroniPvals,FDR Benjamini Corrected
0,Sodium [Moles/volume] in Blood,Aspirin Tbl 100 mg,"(0, 6)","(0, 4)",0.175381,0.229009,1.361095e-04,82,1.0,1.0
1,Respiratory rate,Augmentin 2.2 Inf Lsg,"(0, 6)","(0, 4)",0.001558,0.000363,1.585196e-05,36,1.0,1.0
2,Heart rate,Beloc Inj Lsg,"(0, 6)","(0, 4)",0.406253,0.553648,6.337474e-04,44,1.0,1.0
3,Glucose [Moles/volume] in Serum or Plasma,Beloc ZOK ret Tbl 25 mg,"(0, 6)","(0, 4)",0.088639,0.089094,7.610830e-03,105,1.0,1.0
4,Hemoglobin [Mass/volume] in Arterial blood,Benerva Inj Lsg 100mg/1 ml,"(0, 6)","(0, 4)",0.230994,0.319285,2.297455e-03,120,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...
91,Potassium [Moles/volume] in Blood,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.002433,0.002195,3.260668e-06,202,,
92,Respiratory rate,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.000001,0.051343,1.302714e-03,396,,
93,Sodium [Moles/volume] in Blood,Zinacef Amp 1.5 g,"(0, 6)","(0, 4)",0.000495,0.004436,1.454019e-12,201,,
94,Heart rate,Zoloft Tbl 50 mg,"(0, 6)","(0, 4)",0.666667,0.978728,0.000000e+00,2,,


In [None]:

# Plots for HiRID: constructed with an empty ethnicity and an empty lab mapping.
h_plotter = plots.ClinicalPlotAnalysis(
    data=raw_path, res=res_path,
    gender=gender, age_b=age_b, age_a=age_a,
    ethnicity="",
    lab_mapping={},
)
h_corrs_data_df = h_plotter.plot(
    final_h_final_lab_med_data,
    h_labs,
    before_windows=before_windows,
    after_windows=after_windows,
)