## Utils

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
from src.parsers import mimic
from src.utils import constants
from scipy import stats
import numpy as np

In [4]:
import sys
# import win32com.client
import os
import pandas as pd

def setup_io_config(root_path):
    """
    Input - Output config. Add dataset paths
    :root_path -> Repo path which contains 'data' and 'res' folders
    """

    # MIMIC
    is_shortcut = True if "data.lnk" in os.listdir(root_path) else False 
    
    if (is_shortcut):
        path_shortcut =  os.path.join(root_path, "data.lnk")
        shell = win32com.client.Dispatch("WScript.Shell")
        mimic_data = shell.CreateShortCut(path_shortcut).Targetpath
    else:
        mimic_data = os.path.join(f"{root_path}", "data") 
    mimic_path = os.path.join(f"{root_path}", "results")

    # HIRID
    hirid_data = f'{root_path}/data/hirid-a-high-time-resolution-icu-dataset-1.1.1/raw_stage/'
    hirid_path = f'{root_path}/data/hirid-a-high-time-resolution-icu-dataset-1.1.1'
    
    return mimic_data, mimic_path, hirid_data, hirid_path

def setup_stratification_config():
    gender="MF"
    age_b=40
    age_a=80 
    ethnicity="WHITE" 
    lab_mapping= constants.LAB_MAPPING
    b_w = [(0,6), (6,12)]
    a_w = [(0,4), (4,8), (8,12)]
    before_windows = b_w
    after_windows = a_w
    return gender, age_a, age_b, ethnicity, lab_mapping, before_windows, after_windows

In [5]:
# IO Config
# root_path ="C:\\Users\\danco\\My Drive\\Master\\Datasets\\MIMIC iii"
root_path = "/Users/pavan/Library/CloudStorage/GoogleDrive-f20190038@hyderabad.bits-pilani.ac.in/My Drive/TAU/Code/DrugLab"
data, res, raw_path, res_path = setup_io_config(root_path=root_path)

# Stratification Config
gender, age_a, age_b, ethnicity, lab_mapping, before_windows, after_windows = setup_stratification_config()

## MIMIC

In [4]:
mimic_parser = mimic.MIMICParser(data=data, res=res, gender=gender, age_b=age_b, age_a=age_a, ethnicity=ethnicity, load="AUTOMATIC_MAPPING_MIMIC")
m_n_meds, m_labs = mimic_parser.parse(load_raw_chartevents=False, n_meds=True)

Loading med data...
Loaded med data.
Load Med data...
Load Lab data...


  labs = pd.read_csv(os.path.join(self.data, constants.MIMIC_III_PREPROCESSED_PATH, constants.MIMIC_III_PREPROCESSED_LABDATA))


Loaded Lab data.
Loading 1 med data...
Loaded 1 med data with 138658 medication administrations.
Loading 2 med data...
Loaded 2 med data with 101664 medication administrations.
Loading 3 med data...
Loaded 3 med data with 81753 medication administrations.
Loading 4 med data...
Loaded 4 med data with 67896 medication administrations.
Loading 5 med data...
Loaded 5 med data with 58458 medication administrations.
Loading 6 med data...
Loaded 6 med data with 51001 medication administrations.
Loading 7 med data...
Loaded 7 med data with 45266 medication administrations.
Loading 8 med data...
Loaded 8 med data with 40652 medication administrations.
Loading 9 med data...
Loaded 9 med data with 36770 medication administrations.
Loading 10 med data...
Loaded 10 med data with 33559 medication administrations.
Loading 11 med data...
Loaded 11 med data with 30773 medication administrations.
Loading 12 med data...
Loaded 12 med data with 28399 medication administrations.
Loading 13 med data...
Load

  med_k = pd.read_csv(med_vect_data_path) if not load_from_raw and os.path.exists(med_vect_data_path) else self.generate_med_k_vect(med_preprocessed=med_preprocessed, k=k)


Loading 36 med data...
Loaded 36 med data with 8193 medication administrations.
Loading 37 med data...
Loaded 37 med data with 7896 medication administrations.
Loading 38 med data...
Loaded 38 med data with 7606 medication administrations.
Loading 39 med data...
Loaded 39 med data with 7349 medication administrations.
Loading 40 med data...
Loaded 40 med data with 7098 medication administrations.
Loading 41 med data...
Loaded 41 med data with 6890 medication administrations.
Loading 42 med data...
Loaded 42 med data with 6671 medication administrations.
Loading 43 med data...
Loaded 43 med data with 6475 medication administrations.
Loading 44 med data...
Loaded 44 med data with 6240 medication administrations.
Loading 45 med data...
Loaded 45 med data with 6062 medication administrations.
Loading 46 med data...
Loaded 46 med data with 5914 medication administrations.
Loading 47 med data...
Loaded 47 med data with 5761 medication administrations.
Loading 48 med data...
Loaded 48 med dat

In [29]:
from src.modeling import querier
## Querier
mimic_data_querier = querier.DatasetQuerier(
    data = data,
    res = res,
    t_labs=m_labs, 
    meds=m_n_meds[180:185],
    gender=gender, 
    age_b=age_b, 
    age_a=age_a, 
    ethnicity=ethnicity, 
    lab_mapping=lab_mapping
)

In [30]:
# query pairs for all medication and lab tests
final_pairs_data, interim_pairs_data = mimic_data_querier.generate_med_lab_data(before_windows, after_windows, lab_parts=(0,1000))

100%|██████████| 5/5 [03:54<00:00, 46.81s/it]


In [41]:
[k.shape for k in final_pairs_data]

[(3285, 96), (2535, 96), (2485, 96), (2457, 96), (3051, 96)]

In [38]:
m_n_meds[180].shape, m_n_meds[0].shape

((524, 69), (138658, 68))

In [39]:
concat_final_pairs_data = pd.concat(final_pairs_data)

In [74]:
from src.modeling import discovery
## Discovery Analysis for the queried medication and lab test pairs in the chosen before and after windows
analyzer = discovery.ClinicalDiscoveryAnalysis(concat_final_pairs_data)
pvals_med_lab = analyzer.analyze(before_windows, after_windows)
pvals_ratio_sampled = analyzer.analyze_ratio(before_windows, after_windows)

In [75]:
pvals_med_lab

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),Mannwhitneyu Test,TTest Independent,TTest Paired,No of Patients,Type
0,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(0, 4)",0.869181,0.814442,0.651946,323,abs
1,Diastolic blood pressure,Dextrose 5%,"(6, 12)","(0, 4)",0.772127,0.559803,0.327799,129,abs
2,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(4, 8)",0.943172,0.97847,0.966841,149,abs
3,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(8, 12)",0.995403,0.909159,0.864936,104,abs
4,Heart Rate,Dextrose 5%,"(0, 6)","(0, 4)",0.482301,0.423557,0.046956,321,abs
5,Heart Rate,Dextrose 5%,"(6, 12)","(0, 4)",0.476579,0.411156,0.187444,128,abs
6,Heart Rate,Dextrose 5%,"(0, 6)","(4, 8)",0.520203,0.611674,0.328822,147,abs
7,Heart Rate,Dextrose 5%,"(0, 6)","(8, 12)",0.640685,0.511485,0.299478,104,abs
8,Pulse oximetry,Dextrose 5%,"(0, 6)","(0, 4)",0.330995,0.52008,0.253191,318,abs
9,Pulse oximetry,Dextrose 5%,"(6, 12)","(0, 4)",0.28924,0.249883,0.105499,128,abs


In [76]:
pvals_ratio_sampled

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest
0,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(0, 4)",299,0.098280
1,Diastolic blood pressure,Dextrose 5%,"(6, 12)","(0, 4)",117,0.035804
2,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(4, 8)",142,0.175400
3,Diastolic blood pressure,Dextrose 5%,"(6, 12)","(4, 8)",49,0.185905
4,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(8, 12)",98,0.570025
...,...,...,...,...,...,...
123,Temperature (F),Solution,"(0, 6)","(0, 4)",53,0.021065
124,Temperature (F),Solution,"(6, 12)","(0, 4)",32,0.813956
125,Temperature (F),Solution,"(0, 6)","(4, 8)",30,0.629980
126,Temperature (F),Solution,"(6, 12)","(4, 8)",15,0.913140


In [77]:
test_pval_data, significant_hard_thres, significant_bonferroni, significant_fdr = analyzer.generate_significant(pvals_med_lab)
merged_sig_pairs = pd.merge(pd.merge(significant_fdr, significant_bonferroni, how="inner"), significant_hard_thres, how="inner")

In [78]:
merged_sig_pairs

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),Mannwhitneyu Test,TTest Independent,TTest Paired,No of Patients,Type,BonferroniPvals,FDR Benjamini Corrected


In [79]:
test_pval_data_ratio, significant_hard_thres_ratio, significant_bonferroni_ratio, significant_fdr_ratio = analyzer.generate_significant(pvals_ratio_sampled[pvals_ratio_sampled["No. of Patients"]>100].dropna(), statistical_test="1-Sampled Ttest")
merged_sig_pairs_ratio = pd.merge(pd.merge(significant_fdr_ratio, significant_bonferroni_ratio, how="inner"), significant_hard_thres_ratio, how="inner")

In [80]:
merged_sig_pairs_ratio

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Respiratory rate,NaCl 0.9%,"(6, 12)","(0, 4)",110,8.4e-05,0.002676,0.002676


## HIRID

In [6]:
from src.parsers import hirid

In [8]:
hirid_mapping = constants.HIRID_MAPPING
hirid_parser = hirid.HiRiDParser(data=raw_path, res=res_path, gender=gender, age_b=age_b, age_a=age_a, load="AUTOMATIC_MAPPING_HIRID")
h_meds, h_labs = hirid_parser.parse(lab_parts=(0,5), n_med_limit=1000)
lab_ids = [l for k in hirid_mapping.values() for l in k]
h_labs_1 = h_labs[h_labs.OldITEMID.isin(lab_ids)]

KeyboardInterrupt: 

In [None]:
h_meds

In [None]:
hirid_data_querier = querier.DatasetQuerier(
    data = raw_path,
    res = res_path,
    t_labs=h_labs, 
    meds=h_meds[len(h_meds)-20 : len(h_meds)-15],
    gender=gender, 
    age_b=age_b, 
    age_a=age_a, 
    ethnicity=ethnicity, 
)

In [None]:
# query pairs for all medication and lab tests
final_pairs_data_h, interim_pairs_data_h = hirid_data_querier.generate_med_lab_data(before_windows, after_windows, lab_parts=(0,1000))

100%|██████████| 5/5 [03:54<00:00, 46.81s/it]


In [None]:
[k.shape for k in final_pairs_data]

[(3285, 96), (2535, 96), (2485, 96), (2457, 96), (3051, 96)]

In [None]:
h_meds[len(h_meds)-20].shape, h_meds[0].shape

((524, 69), (138658, 68))

In [None]:
concat_final_pairs_data = pd.concat(final_pairs_data)

In [None]:
from src.modeling import discovery
## Discovery Analysis for the queried medication and lab test pairs in the chosen before and after windows
analyzer = discovery.ClinicalDiscoveryAnalysis(concat_final_pairs_data)
pvals_med_lab = analyzer.analyze(before_windows, after_windows)
pvals_ratio_sampled = analyzer.analyze_ratio(before_windows, after_windows)

In [None]:
pvals_med_lab

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),Mannwhitneyu Test,TTest Independent,TTest Paired,No of Patients,Type
0,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(0, 4)",0.869181,0.814442,0.651946,323,abs
1,Diastolic blood pressure,Dextrose 5%,"(6, 12)","(0, 4)",0.772127,0.559803,0.327799,129,abs
2,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(4, 8)",0.943172,0.97847,0.966841,149,abs
3,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(8, 12)",0.995403,0.909159,0.864936,104,abs
4,Heart Rate,Dextrose 5%,"(0, 6)","(0, 4)",0.482301,0.423557,0.046956,321,abs
5,Heart Rate,Dextrose 5%,"(6, 12)","(0, 4)",0.476579,0.411156,0.187444,128,abs
6,Heart Rate,Dextrose 5%,"(0, 6)","(4, 8)",0.520203,0.611674,0.328822,147,abs
7,Heart Rate,Dextrose 5%,"(0, 6)","(8, 12)",0.640685,0.511485,0.299478,104,abs
8,Pulse oximetry,Dextrose 5%,"(0, 6)","(0, 4)",0.330995,0.52008,0.253191,318,abs
9,Pulse oximetry,Dextrose 5%,"(6, 12)","(0, 4)",0.28924,0.249883,0.105499,128,abs


In [None]:
pvals_ratio_sampled

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest
0,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(0, 4)",299,0.098280
1,Diastolic blood pressure,Dextrose 5%,"(6, 12)","(0, 4)",117,0.035804
2,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(4, 8)",142,0.175400
3,Diastolic blood pressure,Dextrose 5%,"(6, 12)","(4, 8)",49,0.185905
4,Diastolic blood pressure,Dextrose 5%,"(0, 6)","(8, 12)",98,0.570025
...,...,...,...,...,...,...
123,Temperature (F),Solution,"(0, 6)","(0, 4)",53,0.021065
124,Temperature (F),Solution,"(6, 12)","(0, 4)",32,0.813956
125,Temperature (F),Solution,"(0, 6)","(4, 8)",30,0.629980
126,Temperature (F),Solution,"(6, 12)","(4, 8)",15,0.913140


In [None]:
test_pval_data, significant_hard_thres, significant_bonferroni, significant_fdr = analyzer.generate_significant(pvals_med_lab)
merged_sig_pairs = pd.merge(pd.merge(significant_fdr, significant_bonferroni, how="inner"), significant_hard_thres, how="inner")

In [None]:
merged_sig_pairs

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),Mannwhitneyu Test,TTest Independent,TTest Paired,No of Patients,Type,BonferroniPvals,FDR Benjamini Corrected


In [None]:
test_pval_data_ratio, significant_hard_thres_ratio, significant_bonferroni_ratio, significant_fdr_ratio = analyzer.generate_significant(pvals_ratio_sampled[pvals_ratio_sampled["No. of Patients"]>100].dropna(), statistical_test="1-Sampled Ttest")
merged_sig_pairs_ratio = pd.merge(pd.merge(significant_fdr_ratio, significant_bonferroni_ratio, how="inner"), significant_hard_thres_ratio, how="inner")

In [None]:
merged_sig_pairs_ratio

Unnamed: 0,Lab Name,Med Name,Before Window (in Hours),After Window (in Hours),No. of Patients,1-Sampled Ttest,BonferroniPvals,FDR Benjamini Corrected
0,Respiratory rate,NaCl 0.9%,"(6, 12)","(0, 4)",110,8.4e-05,0.002676,0.002676
