## Mount Drive

In [None]:
# Colab-only setup: mounts the notebook's Drive at /gdrive.
# NOTE(review): this cell only runs inside Google Colab (google.colab is not available elsewhere).
from google.colab import drive
drive.mount('/gdrive')

## Lab test MIMIC Extract Mapping

The lab-test mapping below was copied from another file.

In [1]:
# Nested lab-test mapping: {lab group: {lab-test name: [item ids]}}.
# The ids are presumably MIMIC ITEMIDs (LABEVENTS / CHARTEVENTS) -- TODO confirm against the source file.
# Commented-out entries are variants that are excluded from the mapping.
# NOTE(review): 'ANA' is mapped to 'Fraction inspired oxygen', which looks like a mismatch -- confirm.
# NOTE(review): this literal defines 'magnesium'; a separate, case-sensitive 'Magnesium' key is added further down.
lab_mapping = {'ALT': {'Alanine aminotransferase': [50861, 769, 220644]},
 'ANA': {'Fraction inspired oxygen': [189]},
 'AST': {'Asparate aminotransferase': [50878, 770, 220587]},
 'Hemoglobin': {'Hemoglobin percent': [50852],
  'Hemoglobin C': [51224],
  'Hemoglobin F': [51225],
  'Hemoglobin A2': [51223],
  'Hemoglobin': [814, 220228, 51222, 50811]},
 'INR': {'Prothrombin time INR': [51237, 815, 1530, 227467]},
 'bilirubin': {'Bilirubin': [51465,
   50883,
   803,
   225651,
   50885,
   1538,
   848,
   225690,
   50884],
#   'Bilirubin, Total, Pleural': [51049],
#   'Bilirubin, Total, Body Fluid': [51028],
#   'Bilirubin, Total, Ascites': [50838]
  },
 'calcium': {'Calcium': [786, 1522, 3746, 51029, 50893, 225625],
  'Calcium ionized': [50808, 816, 225667, 3766],
#   'Calcium urine': [51066, 51077]
  },
 'creatinine': {'Creatinine': [791, 1525, 220615, 50912],
  'Creatinine ascites': [50841],
#   'Creatinine body fluid': [51032],
#   'Creatinine pleural': [51052],
#   'Creatinine urine': [51082]
},
 'glucose': {'Glucose': [50931,
   807,
   811,
   1529,
   50809,
   3745,
   225664,
   220621,
   226537],
#   'Glucose urine': [51478],
#   'Glucose, CSF': [51014],
#   'Estimated Actual Glucose': [51529],
#   'Glucose, Urine': [51084],
#   'Glucose, Pleural': [51053],
#   'Glucose, Joint Fluid': [51022],
#   'Glucose, Ascites': [50842],
#   'Glucose, Body Fluid': [51034]
},
 'lactic acid': {'Lactic acid': [818, 225668, 1531]},
 'magnesium': {'Magnesium': [50960], 'Magnesium, Urine': [51088]},
 'platelets': {'Platelets': [51265, 828, 227457], 'Large Platelets': [51240]},
 'potassium': {'Potassium': [829, 1535, 227464, 50971, 50822],
#   'Potassium serum': [227442],
#   'Potassium, Body Fluid': [51041],
#   'Potassium, Pleural': [51057],
#   'Potassium, Stool': [51064],
#   'Potassium, Urine': [51097],
#   'Potassium, Ascites': [50847]
  },
 'sodium': {'Sodium': [837, 1536, 220645, 226534, 50983, 50824],
#   'Sodium, Ascites': [50848],
#   'Sodium, Body Fluid': [51042],
#   'Sodium, Pleural': [51058],
#   'Sodium, Stool': [51065],
#   'Sodium, Urine': [51100]
  },
 'Uric acid': {'Uric Acid': [51007], 'Uric Acid, Urine': [51105]},
 'B12': {'Vitamin B12': [51010]},
 'prolactin': {'Prolactin': [50973]},
 'Amylase': {'Amylase': [50867], 
#  'Amylase, Ascites': [50836], 'Amylase, Body Fluid': [51026],'Amylase, Joint Fluid': [51020],'Amylase, Pleural': [51047], 'Amylase, Urine': [51072]
 },
 'Lipase': {'Lipase': [50956], 
#  'Lipase, Ascites': [50844], 'Lipase, Body Fluid': [51036]
 },
 'Aptt': {'PTT': [825, 1533, 227466, 51275]}}
# Extra lab groups registered on top of the main literal (same {name: [item ids]} layout).
lab_mapping['Hematocrit']  = {
    'Hematocrit' : [813, 220545, 51221, 50810]
}
lab_mapping['Red blood cell'] = {
    'Red blood cell': [51279, 833]
}
lab_mapping['Albumin'] = {
    'Albumin': [50862, 772, 1521, 227456]
}
# NOTE(review): differs only by case from the 'magnesium' group in the main literal and
# repeats item id 50960 -- confirm both keys are intended.
lab_mapping['Magnesium'] = {
    'Magnesium': [821, 1532, 220635, 50960]
}
# NOTE(review): 'CPK' is registered with no lab tests at all -- confirm whether ids are missing.
lab_mapping["CPK"] = {
}


## Source Code for Plotting and Querying data

In [50]:
import numpy as np
import pickle
from abc import ABC, abstractmethod
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import seaborn as sns
import pandas as pd
import os
from scipy.stats import pearsonr, spearmanr


In [60]:
# Util Functions

def sort_rows_with_time(p_corrs, s_corrs, after_windows):
    """
    Order Pearson and Spearman results by the start of their after-window.

    Params
    p_corrs : sequence of Pearson results; element [0] of every entry is kept
    s_corrs : sequence of Spearman results; element [0] of every entry is kept
    after_windows : sequence of windows aligned with the results; window[0] is the sort key

    Returns
    (pearson_values, spearman_values, pearson_labels, spearman_labels), where the labels are
    str(window) in sorted order.
    """
    def window_start(pair):
        # pair is (result, window); sort on the first element of the window
        return pair[1][0]

    pearson_sorted = sorted(zip(p_corrs, after_windows), key=window_start)
    spearman_sorted = sorted(zip(s_corrs, after_windows), key=window_start)

    p_values = [result[0] for result, _ in pearson_sorted]
    s_values = [result[0] for result, _ in spearman_sorted]
    p_labels = [str(window) for _, window in pearson_sorted]
    # The Spearman labels must come from the Spearman pairing; the previous code reused the
    # Pearson pairing, so labels and values could disagree when the inputs differ in length.
    s_labels = [str(window) for _, window in spearman_sorted]
    return p_values, s_values, p_labels, s_labels

def plot_corrs(corrs, after_windows, after_windows_map, ax, title='', plot_name='',  after_window_info=None):
    """
    Draw Pearson (ax[0]) and Spearman (ax[1]) correlations against the after-window.

    Params
    corrs : sequence whose entries hold the Pearson result at [0] and the Spearman result at [1]
    after_windows : names whose second-to-last "_"-separated token is a key of after_windows_map
    after_windows_map : maps that token to the window used for sorting and tick labels
    ax : indexable pair of matplotlib axes
    title, plot_name : combined as "<plot_name>_<title>" in both panel titles
    after_window_info : unused
    """
    pearson_results = [entry[0] for entry in corrs]
    spearman_results = [entry[1] for entry in corrs]
    heading = f'{plot_name}_{title}'

    windows = []
    for name in after_windows:
        window_key = name.split("_")[-2]
        windows.append(after_windows_map[window_key])

    pearson_vals, spearman_vals, pearson_ticks, spearman_ticks = sort_rows_with_time(
        pearson_results, spearman_results, windows
    )

    panels = (
        (pearson_ticks, pearson_vals, 'Pearsons'),
        (spearman_ticks, spearman_vals, 'Spearmans'),
    )
    for position, (ticks, values, kind) in enumerate(panels):
        axis = ax[position]
        axis.plot(ticks, values, '-o')
        axis.set_title(f'{heading} {kind} Corr')
        axis.set(xlabel='Time (h)', ylabel='Correlation')
        axis.set_xticks(ticks)
        axis.grid()
    

def _correlation_legend_labels(labels, n):
    """Build the five legend strings (Pearson r/p, Spearman r/p, sample size) shown on a regression plot."""
    return (
        f'Pearson Correlation = {round(labels[0][0], 4)}',
        f'Pearson Correlation p-value = {round(labels[0][1], 4)}',
        f'Spearmans Correlation = {round(labels[1][0], 4)}',
        f'Spearmans Correlation p-value = {round(labels[1][1], 4)}',
        f'Number of data points = {n}',
    )


def plot_func(lab, presc, d, dirname, plot_dir, plot_dir1, window=(1,24), title='', unit='', labels=None, plot_name='', ax=None):    
    """
    Regression plot of lab-value change ("data") against elapsed time ("time").

    Params
    lab, presc : lab-test and medication names, used in the title and in the output folder name
    d : DataFrame with "time" and "data" columns
    dirname : optional sub-folder below "<plot_dir1>/<lab><>presc>"; None or "" saves directly in that folder
    plot_dir : unused
    plot_dir1 : root folder for saved figures (only used when ax is None)
    window : unused
    title, unit : text inserted in the title / y-axis label
    labels : optional ((pearson_r, pearson_p), (spearman_r, spearman_p)) shown as a legend
    plot_name : file name (without extension) of the saved figure
    ax : when given, draw on this axis and do not save; otherwise draw on the current figure,
         save it as PNG and clear the figure
    """
    plot_data = d
    ordered = plot_data.sort_values(["time"])
    n = plot_data.shape[0]
    heading = lab+'<>'+presc+'- '+ title+ ' \nchange in lab measurment and time taken for change'
    y_label = f"{title} change in {lab} lab measurment ({unit})"

    def legend_handles():
        # Invisible handles: the legend is used purely as a text box.
        blank = Rectangle((0, 0), 1, 1, fc="w", fill=False, edgecolor='none', linewidth=0)
        return [blank for _ in range(5)]

    if ax is not None:
        sns.regplot(ax=ax, x="time", y='data', data=ordered, truncate=False)
        ax.set_title(heading)
        ax.set(xlabel='Time (h)', ylabel=y_label)
        ax.grid()
        if labels is not None:
            ax.legend(legend_handles(), _correlation_legend_labels(labels, n))
        return

    sns.regplot(x="time", y='data', data=ordered, truncate=False)
    plt.title(heading)
    plt.xlabel('Time (h)')
    plt.ylabel(y_label)
    if labels is not None:
        plt.legend(legend_handles(), _correlation_legend_labels(labels, n))

    out_dir = os.path.join(plot_dir1, f"{lab}<>{presc}")
    if not (dirname is None or dirname == ""):
        out_dir = os.path.join(out_dir, dirname)
    # makedirs creates missing parents and tolerates an existing folder, unlike the
    # previous isdir()/mkdir() pairs which failed when plot_dir1 itself was absent.
    os.makedirs(out_dir, exist_ok=True)
    plt.savefig(os.path.join(out_dir, plot_name+".png"))
    plt.clf()

def remove_outlier(val, time_diff):
    """
    Drop rows whose value lies outside the 1.5*IQR fences, keeping `time_diff` aligned.

    Params
    val : values to screen (anything accepted by pd.DataFrame)
    time_diff : companion data with one row per row of `val`

    Returns
    (val, time_diff) as DataFrames containing only the rows that were kept; the original
    index labels of the kept rows are preserved.

    Notes
    - Rows are removed by position, so inputs with a non-default index are handled
      (the previous code passed positional indices to a label-based drop).
    - Only values strictly beyond a fence are removed. With the previous inclusive test an
      IQR of 0 flagged every row as both an upper and a lower outlier and the second drop
      raised KeyError.
    """
    val = pd.DataFrame(val)
    time_diff = pd.DataFrame(time_diff)
    if val.empty:
        return val, time_diff

    # Quartiles over all values in `val`
    Q1 = np.percentile(val, 25, method = 'midpoint')
    Q3 = np.percentile(val, 75, method = 'midpoint')
    IQR = Q3 - Q1

    upper_fence = Q3 + 1.5*IQR
    lower_fence = Q1 - 1.5*IQR

    # A row is an outlier when any of its values is beyond a fence.
    is_outlier = ((val > upper_fence) | (val < lower_fence)).any(axis=1).to_numpy()
    keep = ~is_outlier
    return val[keep], time_diff[keep]

def check_med2(row):
    """
    Tell whether the module-level `t_med2` frame has a row with the same HADM_ID and ITEMID as `row`.

    Relies on a global `t_med2` being defined before the call.
    """
    known_admissions = t_med2["HADM_ID"].to_list()
    if row["HADM_ID"] not in known_admissions:
        return False
    same_admission = t_med2[t_med2["HADM_ID"]==row["HADM_ID"]]
    return row["ITEMID"] in same_admission["ITEMID"].to_list()

def get_med2(row):
    """
    Return the first row of the module-level `t_med2` frame matching `row` on HADM_ID and ITEMID.

    Raises IndexError (from .iloc[0]) when there is no match.
    """
    same_admission = t_med2["HADM_ID"]==row["HADM_ID"]
    same_item = t_med2["ITEMID"]==row["ITEMID"]
    return t_med2[same_admission & same_item].iloc[0]
from sklearn import datasets, linear_model, metrics

def get_normalized_trend(data):
    """
    Slope of a linear regression of VALUENUM on hours_from_med.

    Returns NaN when `data` has fewer than two rows.
    """
    if data[['VALUENUM', 'hours_from_med']].shape[0] < 2:
        return float("NaN")
    hours = np.array(data['hours_from_med']).reshape(-1, 1)
    values = np.array(data['VALUENUM']).reshape(-1, 1)
    model = linear_model.LinearRegression()
    model.fit(hours, values)
    # coef_ has shape (1, 1) here; its single entry is the slope
    return model.coef_[0][0]

def get_normalized_trend_np(data):
    """
    numpy-based variant of the trend fit: degree-1 polynomial of VALUENUM over hours_from_med.

    Returns NaN for fewer than two rows, otherwise the constant 1.

    NOTE(review): this looks like an unfinished experiment. A range-normalised RMSE of the fit
    is computed but the function has always returned 1; that return value is kept so existing
    callers are unaffected -- decide whether the slope or the NRMSE should be returned.

    Changes from the previous version:
    - the frame used for the size check / range now uses 'hours_from_med', the column that is
      actually fitted (it used 'hours_in', so frames without that column failed);
    - leftover debug print() calls were removed;
    - an exactly determined or rank-deficient fit no longer raises IndexError
      (np.polyfit returns an empty residual array in those cases).
    """
    selected = data[['VALUENUM', 'hours_from_med']]
    if selected.shape[0]<2:
        return float("NaN")
    fit = np.polyfit(np.array(selected['hours_from_med']), np.array(selected['VALUENUM']), 1, full=True)
    coefficients, residuals, _, _, _ = fit
    squared_error = residuals[0] if len(residuals) > 0 else 0.0
    mse = squared_error/(len(selected.index))
    # Placeholder result, currently discarded (see NOTE above).
    nrmse = np.sqrt(mse)/(selected.max() - selected.min())
    return 1

In [34]:
# Utils class
class AnalysisUtils:
    """
    Shared configuration (paths, stratification) and name-mapping helpers used by the
    parser, querier and plotting classes.
    """

    # (medication, lab group) pairs that are always analysed, in addition to the pairs
    # listed in the mapping CSV. The lab group must be a key of `lab_mapping`.
    EXTRA_MED_LAB_PAIRS = (
        ('Insulin - Regular', 'glucose'),
        ('Packed Red Blood Cells', 'Hemoglobin'),
        ('Calcium Gluconate (CRRT)', 'calcium'),
        ('Packed Red Blood Cells', 'Red blood cell'),
        ('Packed Red Blood Cells', 'Hematocrit'),
        ('Albumin', 'Albumin'),
        ('Albumin', 'Hematocrit'),
        ('Albumin 5%', 'Albumin'),
        ('Albumin 5%', 'Hematocrit'),
        ('Albumin 25%', 'Albumin'),
        ('Albumin 25%', 'Hematocrit'),
        ('Magnesium Sulfate', 'Magnesium'),
    )

    def __init__(self, data, res, gender="MF", age_b=0, age_a=100, ethnicity="WHITE", lab_mapping=None, load=True):
        '''
        Params
        data : path to dataset
        res : path to result output files
        gender : stratification param for gender ("MF" disables the gender filter in the parsers)
        age_b : stratification param for start of age group
        age_a : stratification param for end of age group
        ethnicity : stratification param for ethnicity
        lab_mapping : lab test mapping from mimic extract. Loaded externally and used in the class 
        load : when True, read the medication mapping CSV immediately (see load_mappings)
        '''
        self.data = data 
        self.res = res
        self.gender = gender
        self.age_b = age_b
        self.age_a = age_a
        self.ethnicity = ethnicity
        self.stratify_prefix = f"{age_b}-{age_a}_{gender}_{ethnicity}"
        self.lab_mapping = lab_mapping

        # Filled by load_mappings(); stay None when load=False.
        self.res_dict_mapping_med = None
        self.d_m_l_doc = None
        if load:
            self.load_mappings()

    def load_mappings(self):
        """
        Load Medication and Lab test name mappings from MIMIC Extract and Clinically Validated sources

        Reads <data>/mimiciii/1.4/preprocessed/mapping_med_itemid_doc.csv into `d_m_l_doc` and
        builds `res_dict_mapping_med` ({item id: medication name}) from the "ITEMID_with_manual"
        column, whose cells look like "[1, 2, 3]".
        """
        path = os.path.join(self.data, "mimiciii", "1.4","preprocessed", "mapping_med_itemid_doc.csv")
        # errors="ignore": the unnamed index column only exists if the CSV was saved with its index.
        self.d_m_l_doc = pd.read_csv(path).drop(columns=["Unnamed: 0"], errors="ignore")

        id_to_medication = {}
        for record in self.d_m_l_doc.to_dict("records"):
            raw_ids = record["ITEMID_with_manual"]
            if not isinstance(raw_ids, str):
                # empty cell (read back as NaN): no ids for this medication
                continue
            # strip the surrounding brackets, then tolerate blanks such as "[1, ]" or "[ ]"
            for token in raw_ids[1:-1].split(","):
                token = token.strip()
                if token:
                    id_to_medication[int(token)] = record["Medication"]
        self.res_dict_mapping_med = id_to_medication

    def generate_med_lab_pairs(self):
        """
        Generate medication and lab test pair names.

        Every (Medication, lab result) combination of the mapping CSV plus EXTRA_MED_LAB_PAIRS is
        expanded to one entry per lab-test name in the corresponding `lab_mapping` group.

        Returns
        (medication_names, lab_test_names): two parallel lists

        Raises
        KeyError : a lab group is not a key of `lab_mapping`
        """
        d_lab_map = {group: list(tests.keys()) for group, tests in self.lab_mapping.items()}
        documented = list(self.d_m_l_doc.groupby(["Medication", "lab result"]).count().index)

        pairs = [(med.strip(), lab_group.strip()) for med, lab_group in documented]
        pairs.extend(self.EXTRA_MED_LAB_PAIRS)

        med_vals_new, labtest_vals_new = [], []
        for med, lab_group in pairs:
            if lab_group not in d_lab_map:
                raise KeyError(f"lab group {lab_group!r} (paired with {med!r}) is missing from lab_mapping")
            for lab_name in d_lab_map[lab_group]:
                med_vals_new.append(med)
                labtest_vals_new.append(lab_name)
        return med_vals_new, labtest_vals_new

In [5]:

class DatasetParser(ABC):
    """
    Interface to define a parser for datasets.

    The return annotations of load_med1/load_med2 describe what the concrete parsers in this
    notebook return: the medication frame together with a list of admission ids.
    """

    @abstractmethod
    def load_med1(self) -> tuple[pd.DataFrame, list]:
        """ Load data of the 1st medication administration during the 1st admission of a patient.

        Returns (medication frame, list of admission ids). """
        ...

    @abstractmethod
    def load_med2(self) -> tuple[pd.DataFrame, list]:
        """ Load data of the 2nd medication administration during the 1st admission of a patient.

        Returns (medication frame, list of admission ids). """
        ...

    @abstractmethod
    def load_lab(self, h_med_adm1: list, h_med_adm2: list) -> pd.DataFrame:
        """ Load data on all lab tests taken during the 1st admission of a patient.

        h_med_adm1 / h_med_adm2 are the admission-id lists returned by load_med1 / load_med2. """
        ...

    @abstractmethod
    def parse(self, use_pairs: bool) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
        """ Load med1, med2 and lab test data; returns them in that order. """
        ...

In [28]:

class MIMICParser(DatasetParser, AnalysisUtils):
    """
    Parser for the preprocessed MIMIC-III CSV files under <data>/mimiciii/1.4/preprocessed.
    """

    PREPROCESSED_DIR = "mimiciii/1.4/preprocessed"

    def __init__(self, data, res, gender="MF", age_b=0, age_a=100, ethnicity="WHITE", lab_mapping=None):
        AnalysisUtils.__init__(self, data, res, gender=gender, age_b=age_b, age_a=age_a, ethnicity=ethnicity, lab_mapping=lab_mapping)

    def _read_preprocessed(self, filename):
        """Read one CSV from the preprocessed folder."""
        return pd.read_csv(os.path.join(self.data, self.PREPROCESSED_DIR, filename))

    @staticmethod
    def _first_admission_ids(med):
        """HADM_ID of the row with the earliest STARTTIME for every SUBJECT_ID."""
        ordered = med.sort_values(["SUBJECT_ID", "STARTTIME"])
        return ordered.groupby("SUBJECT_ID").nth(0)["HADM_ID"].to_list()

    def _stratify(self, frame):
        """Apply the age / gender / ethnicity filters; returns an independent copy."""
        frame = frame[frame["AGE"]>=self.age_b]
        frame = frame[frame["AGE"]<=self.age_a]
        if self.gender != "MF":
            frame = frame[frame["GENDER"]==self.gender]
        frame = frame[frame["ETHNICITY"]==self.ethnicity]
        # copy so that the column assignments done by the callers do not target a slice
        return frame.copy()

    def _prepare_medication(self, med):
        """Shared med1/med2 preprocessing: stratify, attach the mapped label, parse timestamps."""
        med = self._stratify(med.drop(columns=["Unnamed: 0"], errors="ignore"))
        # Mapped medication name when the ITEMID is known, otherwise the original LABEL.
        med["MIMICExtractLabel"] = [
            self.res_dict_mapping_med.get(item_id, label)
            for item_id, label in zip(med["ITEMID"], med["LABEL"])
        ]
        for column in ("STARTTIME", "ENDTIME", "ADMITTIME"):
            med[column] = pd.to_datetime(med[column])
        # NOTE(review): measured from the END of the administration; HiRiDParser uses STARTTIME -- confirm.
        med["MedTimeFromAdmit"] = med["ENDTIME"]-med["ADMITTIME"]
        return med

    def load_med1(self):
        """
        Load 1st Medication data

        Returns (med1, h_adm_1): the stratified frame restricted to each patient's first
        admission, and the list of those admission ids.
        """
        med1 = self._read_preprocessed("med1_vectorized.csv")
        # Bug fix: the first admission is now chosen by earliest STARTTIME per patient, as in
        # load_med2. The previous sort key was ["HADM_ID", "STARTTIME"], which picked the
        # numerically smallest HADM_ID instead.
        h_adm_1 = self._first_admission_ids(med1)
        med1 = med1[med1.HADM_ID.isin(h_adm_1)]
        med1 = self._prepare_medication(med1)
        med1["hours_in"] = med1["MedTimeFromAdmit"].dt.total_seconds()/3600
        return med1, h_adm_1
    
    def load_med2(self):
        """
        Load 2nd Medication data

        Returns (med2, h_adm_2).
        NOTE(review): unlike load_med1, the frame is not restricted to h_adm_2 and has no
        "hours_in" column -- kept as is; confirm whether that asymmetry is intended.
        """
        med2 = self._read_preprocessed("med2_vectorized.csv")
        h_adm_2 = self._first_admission_ids(med2)
        med2 = self._prepare_medication(med2)
        return med2, h_adm_2

    def load_lab(self, h_med_adm1, h_med_adm2):
        """
        Load lab test data from LABEVENTS and CHARTEVENTS tables

        Side effect: draws a bar chart of the age distribution (ages below 100) on the current
        matplotlib figure.
        """
        labs = self._read_preprocessed("lab_patient_data_mimic_extract_2.csv")

        # Sorted counts per age; avoids relying on the column names produced by
        # value_counts().reset_index(), which changed in pandas 2.0.
        age_counts = labs.loc[labs["AGE"]<100, "AGE"].value_counts().sort_index()
        plt.bar(age_counts.index, age_counts.values)
        plt.xlabel('Age')
        plt.ylabel('Count of Patients')
        plt.title('Distribution of age')

        labs = labs.drop(columns=["Unnamed: 0"], errors="ignore")
        labs = labs[labs.HADM_ID.isin(h_med_adm1+h_med_adm2)]
        labs = self._stratify(labs)
        labs["CHARTTIME"] = pd.to_datetime(labs["CHARTTIME"])
        labs["ADMITTIME"] = pd.to_datetime(labs["ADMITTIME"])
        labs["LabTimeFromAdmit"] = labs["CHARTTIME"]-labs["ADMITTIME"]
        labs["hours_in"] = labs["LabTimeFromAdmit"].dt.total_seconds()/3600
        return labs

    def parse(self, use_pairs=True):
        """
        Loading medication and lab test. Performing basic preprocessing on data.

        Params
        use_pairs : keep only medications / lab tests that appear in generate_med_lab_pairs()

        Returns (t_med1, t_med2, t_labs); in each frame the mapped name becomes "ITEMID" and the
        original columns are kept as "OldITEMID" / "OldLabel".
        """
        med1, hadm1 = self.load_med1()
        med2, hadm2 = self.load_med2()
        labs = self.load_lab(hadm1, hadm2)
        
        if use_pairs:
            med_vals_new, labtest_vals_new = self.generate_med_lab_pairs()
            med1 = med1[med1["MIMICExtractLabel"].isin(med_vals_new)]
            med2 = med2[med2["MIMICExtractLabel"].isin(med_vals_new)]
            labs = labs[labs["MIMICExtractName"].isin(labtest_vals_new)]
            
        med_renames = {"LABEL":"OldLabel", "ITEMID":"OldITEMID", "MIMICExtractLabel":"ITEMID"}
        lab_renames = {"LABEL":"OldLabel", "ITEMID":"OldITEMID", "MIMICExtractName":"ITEMID"}
        # rename() returns new frames, so the loaded frames are not modified
        t_med1 = med1.rename(columns=med_renames)
        t_med2 = med2.rename(columns=med_renames)
        t_labs = labs.rename(columns=lab_renames)

        return t_med1, t_med2, t_labs

In [56]:
class HiRiDParser(DatasetParser, AnalysisUtils):
    """
    Parser for the HiRiD dataset; exposes its tables under the MIMIC-style column names
    (HADM_ID, ITEMID, STARTTIME, ...) used by the rest of the notebook.
    """

    PHARMA_DIR = "pharma_records"
    OBSERVATION_DIR = "observation_tables 2"

    def __init__(self, data, res, gender="MF", age_b=0, age_a=100):
        # load=False: the MIMIC medication mapping CSV is not read for HiRiD
        AnalysisUtils.__init__(self, data=data, res=res, gender=gender, age_b=age_b, age_a=age_a, load=False)
        self.load_util_datasets()
        self.load_med()

    @staticmethod
    def _csv_part_files(folder):
        """
        Sorted names of the regular files directly inside `folder`.

        Replaces the previous "second entry of os.walk()" lookup, which depended on the 'csv'
        sub-folder being the first directory visited.
        """
        return sorted(
            name for name in os.listdir(folder)
            if os.path.isfile(os.path.join(folder, name))
        )

    def _stratify(self, frame):
        """Apply the age / gender filters; returns an independent copy."""
        frame = frame[frame["AGE"]>=self.age_b]
        frame = frame[frame["AGE"]<=self.age_a]
        if self.gender != "MF":
            frame = frame[frame["GENDER"]==self.gender]
        # copy so that later column assignments do not target a slice
        return frame.copy()

    def load_util_datasets(self):
        """Read the reference tables (general table and variable references) from `self.res`."""
        path1 = self.res
        self.g_table = pd.read_csv(os.path.join(path1, 'general_table.csv'))
        h_var_ref = pd.read_csv(os.path.join(path1, 'hirid_variable_reference.csv'))
        self.h_var_ref = h_var_ref.rename(columns={"ID":"variableid"})

        self.h_var_ref_pre = pd.read_csv(os.path.join(path1, 'hirid_variable_reference_preprocessed.csv'))
        self.o_var_ref = pd.read_csv(os.path.join(path1, 'ordinal_vars_ref.csv'))

    def load_med(self):
        """
        Read every file in <data>/pharma_records/csv, attach variable names and patient data,
        and store the result (MIMIC-style column names) in `self.pharma_records_with_name`.
        """
        csv_dir = os.path.join(self.data, self.PHARMA_DIR, 'csv')
        pharma_records = pd.concat([pd.read_csv(os.path.join(csv_dir, file)) for file in self._csv_part_files(csv_dir)])
        pharma_records = pharma_records.rename(columns={"pharmaid":"variableid"})

        pharma_records_with_name = pd.merge(pharma_records, self.h_var_ref, on="variableid", how="inner")
        pharma_records_with_name = pd.merge(pharma_records_with_name, self.g_table, on="patientid", how="inner")
        pharma_records_with_name.givenat = pd.to_datetime(pharma_records_with_name.givenat)
        self.pharma_records_with_name = pharma_records_with_name.rename(columns={
            "givenat":"STARTTIME",
            "admissiontime":"ADMITTIME",
            "enteredentryat":"ENDTIME",
            "variableid":"ITEMID",
            "patientid":"HADM_ID",
            "Variable Name":"LABEL",
            "age":"AGE",
            "sex":"GENDER",
        })

    def _nth_administration(self, n):
        """
        n-th (0-based, by STARTTIME) administration of every medication per patient.

        Returns (frame, admission ids); the ids are collected BEFORE stratification.
        """
        ordered = self.pharma_records_with_name.sort_values(["HADM_ID", "STARTTIME"])
        med = ordered.groupby(["HADM_ID", "ITEMID"]).nth(n).reset_index()

        admissions = med["HADM_ID"].to_list()
        med = self._stratify(med)

        for column in ("STARTTIME", "ENDTIME", "ADMITTIME"):
            med[column] = pd.to_datetime(med[column])
        med["MedTimeFromAdmit"] = med["STARTTIME"]-med["ADMITTIME"]
        med["hours_in"] = med["MedTimeFromAdmit"].dt.total_seconds()/3600
        return med, admissions

    def load_med1(self):
        """
        Load 1st Medication data
        """
        med1, h_adm_1 = self._nth_administration(0)
        self.med1 = med1
        return med1, h_adm_1
    
    def load_med2(self):
        """
        Load 2nd Medication data
        """
        med2, h_adm_2 = self._nth_administration(1)
        self.med2 = med2
        return med2, h_adm_2
    
    def read_lab(self, path, adm):
        """Read one observation file and keep only rows whose patientid is in `adm`."""
        labs = pd.read_csv(path)
        labs = labs[labs.patientid.isin(adm)]
        return labs

    def load_lab(self, h_med_adm1, h_med_adm2, n_parts=50):
        """
        Load lab test data from the HiRiD observation tables.

        Params
        h_med_adm1, h_med_adm2 : patient-id lists from load_med1 / load_med2; only these patients are kept
        n_parts : number of part files (in sorted name order) to read
        """
        csv_dir = os.path.join(self.data, self.OBSERVATION_DIR, 'csv')
        wanted = h_med_adm1+h_med_adm2
        part_files = self._csv_part_files(csv_dir)[:n_parts]
        observations = pd.concat([self.read_lab(os.path.join(csv_dir, file), wanted) for file in part_files])

        observations = pd.merge(observations, self.h_var_ref, on="variableid", how="inner")
        observations = pd.merge(observations, self.g_table, on="patientid", how="inner")
        observations.datetime = pd.to_datetime(observations.datetime)
        observations = observations.rename(columns={
            "datetime":"CHARTTIME",
            "admissiontime":"ADMITTIME",
            "variableid":"ITEMID",
            "patientid":"HADM_ID",
            "Variable Name":"LABEL",
            "value":"VALUENUM",
            "Unit":"VALUEUOM",
            "age":"AGE",
            "sex":"GENDER"
        })

        labs = self._stratify(observations)
        
        labs["CHARTTIME"] = pd.to_datetime(labs["CHARTTIME"])
        labs["ADMITTIME"] = pd.to_datetime(labs["ADMITTIME"])
        labs["LabTimeFromAdmit"] = labs["CHARTTIME"]-labs["ADMITTIME"]
        labs["hours_in"] = labs["LabTimeFromAdmit"].dt.total_seconds()/3600
        return labs

    def parse(self, use_pairs=False):
        """
        Loading medication and lab test. Performing basic preprocessing on data.

        NOTE(review): use_pairs=True calls generate_med_lab_pairs(), which needs `lab_mapping`
        and the mapping CSV; this class is built with load=False and no lab_mapping, so that
        path fails unless those attributes are set by the caller first.
        """
        med1, hadm1 = self.load_med1()
        med2, hadm2 = self.load_med2()
        labs = self.load_lab(hadm1, hadm2)
        
        if use_pairs:
            med_vals_new, labtest_vals_new = self.generate_med_lab_pairs()
            med1 = med1[med1["LABEL"].isin(med_vals_new)]
            med2 = med2[med2["LABEL"].isin(med_vals_new)]
            labs = labs[labs["LABEL"].isin(labtest_vals_new)]
            
        renames = {"ITEMID":"OldITEMID", "LABEL":"ITEMID"}
        # rename() returns new frames, so the loaded frames are not modified
        t_med1 = med1.rename(columns=renames)
        t_med2 = med2.rename(columns=renames)
        t_labs = labs.rename(columns=renames)

        return t_med1, t_med2, t_labs

In [67]:

class DatasetQuerier(AnalysisUtils):
    """
    Collects lab-test values in time windows before and after each medication administration.
    """

    def __init__(self, data, res, gender="MF", age_b=0, age_a=100, ethnicity="WHITE", lab_mapping=None):
        self.final = None
        self.temp = None
        super().__init__(data, res, gender=gender, age_b=age_b, age_a=age_a, ethnicity=ethnicity, lab_mapping=lab_mapping)
    
    def check_med2(self, t_med2, row):
        """
        Check if a 2nd medication was administered to patients
        (i.e. t_med2 has a row with the same HADM_ID and ITEMID as `row`)
        """
        if row["HADM_ID"] not in t_med2["HADM_ID"].to_list():
            return False
        same_admission = t_med2[t_med2["HADM_ID"]==row["HADM_ID"]]
        return row["ITEMID"] in same_admission["ITEMID"].to_list()

    def get_med2(self, t_med2, row):
        '''
        Return 2nd medication data
        (first t_med2 row matching `row` on HADM_ID and ITEMID; IndexError if there is none)
        '''
        temp = t_med2[t_med2["HADM_ID"]==row["HADM_ID"]] 
        return temp[temp["ITEMID"]==row["ITEMID"]].iloc[0]

    @staticmethod
    def _store_window_stats(row, prefix, window, lab_vals):
        """
        Write the per-lab statistics of one window into `row` (modified in place).

        For every ITEMID in `lab_vals` (already sorted by ITEMID, hours_from_med) this stores
        four dicts keyed by ITEMID under "<prefix>_{abs,mean,trends,time}_<window>":
        first value, mean value, fitted trend, and first hours_from_med. All four are empty
        dicts when the window holds no lab rows.
        """
        counts = lab_vals.groupby(["ITEMID"]).count()[["HADM_ID"]]
        present = counts[counts["HADM_ID"]>=1]
        if present.shape[0]==0:
            for stat in ("abs", "mean", "trends", "time"):
                row[f"{prefix}_{stat}_{window}"] = {}
            return

        l_m = lab_vals[lab_vals.ITEMID.isin(present.index)]
        # NOTE(review): groupby().first() takes the first non-null entry per column, so the
        # "abs" value and the "time" value can come from different rows when VALUENUM is missing.
        firsts = l_m.groupby(["ITEMID"])[["VALUENUM", "hours_from_med"]].first()
        row[f"{prefix}_abs_{window}"] = firsts["VALUENUM"].dropna().to_dict()
        row[f"{prefix}_mean_{window}"] = l_m.groupby(["ITEMID"])[["VALUENUM"]].mean()["VALUENUM"].dropna().to_dict()
        row[f"{prefix}_trends_{window}"] = (
            l_m[["VALUENUM", "hours_from_med", "ITEMID"]].dropna()
            .groupby(["ITEMID"])[["VALUENUM", "hours_from_med"]]
            .apply(get_normalized_trend)
            .dropna().to_dict()
        )
        row[f"{prefix}_time_{window}"] = firsts["hours_from_med"].dropna().to_dict()
    
    def get_vals(self, r, t_labs, t_med1, t_med2, before_windows, after_windows):
        """
        Calculate the lab test values in time windows before and after medication administration. Return a dataframe with labtest values of before and after windows as a dict
        Params: 
        - r: one medication row (needs HADM_ID, ITEMID, STARTTIME, ENDTIME, MedTimeFromAdmit)
        - t_labs: lab frame (needs HADM_ID, ITEMID, CHARTTIME, LabTimeFromAdmit, VALUENUM)
        - t_med1: unused
        - t_med2: 2nd-administration frame; lab values taken at or after it are ignored in the after windows
        - before_windows: list of tuples (each tuple is a window, in hours before the medication)
        - after_windows: list of tuples (each tuple is a window, in hours after the medication)

        Returns a copy of `r` extended with "<before|after>_{abs,mean,trends,time}_<window>" entries.
        """

        row = r.copy()
        med_secs = row["MedTimeFromAdmit"].total_seconds()
        # Loop-invariant work, done once per medication row instead of once per window.
        admission_labs = t_labs[t_labs["HADM_ID"]==row["HADM_ID"]]
        lab_secs = admission_labs["LabTimeFromAdmit"].dt.total_seconds()

        for b_w in before_windows:
            newest = med_secs - (b_w[0]*3600)
            oldest = med_secs - (b_w[1])*3600
            in_window = (lab_secs<med_secs) & (lab_secs<newest) & (lab_secs>oldest)
            # copy: a new column is added below
            lab_vals = admission_labs[in_window].copy()
            lab_vals["hours_from_med"] = (row["STARTTIME"]-lab_vals["CHARTTIME"]).dt.total_seconds()/3600
            lab_vals = lab_vals.sort_values(["ITEMID", "hours_from_med"])
            self._store_window_stats(row, "before", b_w, lab_vals)

        # Time of the next administration of the same medication (if any): labs from then on
        # are excluded from the after windows.
        next_med_secs = None
        if len(after_windows) > 0 and self.check_med2(t_med2, row):
            next_med_secs = self.get_med2(t_med2, row)["MedTimeFromAdmit"].total_seconds()

        for a_w in after_windows:
            earliest = med_secs + (a_w[0]*3600)
            latest = med_secs + (a_w[1])*3600
            in_window = (lab_secs>med_secs) & (lab_secs>earliest) & (lab_secs<latest)
            lab_vals = admission_labs[in_window].copy()
            lab_vals["hours_from_med"] = (lab_vals["CHARTTIME"]-row["ENDTIME"]).dt.total_seconds()/3600
            lab_vals = lab_vals.sort_values(["ITEMID", "hours_from_med"])

            if next_med_secs is not None:
                lab_vals = lab_vals[lab_vals["LabTimeFromAdmit"].dt.total_seconds()<next_med_secs]

            # Bug fix: the trend of an after window used to be stored under the name of the
            # last BEFORE window ("after_trends_{b_w}"); every statistic is now keyed by a_w.
            self._store_window_stats(row, "after", a_w, lab_vals)
                
        return row
    
    def generate_med_lab_data(self, t_labs, t_med1, t_med2, before_windows, after_windows):
        """
        Generate lab test values in before and after windows of medication

        Returns (final, temp):
        - temp: t_med1 extended with the dict-valued window columns produced by get_vals
        - final: one row per (medication row, lab test) with "<column>_sp" scalar columns for the
          "abs" and "time" statistics of every window
        Both frames are also written as CSV under <data>/mimiciii/1.4/preprocessed and kept on
        the instance (self.temp / self.final).
        """
        
        # A tuple (not a set) keeps the column order the same on every run.
        all_types = ("abs", "time")
        cols_b = [f"before_{t}_{b_w}" for b_w in before_windows for t in all_types]
        cols_a = [f"after_{t}_{a_w}" for a_w in after_windows for t in all_types]
        cols = cols_b + cols_a

        temp = t_med1.apply(lambda r : self.get_vals(r, t_labs, t_med1, t_med2, before_windows, after_windows), axis=1)
        self.temp = temp
        temp.to_csv(os.path.join(self.data, "mimiciii/1.4/preprocessed", f"before_after_windows_main_med_lab_first_val_{self.stratify_prefix}_doc_eval_new_win.csv"))
        
        # One long-format frame per statistic column: explode {lab: value} into rows.
        col_vals = []
        for col in cols:
            col_vals.append(
                temp.assign(dict=temp[col].map(lambda d: d.items())).explode("dict", ignore_index=True).assign(
                    LAB_ITEMID=lambda df: df.dict.str.get(0),
                    temp=lambda df: df.dict.str.get(1)
                ).drop(columns=["dict"]+cols).astype({'temp':'float64'}).rename(columns={"temp":f"{col}_sp"}).dropna(subset=["LAB_ITEMID"])
            )
        # Chain outer merges so the last entry carries every "<col>_sp" column.
        merge_keys = list(t_med1.columns)+["LAB_ITEMID"]
        for i in range(1, len(col_vals)):
            col_vals[i] = pd.merge(col_vals[i-1], col_vals[i], how="outer", on=merge_keys)
        
        # copy: a column is added below and must not target a slice of the merged frame
        final = col_vals[-1][merge_keys+[f"{col}_sp" for col in cols]].copy()
        final["LAB_NAME"] = final["LAB_ITEMID"]
        final = final.rename(columns={"ITEMID":"MED_NAME"})
        self.final = final
        
        final.to_csv(os.path.join(self.data, "mimiciii/1.4/preprocessed", f"before_after_windows_main_med_lab_trends_first_val_{self.stratify_prefix}_doc_eval_win.csv"))

        return final, temp
    
    def query(self):
        """
        Query lab test value for a given medication
        (not implemented yet)
        """
        pass


In [12]:
class ClinicalDiscoveryAnalysis(AnalysisUtils):
    """Placeholder subclass of AnalysisUtils; it defines no behaviour of its own yet."""
    def __init__(self):
        # NOTE(review): AnalysisUtils.__init__ is not called here, so the attributes it sets
        # (data, res, stratify_prefix, ...) do not exist on instances of this class.
        pass

In [24]:

class ClinicalPlotAnalysis(AnalysisUtils):

    def __init__(self, data, res, gender="MF", age_b=0, age_a=100, ethnicity="WHITE", lab_mapping=None):
        """
        Forward the data/result locations and the stratification settings, unchanged,
        to the AnalysisUtils initialiser.
        """
        super().__init__(
            data,
            res,
            gender=gender,
            age_b=age_b,
            age_a=age_a,
            ethnicity=ethnicity,
            lab_mapping=lab_mapping,
        )
    
    def make_plot_dirs(self):
        """
        Create folders to store output plots

        Two folders are created under ``self.res``, both named after
        ``self.stratify_prefix``:
        ``plots_<prefix>_doc_eval`` and ``plots_<prefix>_doc_eval_all_window``.
        Missing parent folders are created as well and already existing folders
        are left untouched.

        Returns
        -------
        tuple of (str, str)
            Paths of the two folders, in the order listed above.
        """
        plot_dir = os.path.join(self.res, f"plots_{self.stratify_prefix}_doc_eval")
        plot_dir1 = os.path.join(self.res, f"plots_{self.stratify_prefix}_doc_eval_all_window")
        for path in (plot_dir, plot_dir1):
            # exist_ok avoids the check-then-create race; makedirs also covers a
            # results folder that has not been created yet.
            os.makedirs(path, exist_ok=True)
        return plot_dir, plot_dir1
    
    def generate_plot_data(self, final, before_windows, after_windows):
        """
        Generate plot data for each before and after window

        Parameters
        ----------
        final : pandas.DataFrame
            Must contain ``SUBJECT_ID``, ``HADM_ID``, ``MED_NAME``, ``LAB_NAME`` and the
            per-window columns ``before_abs_<w>_sp`` / ``before_time_<w>_sp`` and
            ``after_abs_<w>_sp`` / ``after_time_<w>_sp``. Windows whose ``abs`` column is
            absent are skipped.
        before_windows, after_windows : iterable
            Windows; ``str(window)`` is embedded in the column names above.

        Returns
        -------
        dict
            ``{(MED_NAME, LAB_NAME): {before value column: [entry per after window]}}``.
            Each entry holds ``SUBJECT_ID``, ``HADM_ID``, the before value, the after
            value, the after time and the derived ``abs``, ``percent`` and ``ratio``
            columns, in that order, with non-finite or missing rows dropped.
            NOTE(review): when a pair matches a single row, ``.loc`` appears to yield a
            Series instead of a DataFrame; ``plot`` skips Series entries.

        Side effects
        ------------
        Writes two pickle files into the current working directory:
        ``plot_bw_aw_med_lab_data_<prefix>_doc_eval_win.pkl`` (rows per before/after
        window) and ``plot_med_lab_bw_aw_data_<prefix>_doc_eval_win.pkl`` (the return value).
        """
        measures = ("abs", "time")

        def window_columns(prefix, windows):
            # One (value column, time column) pair per window.
            return [tuple(f"{prefix}_{m}_{w}_sp" for m in measures) for w in windows]

        cols_b_sp = window_columns("before", before_windows)
        cols_a_sp = window_columns("after", after_windows)
        # value column -> matching time column, for both before and after windows
        cols_d = dict(cols_b_sp + cols_a_sp)

        # Rows that have both values and both timestamps for every before/after combination.
        t_final = final.copy()
        plot_data = {}
        for b_abs, b_time in cols_b_sp:
            if b_abs not in t_final.columns:
                continue
            plot_data[b_abs] = {
                a_abs: t_final.dropna(subset=[a_abs, a_time, b_abs, b_time])
                for a_abs, a_time in cols_a_sp
                if a_abs in t_final.columns
            }
        with open(f"plot_bw_aw_med_lab_data_{self.stratify_prefix}_doc_eval_win.pkl", "wb") as fh:
            pickle.dump(plot_data, fh)

        # Regroup by medication<>labtest pair and derive the change between before and after.
        p_data = {}
        for k, after_frames in plot_data.items():
            for k_a, data in after_frames.items():
                t_data = data.set_index([data["MED_NAME"], data["LAB_NAME"]])
                for med_lab_pair in t_data.index.unique():
                    # Copy so the derived columns are not written onto a view of t_data.
                    t_d = t_data.loc[med_lab_pair][['SUBJECT_ID', 'HADM_ID', k, k_a, cols_d[k_a]]].copy()
                    t_d['abs'] = t_d[k_a] - t_d[k]
                    t_d['percent'] = (t_d['abs'] / t_d[k]) * 100
                    t_d['ratio'] = t_d[k_a] / t_d[k]
                    # A zero before value gives +/-inf for percent and ratio; drop those rows.
                    t_d = t_d.replace([np.inf, -np.inf], np.nan).dropna()
                    p_data.setdefault(med_lab_pair, {}).setdefault(k, []).append(t_d)

        with open(f"plot_med_lab_bw_aw_data_{self.stratify_prefix}_doc_eval_win.pkl", "wb") as fh:
            pickle.dump(p_data, fh)
        return p_data
        

    def plot(self, final, t_labs, before_windows, after_windows):
        """
        Plots the correlation and trend plots for before and after windows. Returns a data frame with medication labtest pairs and change over time.
        Three different type of plots:
        1. Correlation plot (across all after windows for a before window) - plots_corr
        2. Large trend/data plot (across all after windows for a before window) - plot_func (type 1)
        3. Smaller trend/data plots (for each after window and before window) - plot_func (type 2)

        Parameters
        ----------
        final : pandas.DataFrame
            Passed unchanged to ``generate_plot_data``.
        t_labs : pandas.DataFrame
            Needs ``ITEMID``, ``VALUEUOM`` and ``SUBJECT_ID`` columns; used only to look up
            a unit string per lab.
        before_windows, after_windows : iterable
            Windows; their ``str()`` form is matched against the window part of the
            column names produced by ``generate_plot_data``.

        Returns
        -------
        pandas.DataFrame
            One row per (lab, medication, before window, after window, type) with the
            Pearson/Spearman statistics and the number of points. The same frame is
            written to ``corrs_data_<prefix>.csv`` inside the plot folder.
        """

        plot_dir, plot_dir1 = self.make_plot_dirs()
        type_map = {
            'abs': "Absolute",
            'percent': "Percentage",
            'ratio': "Ratio"
        }

        med_vals_new, labtest_vals_new = self.generate_med_lab_pairs()

        # First VALUEUOM per ITEMID. `.first()` keeps ITEMID as the key on every pandas
        # version; `groupby(...).nth(0)` only did so before pandas 2.0.
        lab_units_mapping_dict = (
            t_labs.groupby(["ITEMID", "VALUEUOM"]).count()["SUBJECT_ID"].reset_index()
            .groupby("ITEMID")["VALUEUOM"].first().to_dict()
        )

        p_data = self.generate_plot_data(final, before_windows, after_windows)

        # Narrow down to the requested pairs only when that list is shorter than what was
        # generated; otherwise keep every generated pair (previously this case silently
        # discarded all of them, so nothing was plotted).
        requested_pairs = list(zip(med_vals_new, labtest_vals_new))
        if len(requested_pairs) < len(p_data):
            p_data = {pair: p_data[pair] for pair in requested_pairs if pair in p_data}

        before_windows_map = {f"({str(b_w)[1:-1]})":b_w for b_w in before_windows}
        after_windows_map = {f"({str(a_w)[1:-1]})":a_w for a_w in after_windows}

        # Figures that are already saved. They are closed only right after a newer figure
        # has been created, so the figure pyplot treats as current is never the one closed
        # while plot_func/plot_corrs may still rely on it.
        stale_figs = []

        def close_stale_figs():
            while stale_figs:
                plt.close(stale_figs.pop())

        # For each medication and lab test pair plot before and after window correlations and trends
        types = ['abs', 'percent', 'ratio']
        stratify_prefix = self.stratify_prefix
        corrs_data_dict = []
        for k, v in p_data.items():
            for key in v:
                presc = k[0].split("/")[0] if "/" in k[0] else k[0]
                lab = k[1]
                # key looks like "before_abs_<window>_sp"; the second-to-last part is the window
                before_window = before_windows_map[key.split("_")[-2]]

                fig_all, ax_all = plt.subplots(3, figsize=(20, 20))
                close_stale_figs()
                fig_all.suptitle(f'{lab}<>{presc} (before window = {str(before_window)})')

                dirname=f"bw_{before_window}"
                os.makedirs(os.path.join(plot_dir, f"{lab}<>{presc}", dirname), exist_ok=True)

                # Iterating over the type of analysis : ["Absolute", "Percentage", "Ratio"]
                for i, type1 in enumerate(types):

                    plot_name = f"{lab}<>{presc}_{key}_{type1}"
                    # Series entries (see generate_plot_data) are skipped.
                    window_frames = [d for d in v[key] if type(d) != pd.Series]
                    # Column -4 is the after-time column and column 3 the after-value column.
                    data_vals = [d[[list(d.columns)[-4], type1]].rename(columns={list(d.columns)[-4] : "time"}) for d in window_frames]
                    after_names = [list(d.columns)[3] for d in window_frames]
                    if len(data_vals)==0:
                        continue
                    d = pd.concat(data_vals)
                    if d.shape[0]<2:
                        continue
                    d1, d2 = remove_outlier(d[type1], d["time"])
                    d = pd.concat([d1, d2], axis=1)

                    # Calculate correlation over all after windows
                    p_corr = pearsonr(d1[type1], d2["time"])
                    s_corr = spearmanr(d1[type1], d2["time"])

                    # Get units for the plot and plot overall data plot
                    unit = lab_units_mapping_dict[lab] if lab in lab_units_mapping_dict else ""
                    plot_func(lab, presc, d[[type1, "time"]].rename(columns={type1:"data"}), dirname="", labels=(p_corr, s_corr), plot_dir=plot_dir, plot_dir1=plot_dir1, unit=unit, title=f"bw{before_window} {type_map[type1]}", plot_name=f"{plot_name}", ax=ax_all[i])

                    # Correlation plots
                    fig_corrs, ax_corrs = plt.subplots(2, figsize=(20, 20))
                    close_stale_figs()
                    fig_corrs.suptitle(f'{lab}<>{presc} {type1} corrs')
                    corrs = []
                    data_t = []
                    kept_after_names = []
                    for after_name, window_df in zip(after_names, data_vals):
                        # Windows with fewer than two points are left out.
                        if window_df.shape[0]<2:
                            continue
                        w1, w2 = remove_outlier(window_df[type1], window_df["time"])
                        corrs.append((pearsonr(w1[type1], w2["time"]), spearmanr(w1[type1], w2["time"])))
                        data_t.append([w1, w2])
                        kept_after_names.append(after_name)
                    plot_corrs(corrs, kept_after_names, after_windows_map, ax_corrs, title=type1, plot_name=plot_name)

                    # Make correlation plot with all windows
                    fig_corrs.savefig(os.path.join(plot_dir, f"{lab}<>{presc}", dirname, f"{plot_name}_{type1}_{stratify_prefix}_corrs.png"))
                    fig_corrs.clf()
                    stale_figs.append(fig_corrs)

                    for a, c, t in zip(kept_after_names, corrs, data_t):
                        d = pd.concat(t, axis=1)
                        p_corr = c[0]
                        s_corr = c[1]
                        after_window = after_windows_map[a.split("_")[-2]]

                        # Make plot with data points in a single window
                        plot_func(lab, presc, d[[type1, "time"]].rename(columns={type1:"data"}), dirname=dirname, plot_dir=plot_dir, plot_dir1=plot_dir1, labels=c, unit=unit, title=f"bw{before_window} aw{after_window} {type_map[type1]}", plot_name=f"{plot_name} bw{before_window} aw{after_window}")
                        corrs_data_dict.append({
                            "lab" : lab,
                            "med": presc,
                            "bw": before_window,
                            "aw": after_window,
                            "Type": type_map[type1],
                            "Pearson Correlation": p_corr[0],
                            "Pearson Correlation (p-value)": p_corr[1],
                            "Spearmans Correlation ": s_corr[0],
                            "Spearmans Correlation (p-value)": s_corr[1],
                            "Num of Data Points (n)": d.shape[0]
                        })

                # The file name carries the last analysis type because plot_name is reused here.
                fig_all.savefig(os.path.join(plot_dir, f"{lab}<>{presc}", dirname, f"{plot_name}_{stratify_prefix}.png"))
                fig_all.clf()
                stale_figs.append(fig_all)

        # Nothing is drawn after this point, so the remaining figures can go too.
        close_stale_figs()

        # Save correlation values for each type, after and before window
        corrs_data_df = pd.DataFrame(corrs_data_dict)
        corrs_data_df.to_csv(os.path.join(plot_dir, f"corrs_data_{stratify_prefix}.csv"))
        return corrs_data_df


## Setup Config

### Input - Output

In [19]:
# Input / output locations -- replace these with your own Drive paths.

# MIMIC (paths for the Colab mount at /gdrive, kept for reference)
# data = "/gdrive/MyDrive/TAU/Code/DrugLab/data"
# res = "/gdrive/MyDrive/TAU/Code/DrugLab/results"
res = "/Volumes/GoogleDrive/My Drive/TAU/Code/DrugLab/results"
data = "/Volumes/GoogleDrive/My Drive/TAU/Code/DrugLab/data"

# HiRID (paths for the Colab mount at /gdrive, kept for reference)
# raw_path = '/gdrive/MyDrive/TAU/Code/DrugLab/data/hirid-a-high-time-resolution-icu-dataset-1.1.1/raw_stage/'
# res_path = '/gdrive/MyDrive/TAU/Code/DrugLab/data/hirid-a-high-time-resolution-icu-dataset-1.1.1'
res_path = "/Volumes/GoogleDrive/My Drive/TAU/Code/DrugLab/data/hirid-a-high-time-resolution-icu-dataset-1.1.1"
raw_path = "/Volumes/GoogleDrive/My Drive/TAU/Code/DrugLab/data/hirid-a-high-time-resolution-icu-dataset-1.1.1/raw_stage/"

### Stratification

In [13]:
# Stratification Config
gender="MF"
age_b=40
age_a=80 
ethnicity="WHITE" 

In [14]:
# Re-binds the mapping defined in the "Lab test MIMIC Extract Mapping" cell to itself:
# a no-op, except that it raises NameError when that cell has not been run.
lab_mapping=lab_mapping

In [15]:
# Windows looked at before the medication: (0, 12) and (0, 6).
before_windows = [(0, 12), (0, 6)]
# Twelve consecutive unit-wide windows after the medication: (0, 1), (1, 2), ..., (11, 12).
after_windows = [(start, start + 1) for start in range(12)]

## MIMIC

### MIMIC Parser

In [None]:
# Build the MIMIC parser from the shared configuration and parse the two medication tables and the lab table.
mimic_parser = MIMICParser(
    data=data,
    res=res,
    gender=gender,
    age_b=age_b,
    age_a=age_a,
    ethnicity=ethnicity,
    lab_mapping=lab_mapping,
)
m_med1, m_med2, m_labs = mimic_parser.parse()

### Query Data

In [None]:
# Querier for the MIMIC data, configured with the shared stratification settings.
mimic_querier_kwargs = dict(
    data=data,
    res=res,
    gender=gender,
    age_b=age_b,
    age_a=age_a,
    ethnicity=ethnicity,
    lab_mapping=lab_mapping,
)
mimic_data_querier = DatasetQuerier(**mimic_querier_kwargs)

In [None]:
# generate_med_lab_data returns two objects (the trend table and the raw per-window table).
# Unpack them so that m_final_lab_med_data is the DataFrame the plotting cell expects
# instead of a 2-tuple.
m_final_lab_med_data, m_raw_lab_med_data = mimic_data_querier.generate_med_lab_data(
    m_labs, m_med1, m_med2, before_windows, after_windows
)

### Discovery Analysis

### Plot Analysis

In [None]:
# Plotter for the MIMIC results, configured with the shared stratification settings.
mimic_plotter_kwargs = dict(
    data=data,
    res=res,
    gender=gender,
    age_b=age_b,
    age_a=age_a,
    ethnicity=ethnicity,
    lab_mapping=lab_mapping,
)
plotter = ClinicalPlotAnalysis(**mimic_plotter_kwargs)

In [None]:
# Produce the MIMIC plots; the returned frame holds the correlation values per window.
m_corrs_data_df = plotter.plot(
    m_final_lab_med_data,
    m_labs,
    before_windows=before_windows,
    after_windows=after_windows,
)

## HIRID

### HiRiD Parser

In [42]:
# The HiRID parser reads from raw_path and is given res_path as its result location.
hirid_parser = HiRiDParser(
    data=raw_path,
    res=res_path,
    gender=gender,
    age_b=age_b,
    age_a=age_a,
)

In [45]:
# Parse HiRID; the three results are passed to generate_med_lab_data further down
# as (h_labs, h_med1, h_med2).
h_med1, h_med2, h_labs = hirid_parser.parse()

  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
  labs = pd.read_csv(path)
 

### Query Data

In [68]:
# NOTE(review): this querier is built with the MIMIC `data`/`res` locations, ethnicity and
# lab mapping rather than the HiRID `raw_path`/`res_path`. The CSV writes visible in the
# before/after-window generation code go under <data>/mimiciii/1.4/preprocessed with a name
# that depends only on the stratification prefix -- confirm the HiRID run is not meant to
# use separate paths, otherwise it may overwrite the MIMIC output.
hirid_querier_kwargs = dict(
    data=data,
    res=res,
    gender=gender,
    age_b=age_b,
    age_a=age_a,
    ethnicity=ethnicity,
    lab_mapping=lab_mapping,
)
hirid_data_querier = DatasetQuerier(**hirid_querier_kwargs)

In [69]:
# First result: the trend table used for plotting; second result: the raw per-window table.
h_med_lab_results = hirid_data_querier.generate_med_lab_data(
    h_labs, h_med1, h_med2, before_windows, after_windows
)
final_h_final_lab_med_data, raw_h_final_lab_med_data = h_med_lab_results

: 

### Discovery Analysis

### Plot Analysis

In [None]:
# Plotter for the HiRID results; note the empty ethnicity and empty lab mapping used here.
hirid_plotter_kwargs = dict(
    data=data,
    res=res,
    gender=gender,
    age_b=age_b,
    age_a=age_a,
    ethnicity="",
    lab_mapping={},
)
h_plotter = ClinicalPlotAnalysis(**hirid_plotter_kwargs)

In [None]:
# Produce the HiRID plots; the returned frame holds the correlation values per window.
h_corrs_data_df = h_plotter.plot(
    final_h_final_lab_med_data,
    h_labs,
    before_windows=before_windows,
    after_windows=after_windows,
)