
# Medication-Labtest Pairs Retieval and T-Test P-values

## Import Libraries

In [None]:
import pandas as pd
import datetime
import numpy as np
from scipy.stats import mannwhitneyu
from scipy import stats
from tqdm import tqdm
import os

## Load Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
PARENT='/content/drive/MyDrive/TAU'
DATA = PARENT+'/Datasets/mimiciii/1.4'

In [None]:
for i in os.walk(os.path.join(DATA)):
    print(i)

('/content/drive/MyDrive/TAU/Datasets/mimiciii/1.4', [], ['CALLOUT.csv.gz', 'ADMISSIONS.csv.gz', 'CAREGIVERS.csv.gz', 'D_ITEMS.csv.gz', 'D_ICD_PROCEDURES.csv.gz', 'D_ICD_DIAGNOSES.csv.gz', 'ICUSTAYS.csv.gz', 'CHARTEVENTS.csv.gz', 'DRGCODES.csv.gz', 'CPTEVENTS.csv.gz', 'DIAGNOSES_ICD.csv.gz', 'D_CPT.csv.gz', 'D_LABITEMS.csv.gz', 'DATETIMEEVENTS.csv.gz', 'INPUTEVENTS_CV.csv.gz', 'INPUTEVENTS_MV.csv.gz', 'LABEVENTS.csv.gz', 'MICROBIOLOGYEVENTS.csv.gz', 'OUTPUTEVENTS.csv.gz', 'PATIENTS.csv.gz', 'README.md', 'PROCEDURES_ICD.csv.gz', 'PRESCRIPTIONS.csv.gz', 'TRANSFERS.csv.gz', 'PROCEDUREEVENTS_MV.csv.gz', 'SERVICES.csv.gz', 'LICENSE.txt', 'NOTEEVENTS.csv.gz', 'SHA256SUMS.txt', 'index.html'])


## Load Data

### Labevents

In [None]:
labevents = pd.read_csv(os.path.join(DATA, 'LABEVENTS.csv.gz')).dropna()
d_labitems = pd.read_csv(os.path.join(DATA, 'D_LABITEMS.csv.gz')).dropna()

In [None]:
labevents.shape, d_labitems.shape

((7881769, 9), (585, 6))

In [None]:
labevents

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ITEMID,CHARTTIME,VALUE,VALUENUM,VALUEUOM,FLAG
162,443,3,145834.0,50893,2101-10-20 16:40:00,8.2,8.2,mg/dL,abnormal
163,444,3,145834.0,50902,2101-10-20 16:40:00,99,99.0,mEq/L,abnormal
166,447,3,145834.0,50912,2101-10-20 16:40:00,3.2,3.2,mg/dL,abnormal
169,450,3,145834.0,50970,2101-10-20 16:40:00,4.8,4.8,mg/dL,abnormal
170,451,3,145834.0,50971,2101-10-20 16:40:00,5.4,5.4,mEq/L,abnormal
...,...,...,...,...,...,...,...,...,...
27854045,27428430,96443,103219.0,50862,2109-12-30 01:40:00,2.2,2.2,g/dL,abnormal
27854046,27428431,96443,103219.0,50863,2109-12-30 01:40:00,172,172.0,IU/L,abnormal
27854049,27428434,96443,103219.0,50878,2109-12-30 01:40:00,467,467.0,IU/L,abnormal
27854051,27428436,96443,103219.0,50885,2109-12-30 01:40:00,2.1,2.1,mg/dL,abnormal


In [None]:
labValues = pd.merge(labevents, d_labitems, on='ITEMID', how='inner')

In [None]:
labValues.shape

(7876570, 14)

In [None]:
# subject_id,l.hadm_id, d.label, l.valuenum, l.valueuom, l.charttime
labValues = labValues[['SUBJECT_ID', 'HADM_ID', 'LABEL', 'VALUENUM', 'VALUEUOM', 'CHARTTIME']]

In [None]:
labValues['CHARTTIME'] = pd.to_datetime(labValues['CHARTTIME'],  format='%Y/%m/%d %H:%M:%S')

In [None]:
labValues

Unnamed: 0,SUBJECT_ID,HADM_ID,LABEL,VALUENUM,VALUEUOM,CHARTTIME
0,3,145834.0,"Calcium, Total",8.2,mg/dL,2101-10-20 16:40:00
1,3,145834.0,"Calcium, Total",7.6,mg/dL,2101-10-22 04:00:00
2,3,145834.0,"Calcium, Total",7.1,mg/dL,2101-10-22 21:15:00
3,3,145834.0,"Calcium, Total",7.5,mg/dL,2101-10-23 03:45:00
4,3,145834.0,"Calcium, Total",7.4,mg/dL,2101-10-20 19:59:00
...,...,...,...,...,...,...
7876565,53160,138843.0,Plasma,1.0,%,2194-06-27 13:52:00
7876566,96240,168044.0,Plasma,7.0,%,2105-05-16 16:45:00
7876567,40200,108146.0,Sex Hormone Binding Globulin,12.0,nmol/L,2165-01-10 16:00:00
7876568,58010,185994.0,Sex Hormone Binding Globulin,11.0,nmol/L,2109-02-19 00:16:00


In [None]:
del labevents, d_labitems

### Admissions

In [None]:
admissions = pd.read_csv(os.path.join(DATA, 'ADMISSIONS.csv.gz'))

In [None]:
# subject_id,hadm_id
admissions = admissions[['SUBJECT_ID', 'HADM_ID']]
admissions

Unnamed: 0,SUBJECT_ID,HADM_ID
0,22,165315
1,23,152223
2,23,124321
3,24,161859
4,25,129635
...,...,...
58971,98800,191113
58972,98802,101071
58973,98805,122631
58974,98813,170407


### Input Events MV

In [None]:
inputevents_mv = pd.read_csv(os.path.join(DATA, 'INPUTEVENTS_MV.csv.gz'), nrows=500000)

In [None]:
inputevents_cv = pd.read_csv(os.path.join(DATA, 'INPUTEVENTS_CV.csv.gz'), nrows=500000)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
inputevents_mv.columns

Index(['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'STARTTIME', 'ENDTIME',
       'ITEMID', 'AMOUNT', 'AMOUNTUOM', 'RATE', 'RATEUOM', 'STORETIME', 'CGID',
       'ORDERID', 'LINKORDERID', 'ORDERCATEGORYNAME',
       'SECONDARYORDERCATEGORYNAME', 'ORDERCOMPONENTTYPEDESCRIPTION',
       'ORDERCATEGORYDESCRIPTION', 'PATIENTWEIGHT', 'TOTALAMOUNT',
       'TOTALAMOUNTUOM', 'ISOPENBAG', 'CONTINUEINNEXTDEPT', 'CANCELREASON',
       'STATUSDESCRIPTION', 'COMMENTS_EDITEDBY', 'COMMENTS_CANCELEDBY',
       'COMMENTS_DATE', 'ORIGINALAMOUNT', 'ORIGINALRATE'],
      dtype='object')

In [None]:
inputevents_mv['ORDERCATEGORYDESCRIPTION'].value_counts()

Continuous Med    196312
Drug Push         132569
Continuous IV     105383
Bolus              63875
Non Iv Meds         1861
Name: ORDERCATEGORYDESCRIPTION, dtype: int64

In [None]:
inputevents_mv[inputevents_mv['ORDERCATEGORYDESCRIPTION']=='Continuous Med'][['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'STARTTIME', 'ENDTIME',
       'ITEMID', 'AMOUNT', 'AMOUNTUOM', 'RATE', 'RATEUOM', 'STORETIME', 'CGID',
       'ORDERID', 'LINKORDERID', 'ORDERCATEGORYNAME',
       'SECONDARYORDERCATEGORYNAME', 'ORDERCOMPONENTTYPEDESCRIPTION',
       'ORDERCATEGORYDESCRIPTION', 'PATIENTWEIGHT']]

Unnamed: 0,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTTIME,ENDTIME,ITEMID,AMOUNT,AMOUNTUOM,RATE,RATEUOM,STORETIME,CGID,ORDERID,LINKORDERID,ORDERCATEGORYNAME,SECONDARYORDERCATEGORYNAME,ORDERCOMPONENTTYPEDESCRIPTION,ORDERCATEGORYDESCRIPTION,PATIENTWEIGHT
21,29969,129547,248410.0,2181-02-20 21:00:00,2181-02-20 22:00:00,223258,5.000000,units,5.000000,units/hour,2181-02-20 21:00:00,18860,1173226,1173226,01-Drips,02-Fluids (Crystalloids),Main order parameter,Continuous Med,90.9
22,29969,129547,248410.0,2181-02-20 21:00:00,2181-02-20 22:00:00,225158,5.000000,ml,5.000000,mL/hour,2181-02-20 21:00:00,18860,1173226,1173226,01-Drips,02-Fluids (Crystalloids),Mixed solution,Continuous Med,90.9
25,29969,129547,248410.0,2181-02-21 04:00:00,2181-02-21 07:24:00,223258,34.984998,units,10.289705,units/hour,2181-02-21 08:02:00,18860,1833327,1173226,01-Drips,02-Fluids (Crystalloids),Main order parameter,Continuous Med,90.9
26,29969,129547,248410.0,2181-02-21 04:00:00,2181-02-21 07:24:00,225158,34.984998,ml,10.289705,mL/hour,2181-02-21 08:02:00,18860,1833327,1173226,01-Drips,02-Fluids (Crystalloids),Mixed solution,Continuous Med,90.9
29,29969,129547,248410.0,2181-02-21 04:00:00,2181-02-21 04:01:00,223258,0.166597,units,9.995806,units/hour,2181-02-21 04:05:00,18860,2343538,1173226,01-Drips,02-Fluids (Crystalloids),Main order parameter,Continuous Med,90.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499986,21613,180503,289907.0,2199-02-19 11:01:00,2199-02-19 13:13:00,225158,21.983763,ml,9.992620,mL/hour,2199-02-19 11:10:00,14757,1997834,5370036,01-Drips,02-Fluids (Crystalloids),Mixed solution,Continuous Med,72.0
499989,21613,180503,289907.0,2199-02-18 10:52:00,2199-02-18 10:53:00,223258,0.447645,units,26.858699,units/hour,2199-02-18 10:50:00,17250,2305728,3337514,01-Drips,02-Fluids (Crystalloids),Main order parameter,Continuous Med,72.0
499990,21613,180503,289907.0,2199-02-18 10:52:00,2199-02-18 10:53:00,225158,0.447645,ml,26.858699,mL/hour,2199-02-18 10:50:00,17250,2305728,3337514,01-Drips,02-Fluids (Crystalloids),Mixed solution,Continuous Med,72.0
499998,98229,149609,296538.0,2121-03-05 17:00:00,2121-03-05 18:48:00,222168,374.464296,mg,49.745509,mcg/kg/min,2121-03-05 22:47:00,21297,3452651,1753581,01-Drips,02-Fluids (Crystalloids),Main order parameter,Continuous Med,69.7


In [None]:
inputevents_mv[['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'STARTTIME', 'ENDTIME',
       'ITEMID', 'AMOUNT', 'AMOUNTUOM', 'RATE', 'RATEUOM', 'STORETIME', 'CGID',
       'ORDERID', 'LINKORDERID', 'ORDERCATEGORYNAME',
       'SECONDARYORDERCATEGORYNAME', 'ORDERCOMPONENTTYPEDESCRIPTION',
       'ORDERCATEGORYDESCRIPTION', 'PATIENTWEIGHT']]

Unnamed: 0,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTTIME,ENDTIME,ITEMID,AMOUNT,AMOUNTUOM,RATE,RATEUOM,STORETIME,CGID,ORDERID,LINKORDERID,ORDERCATEGORYNAME,SECONDARYORDERCATEGORYNAME,ORDERCOMPONENTTYPEDESCRIPTION,ORDERCATEGORYDESCRIPTION,PATIENTWEIGHT
0,27063,139787,223259.0,2133-02-05 06:29:00,2133-02-05 08:45:00,225166,6.774532,mEq,,,2133-02-05 06:27:00,16009,4892074,4892074,02-Fluids (Crystalloids),Additive (Crystalloid),Additives ...,Continuous IV,83.2
1,27063,139787,223259.0,2133-02-05 05:34:00,2133-02-05 06:30:00,225944,28.132997,ml,30.142497,mL/hour,2133-02-05 12:57:00,20053,5211428,1576890,02-Fluids (Crystalloids),Additive (Crystalloid),Main order parameter,Continuous IV,83.2
2,27063,139787,223259.0,2133-02-05 05:34:00,2133-02-05 06:30:00,225166,2.813300,mEq,,,2133-02-05 12:57:00,20053,5211428,1576890,02-Fluids (Crystalloids),Additive (Crystalloid),Additives ...,Continuous IV,83.2
3,27063,139787,223259.0,2133-02-03 12:00:00,2133-02-03 12:01:00,225893,1.000000,dose,,,2133-02-03 13:21:00,20030,5213971,5213971,08-Antibiotics (IV),02-Fluids (Crystalloids),Main order parameter,Drug Push,83.2
4,27063,139787,223259.0,2133-02-03 12:00:00,2133-02-03 12:01:00,220949,100.000000,ml,,,2133-02-03 13:21:00,20030,5213971,5213971,08-Antibiotics (IV),02-Fluids (Crystalloids),Mixed solution,Drug Push,83.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,21613,180503,289907.0,2199-02-18 15:01:00,2199-02-18 15:35:00,220949,113.333332,ml,199.999998,mL/hour,2199-02-18 15:01:00,17250,2570503,2570503,02-Fluids (Crystalloids),Additive (Crystalloid),Main order parameter,Continuous IV,72.0
499996,98229,149609,296538.0,2121-03-07 10:00:00,2121-03-07 11:00:00,226089,49.999999,ml,49.999999,mL/hour,2121-03-07 10:51:00,20808,3434949,3434949,02-Fluids (Crystalloids),Additive (Crystalloid),Main order parameter,Continuous IV,69.7
499997,98229,149609,296538.0,2121-03-07 10:00:00,2121-03-07 11:00:00,222011,2.000000,grams,,,2121-03-07 10:51:00,20808,3434949,3434949,02-Fluids (Crystalloids),Additive (Crystalloid),Additives ...,Continuous IV,69.7
499998,98229,149609,296538.0,2121-03-05 17:00:00,2121-03-05 18:48:00,222168,374.464296,mg,49.745509,mcg/kg/min,2121-03-05 22:47:00,21297,3452651,1753581,01-Drips,02-Fluids (Crystalloids),Main order parameter,Continuous Med,69.7


In [None]:
inputevents_mv['ORDERCATEGORYNAME'].value_counts()

01-Drips                    196312
02-Fluids (Crystalloids)     76453
08-Antibiotics (IV)          59048
14-Oral/Gastric Intake       51766
05-Med Bolus                 33985
13-Enteral Nutrition         18647
06-Insulin (Non IV)          15424
11-Prophylaxis (Non IV)      13991
10-Prophylaxis (IV)          10121
03-IV Fluid Bolus             9305
07-Blood Products             5369
12-Parenteral Nutrition       3146
16-Pre Admission              2787
09-Antibiotics (Non IV)       1861
04-Fluids (Colloids)          1768
15-Supplements                  17
Name: ORDERCATEGORYNAME, dtype: int64

### Item

In [None]:
d_item = pd.read_csv(os.path.join(DATA, 'D_ITEMS.csv.gz'))

In [None]:
# d_item[d_item['CATEGORY']=='Antibiotics']

### Item <> Input events MV

In [None]:
ditem_inputevents_mv = pd.merge(inputevents_mv, d_item, on='ITEMID', how='inner')

In [None]:
ditem_inputevents_mv.columns

Index(['ROW_ID_x', 'SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'STARTTIME',
       'ENDTIME', 'ITEMID', 'AMOUNT', 'AMOUNTUOM', 'RATE', 'RATEUOM',
       'STORETIME', 'CGID', 'ORDERID', 'LINKORDERID', 'ORDERCATEGORYNAME',
       'SECONDARYORDERCATEGORYNAME', 'ORDERCOMPONENTTYPEDESCRIPTION',
       'ORDERCATEGORYDESCRIPTION', 'PATIENTWEIGHT', 'TOTALAMOUNT',
       'TOTALAMOUNTUOM', 'ISOPENBAG', 'CONTINUEINNEXTDEPT', 'CANCELREASON',
       'STATUSDESCRIPTION', 'COMMENTS_EDITEDBY', 'COMMENTS_CANCELEDBY',
       'COMMENTS_DATE', 'ORIGINALAMOUNT', 'ORIGINALRATE', 'ROW_ID_y', 'LABEL',
       'ABBREVIATION', 'DBSOURCE', 'LINKSTO', 'CATEGORY', 'UNITNAME',
       'PARAM_TYPE', 'CONCEPTID'],
      dtype='object')

In [None]:
inputevents_mv_1 = ditem_inputevents_mv[['SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'STARTTIME', 'ENDTIME', 'ITEMID', 'AMOUNT', 'AMOUNTUOM', 'UNITNAME', 'ORDERCATEGORYNAME', 'LABEL', 'CATEGORY', 'PARAM_TYPE']]
inputevents_mv_1

Unnamed: 0,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTTIME,ENDTIME,ITEMID,AMOUNT,AMOUNTUOM,UNITNAME,ORDERCATEGORYNAME,LABEL,CATEGORY,PARAM_TYPE
0,27063,139787,223259.0,2133-02-05 06:29:00,2133-02-05 08:45:00,225166,6.774532,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
1,27063,139787,223259.0,2133-02-05 05:34:00,2133-02-05 06:30:00,225166,2.813300,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
2,27063,139787,223259.0,2133-02-05 05:34:00,2133-02-05 07:03:00,225166,4.433333,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
3,27063,139787,223259.0,2133-02-05 09:43:00,2133-02-05 12:30:00,225166,8.360021,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
4,27063,139787,223259.0,2133-02-05 05:34:00,2133-02-05 05:35:00,225166,0.050060,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,65382,163918,288487.0,2132-12-09 02:35:00,2132-12-09 02:36:00,227692,0.004046,mg,mg,01-Drips,Isuprel,Medications,Solution
499996,60970,163433,274168.0,2132-02-08 05:37:00,2132-02-08 07:15:00,225148,249.999999,mg,mg,01-Drips,Bivalirudin (Angiomax),Medications,Solution
499997,45426,166752,236992.0,2144-01-06 17:33:00,2144-01-07 00:16:00,226022,134.333336,ml,mL,13-Enteral Nutrition,Impact (3/4),Nutrition - Enteral,Solution
499998,54586,122570,222417.0,2168-03-09 11:00:00,2168-03-09 12:00:00,225173,7500.000000,ml,mL,07-Blood Products,Cell Saver,Blood Products/Colloids,Solution


In [None]:
top200_meds = inputevents_mv_1['LABEL'].value_counts()[:200]

In [None]:
inputevents_mv_1['CATEGORY'].value_counts()

Fluids/Intake              242772
Medications                196463
Antibiotics                 31382
Nutrition - Enteral         18661
Blood Products/Colloids      7929
Nutrition - Parenteral       2790
Nutrition - Supplements         3
Name: CATEGORY, dtype: int64

In [None]:
inputevents_mv_1['STARTTIME'] = pd.to_datetime(inputevents_mv_1['STARTTIME'],  format='%Y/%m/%d %H:%M:%S')
inputevents_mv_1['ENDTIME'] = pd.to_datetime(inputevents_mv_1['ENDTIME'],  format='%Y/%m/%d %H:%M:%S')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


## Data Preprocessing

In [None]:
def remove_multiple_admissions(df):
    """
    Removes hospital admissions that occur more than once for the same patient
  
    Parameters:
    df (DataFrame): Takes in dataframe with multiple hospital admissions
  
    Returns:
    Dataframe: Returns dataframe with multiple hospital admissions removed
    """
    first_admissions = admissions
    first_admissions = first_admissions.drop_duplicates(subset=['SUBJECT_ID'], keep='first')
    df = df[df['HADM_ID'].isin(first_admissions['HADM_ID'])]
    return df

In [None]:
lab_measurements = labValues

In [None]:
top200_meds = pd.DataFrame(top200_meds, columns=['LABEL']).reset_index()
top200_meds.rename(columns = {'index':'MED', 'LABEL':'COUNT'}, inplace = True)
top200_meds

Unnamed: 0,MED,COUNT
0,NaCl 0.9%,76138
1,Dextrose 5%,60536
2,Solution,27506
3,Propofol,20005
4,PO Intake,19287
...,...,...
195,Nutren 2.0 (1/2),7
196,Nutren 2.0 (3/4),7
197,Valgancyclovir,7
198,Magnesium Sulfate (OB-GYN),5


In [None]:
patient_presc = inputevents_mv_1

In [None]:
patient_presc = remove_multiple_admissions(patient_presc)
patient_presc = inputevents_mv_1[inputevents_mv_1['LABEL'].isin(top200_meds['MED'])]

In [None]:
patient_presc

Unnamed: 0,SUBJECT_ID,HADM_ID,ICUSTAY_ID,STARTTIME,ENDTIME,ITEMID,AMOUNT,AMOUNTUOM,UNITNAME,ORDERCATEGORYNAME,LABEL,CATEGORY,PARAM_TYPE
0,27063,139787,223259.0,2133-02-05 06:29:00,2133-02-05 08:45:00,225166,6.774532,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
1,27063,139787,223259.0,2133-02-05 05:34:00,2133-02-05 06:30:00,225166,2.813300,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
2,27063,139787,223259.0,2133-02-05 05:34:00,2133-02-05 07:03:00,225166,4.433333,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
3,27063,139787,223259.0,2133-02-05 09:43:00,2133-02-05 12:30:00,225166,8.360021,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
4,27063,139787,223259.0,2133-02-05 05:34:00,2133-02-05 05:35:00,225166,0.050060,mEq,mEq,02-Fluids (Crystalloids),Potassium Chloride,Medications,Solution
...,...,...,...,...,...,...,...,...,...,...,...,...,...
499991,65382,163918,288487.0,2132-12-09 04:42:00,2132-12-09 08:49:00,227692,0.999294,mg,mg,01-Drips,Isuprel,Medications,Solution
499992,65382,163918,288487.0,2132-12-09 01:36:00,2132-12-09 01:49:00,227692,0.117747,mg,mg,01-Drips,Isuprel,Medications,Solution
499993,65382,163918,288487.0,2132-12-09 02:10:00,2132-12-09 02:20:00,227692,0.060357,mg,mg,01-Drips,Isuprel,Medications,Solution
499994,65382,163918,288487.0,2132-12-09 04:45:00,2132-12-09 14:50:00,227692,2.420000,mg,mg,01-Drips,Isuprel,Medications,Solution


In [None]:
lab_measurements = lab_measurements[lab_measurements.duplicated(subset=['SUBJECT_ID','LABEL'],keep=False)]
lab_measurements = lab_measurements[lab_measurements['HADM_ID'].isin(patient_presc['HADM_ID'])]

## Generating Lab Test<>Meds Pairings

In [None]:
def labpairing(medname, prescdf, labdf, labname):
    """Pairs the drug input with each lab test

    Parameters:
    drugname (String): Drug Name
    prescdf (DataFrame): Dataframe containing the prescription data
    labdf (DataFrame): Dataframe containing the lab measurement data
    labname (DataFrame): Lab Test Name
    Returns:
    DataFrame: Contains all the rows of values and times for that particular drug lab apir
  
    """
    # Select patients who have taken the drug
    prescdf = prescdf[prescdf['LABEL']==medname]
    prescdf = prescdf.drop_duplicates(subset=['SUBJECT_ID'], keep='first')

    # Select lab measurements of patients who have taken the drug
    labdf = labdf[labdf['HADM_ID'].isin(prescdf['HADM_ID'])]

    # Selects the lab measurement entered
    drug_lab_specific = labdf[labdf['LABEL']==labname]
    mergeddf = pd.merge(drug_lab_specific, prescdf, on=['HADM_ID','SUBJECT_ID'])

    # Get time from prescription and choose before and after lab measurements (within 24hrs=1day)
    mergeddf['timeFromPrescription'] = mergeddf['CHARTTIME'] - mergeddf['STARTTIME']
    posmergeddf = mergeddf.loc[mergeddf.timeFromPrescription > datetime.timedelta(days=0)]
    negmergeddf = mergeddf.loc[mergeddf.timeFromPrescription < datetime.timedelta(days=0)]
    
    # Only keep values for which we have both before and after
    posmergeddf = posmergeddf[posmergeddf['HADM_ID'].isin(negmergeddf['HADM_ID'])]
    negmergeddf = negmergeddf[negmergeddf['HADM_ID'].isin(posmergeddf['HADM_ID'])]

    #Select the values closest to 0]
    before = posmergeddf[posmergeddf['SUBJECT_ID'].isin(posmergeddf.groupby('SUBJECT_ID').count().index)]
    posmergeddf = posmergeddf.loc[posmergeddf.groupby('SUBJECT_ID').timeFromPrescription.idxmin()]

    after = negmergeddf[negmergeddf['SUBJECT_ID'].isin(negmergeddf.groupby('SUBJECT_ID').count().index)]
    negmergeddf = negmergeddf.loc[negmergeddf.groupby('SUBJECT_ID').timeFromPrescription.idxmax()]

    finaldf = negmergeddf.merge(posmergeddf,on=['HADM_ID','SUBJECT_ID'])
    
    return finaldf, before, after

In [None]:
drug_lab, before, after = labpairing('NaCl 0.9%', patient_presc, lab_measurements, 'Calcium, Total')

In [None]:
before.sort_values(by=['CHARTTIME', 'LABEL_x', 'LABEL_y'], ascending=True)

Unnamed: 0,SUBJECT_ID,HADM_ID,LABEL_x,VALUENUM,VALUEUOM,CHARTTIME,ICUSTAY_ID,STARTTIME,ENDTIME,ITEMID,AMOUNT,AMOUNTUOM,UNITNAME,ORDERCATEGORYNAME,LABEL_y,CATEGORY,PARAM_TYPE,timeFromPrescription
5107,31585,125380.0,"Calcium, Total",7.5,mg/dL,2100-07-04 07:15:00,234741.0,2100-07-03 03:01:00,2100-07-03 03:02:00,225158,500.000000,ml,mL,03-IV Fluid Bolus,NaCl 0.9%,Fluids/Intake,Solution,1 days 04:14:00
5108,31585,125380.0,"Calcium, Total",7.7,mg/dL,2100-07-05 06:35:00,234741.0,2100-07-03 03:01:00,2100-07-03 03:02:00,225158,500.000000,ml,mL,03-IV Fluid Bolus,NaCl 0.9%,Fluids/Intake,Solution,2 days 03:34:00
5109,31585,125380.0,"Calcium, Total",7.6,mg/dL,2100-07-06 06:30:00,234741.0,2100-07-03 03:01:00,2100-07-03 03:02:00,225158,500.000000,ml,mL,03-IV Fluid Bolus,NaCl 0.9%,Fluids/Intake,Solution,3 days 03:29:00
5110,31585,125380.0,"Calcium, Total",7.7,mg/dL,2100-07-07 05:50:00,234741.0,2100-07-03 03:01:00,2100-07-03 03:02:00,225158,500.000000,ml,mL,03-IV Fluid Bolus,NaCl 0.9%,Fluids/Intake,Solution,4 days 02:49:00
4632,31263,198779.0,"Calcium, Total",7.7,mg/dL,2100-07-20 13:52:00,278147.0,2100-07-20 12:00:00,2100-07-20 12:01:00,225158,500.000000,ml,mL,03-IV Fluid Bolus,NaCl 0.9%,Fluids/Intake,Solution,0 days 01:52:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,4655,129279.0,"Calcium, Total",8.0,mg/dL,2204-02-05 13:56:00,292842.0,2204-02-05 09:25:00,2204-02-05 11:25:00,225158,69.999997,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,0 days 04:31:00
375,4655,129279.0,"Calcium, Total",8.1,mg/dL,2204-02-06 04:05:00,292842.0,2204-02-05 09:25:00,2204-02-05 11:25:00,225158,69.999997,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,0 days 18:40:00
376,4655,129279.0,"Calcium, Total",8.3,mg/dL,2204-02-06 13:25:00,292842.0,2204-02-05 09:25:00,2204-02-05 11:25:00,225158,69.999997,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,1 days 04:00:00
371,4655,129279.0,"Calcium, Total",8.2,mg/dL,2204-02-07 02:18:00,292842.0,2204-02-05 09:25:00,2204-02-05 11:25:00,225158,69.999997,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,1 days 16:53:00


In [None]:
after.sort_values(by=['CHARTTIME', 'LABEL_x', 'LABEL_y'], ascending=False)

Unnamed: 0,SUBJECT_ID,HADM_ID,LABEL_x,VALUENUM,VALUEUOM,CHARTTIME,ICUSTAY_ID,STARTTIME,ENDTIME,ITEMID,AMOUNT,AMOUNTUOM,UNITNAME,ORDERCATEGORYNAME,LABEL_y,CATEGORY,PARAM_TYPE,timeFromPrescription
374,4655,129279.0,"Calcium, Total",7.9,mg/dL,2204-02-05 01:57:00,292842.0,2204-02-05 09:25:00,2204-02-05 11:25:00,225158,69.999997,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,-1 days +16:32:00
369,4655,129279.0,"Calcium, Total",8.1,mg/dL,2204-02-04 16:38:00,292842.0,2204-02-05 09:25:00,2204-02-05 11:25:00,225158,69.999997,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,-1 days +07:13:00
373,4655,129279.0,"Calcium, Total",8.3,mg/dL,2204-02-04 06:30:00,292842.0,2204-02-05 09:25:00,2204-02-05 11:25:00,225158,69.999997,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,-2 days +21:05:00
372,4655,129279.0,"Calcium, Total",8.1,mg/dL,2204-02-04 01:25:00,292842.0,2204-02-05 09:25:00,2204-02-05 11:25:00,225158,69.999997,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,-2 days +16:00:00
1440,19627,155310.0,"Calcium, Total",7.5,mg/dL,2203-10-29 10:22:00,239039.0,2203-10-29 11:02:00,2203-10-31 19:48:00,225158,567.666678,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,-1 days +23:20:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11680,84686,131910.0,"Calcium, Total",8.2,mg/dL,2100-08-17 04:40:00,242735.0,2100-08-21 12:30:00,2100-08-21 13:30:00,225158,99.999996,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,-5 days +16:10:00
11679,84686,131910.0,"Calcium, Total",8.0,mg/dL,2100-08-16 13:34:00,242735.0,2100-08-21 12:30:00,2100-08-21 13:30:00,225158,99.999996,ml,mL,02-Fluids (Crystalloids),NaCl 0.9%,Fluids/Intake,Solution,-5 days +01:04:00
4631,31263,198779.0,"Calcium, Total",8.1,mg/dL,2100-07-19 00:30:00,278147.0,2100-07-20 12:00:00,2100-07-20 12:01:00,225158,500.000000,ml,mL,03-IV Fluid Bolus,NaCl 0.9%,Fluids/Intake,Solution,-2 days +12:30:00
5106,31585,125380.0,"Calcium, Total",7.1,mg/dL,2100-07-03 01:20:00,234741.0,2100-07-03 03:01:00,2100-07-03 03:02:00,225158,500.000000,ml,mL,03-IV Fluid Bolus,NaCl 0.9%,Fluids/Intake,Solution,-1 days +22:19:00


## Regression Analysis

## Performing Mannwhitney Test and T-Test

In [None]:
def postprocessing(df):
    """Gets the mean, standard deviation, mann whitney and t-test p values. Converts time delta to hours
  
    Parameters:
    df (DataFrame): Dataframe containing before and after lab test values and time values
    Returns:
    List:Containing mean, standard deviation, mann whitney and t-test p values and count
  
    """
    df['timeFromPrescription_x'] = pd.to_numeric(df['timeFromPrescription_x'].dt.seconds)
    df['timeFromPrescription_x']/=3600
    df['timeFromPrescription_y'] = pd.to_numeric(df['timeFromPrescription_y'].dt.seconds)
    df['timeFromPrescription_y']/=3600
    df_before_mean = df['VALUENUM_x'].mean()
    df_after_mean = df['VALUENUM_y'].mean()
    df_before_std = df['VALUENUM_x'].std()
    df_after_std = df['VALUENUM_y'].std()
    df_before_time_mean = df['timeFromPrescription_x'].mean()
    df_after_time_mean = df['timeFromPrescription_y'].mean()
    df_before_time_std = df['timeFromPrescription_x'].std()
    df_after_time_std = df['timeFromPrescription_y'].std()
    mannwhitneypvalue = mannwhitneyu(df['VALUENUM_x'], df['VALUENUM_y'])[1]
    ttestpvalue = stats.ttest_ind(df['VALUENUM_x'], df['VALUENUM_y'])[1]
    lengthofdf = len(df)
    csvrow=[lengthofdf,df_before_mean,df_before_std,df_before_time_mean,df_before_time_std,df_after_mean,df_after_std,df_after_time_mean,df_after_time_std,mannwhitneypvalue,ttestpvalue]
    return csvrow

 

In [None]:
drug_lab

Unnamed: 0,LABEL_x_x,VALUENUM_x,VALUEUOM_x,CHARTTIME_x,ICUSTAY_ID_x,STARTTIME_x,ENDTIME_x,ITEMID_x,AMOUNT_x,AMOUNTUOM_x,...,ENDTIME_y,ITEMID_y,AMOUNT_y,AMOUNTUOM_y,UNITNAME_y,ORDERCATEGORYNAME_y,LABEL_y_y,CATEGORY_y,PARAM_TYPE_y,timeFromPrescription_y


## Analysis Function

In [None]:
def comp_med_analysis(lab_measurements, top200_meds, n_medlab_pairs = 25, n_meds=None):
    res = pd.DataFrame(columns=['Medication Name','Feature Name','Number of patients','Feature Before(mean)','Feature Before(std)','Time Before(mean)','Time Before(std)','Feature After(mean)','Feature After(std)','Time After(mean)','Time After(std)','Mannwhitney-pvalue','Ttest-pvalue'])
    uniqueLabTests = lab_measurements.LABEL.unique()

    for i, med in enumerate(top200_meds['MED']): 
        if n_meds is not None and i>=n_meds:
            break
        print(i, ' MED: ', med)
        for j in tqdm(range(uniqueLabTests.shape[0])):
            labTest = uniqueLabTests[j]
            drug_lab=labpairing(med, patient_presc, lab_measurements, labTest)
            if(len(drug_lab) > n_medlab_pairs): 
                csvrow=postprocessing(drug_lab)
                csvrow.insert(0, med) 
                csvrow.insert(1, labTest)
                res.loc[len(res)] = csvrow
    return res

## Final Result

### Top 10 Medications and Atleast 50 Patients in Med<>Lab Pair

In [None]:
res = comp_med_analysis(lab_measurements, top200_meds, n_medlab_pairs=50, n_meds=10)

0  MED:  NaCl 0.9%


100%|██████████| 161/161 [00:25<00:00,  6.23it/s]


1  MED:  Dextrose 5%


100%|██████████| 161/161 [00:23<00:00,  6.71it/s]


2  MED:  Solution


100%|██████████| 161/161 [00:21<00:00,  7.44it/s]


3  MED:  Propofol


100%|██████████| 161/161 [00:18<00:00,  8.75it/s]


4  MED:  PO Intake


100%|██████████| 161/161 [00:22<00:00,  7.15it/s]


5  MED:  Insulin - Regular


100%|██████████| 161/161 [00:20<00:00,  7.88it/s]


6  MED:  Gastric Meds


100%|██████████| 161/161 [00:26<00:00,  6.11it/s]


7  MED:  Fentanyl


100%|██████████| 161/161 [00:20<00:00,  7.80it/s]


8  MED:  Norepinephrine


100%|██████████| 161/161 [00:22<00:00,  7.19it/s]


9  MED:  Phenylephrine


100%|██████████| 161/161 [00:20<00:00,  7.94it/s]


In [None]:
res

Unnamed: 0,Medication Name,Feature Name,Number of patients,Feature Before(mean),Feature Before(std),Time Before(mean),Time Before(std),Feature After(mean),Feature After(std),Time After(mean),Time After(std),Mannwhitney-pvalue,Ttest-pvalue
0,NaCl 0.9%,"Calcium, Total",1043,7.811601,0.852798,16.419751,6.010735,7.826366,0.741066,9.325775,6.819014,0.038304,0.673022
1,NaCl 0.9%,Chloride,812,107.272167,9.622521,15.968555,6.222861,107.825123,8.993981,9.147065,6.694655,0.497539,0.231757
2,NaCl 0.9%,Creatinine,844,2.666588,2.264125,16.760190,5.727178,2.608412,2.196355,8.091469,6.464699,0.071883,0.592175
3,NaCl 0.9%,Phosphate,973,3.547174,1.987395,15.845529,6.151007,3.430319,1.957726,9.633436,6.870303,0.029920,0.191499
4,NaCl 0.9%,Potassium,366,4.400820,1.531093,14.377505,6.954470,4.112842,1.359583,9.152732,6.651877,0.022873,0.007295
...,...,...,...,...,...,...,...,...,...,...,...,...,...
414,Phenylephrine,Lactate Dehydrogenase (LD),70,1071.542857,1967.656855,14.703810,6.566310,1474.428571,3120.660993,11.007381,6.889183,0.482548,0.362474
415,Phenylephrine,Calculated Total CO2,159,22.270440,7.573844,17.766667,6.526962,22.647799,8.153696,6.555556,6.764487,0.381172,0.669252
416,Phenylephrine,"Potassium, Whole Blood",76,4.476316,1.363708,16.134211,6.735989,3.651316,1.072877,9.906140,6.899038,0.000063,0.000057
417,Phenylephrine,Troponin T,72,0.495417,0.850970,17.481944,5.357136,0.514028,0.845358,6.709259,6.189675,0.249992,0.895443


In [None]:
res.to_csv(os.path.join(PARENT, 'Med-Labtest_Pairs_Top10Meds_50Subjects.csv'))