# eICU data - dataset construction medications

In [15]:
import pandas as pd
import re
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 300)

In [16]:
patients = pd.read_csv('../2.0/patient.csv')
med = pd.read_csv('../2.0/medication.csv.gz')
patients_med = patients.merge(med, how='inner', on='patientunitstayid')

In [17]:
patients_med = patients_med[patients_med['drugorderoffset'] >=0 ]

In [18]:
#11 Variables overlapping between eICU data ,medication table, and MIMIC III data ,prescription table.
subset = [ 'Acetaminophen TAB', 'aspirin','bisacodyl','Insulin Aspart','Heparin','docusate sodium','Potassium Chloride Inj 2 MEQ/ML VIAL',
          'Magnesium Sulfate 2g/50mL SSO','metoprolol tartrate','Sodium Chloride 0.9%','pantoprazole']
patients_med = patients_med.loc[patients_med['drugname'].isin(subset)]

### Cleaning 

In [19]:
#patients_med_selected.to_csv('patients_med_selected.csv')
patients_med_new=patients_med
patients_med_new['dosage_r']=""
patients_med_new['unit']=""
#put the units into a separate columns
patients_med_new['dosage_r'] = patients_med_new['dosage'].str.split(" ",n =1,expand =True)
patients_med_new['unit'] = patients_med_new['dosage'].str.split(" ",n =1,expand =True)[1]


In [20]:

inf_drug_filtered=patients_med_new.loc[(patients_med_new['unit'].str.contains("mg")) | (patients_med_new['unit'].str.contains("mL")) |
                (patients_med_new['unit'].str.contains("MG")) | (patients_med_new['unit'].str.contains("MEQ")) |
                (patients_med_new['unit'].str.contains("ML")) | (patients_med_new['unit'].str.contains("g"))|
                                      (patients_med_new['unit'].str.contains("UNITS"))]

#we remove all the garbage values (non-float values) from the drugrate to keep it purely numerical for further use
inf_drug_filtered=inf_drug_filtered.loc[~(inf_drug_filtered['dosage_r']=='OFF\\.br\\\\.br\\') & ~(inf_drug_filtered['dosage_r']=='30\\.br\\') &
                     ~(inf_drug_filtered['dosage_r']=='50 mcg/min') & ~(inf_drug_filtered['dosage_r']=='50mcg/min\\.br\\') &
                     ~(inf_drug_filtered['dosage_r']=='OFF') & ~(inf_drug_filtered['dosage_r']=='Documentation undone')]

In [21]:
print(inf_drug_filtered.groupby("drugname")["unit"].value_counts())

drugname                              unit  
Acetaminophen TAB                     mg        1131
Heparin                               UNITS      601
                                      ML          10
Insulin Aspart                        UNITS      403
Magnesium Sulfate 2g/50mL SSO         g         1054
Potassium Chloride Inj 2 MEQ/ML VIAL  MEQ       2137
Sodium Chloride 0.9%                  ML        1500
                                      mL         810
                                      Charge     174
aspirin                               mg        8096
                                      Charge      10
bisacodyl                             mg        4354
                                      Charge       2
docusate sodium                       mg        2095
                                      MG           2
metoprolol tartrate                   mg        2417
                                      MG          81
pantoprazole                          mg        4623
 

In [22]:
frequent_meds = patients_med.groupby('drugname')['patientunitstayid'].nunique().sort_values(ascending=False)

In [23]:
long_meds = patients_med.groupby('drugname').apply(lambda x: x.groupby('patientunitstayid').size().mean()).sort_values(ascending=False)
selected_meds = set(list(long_meds.index)) & set(list(frequent_meds.index))
# To have a look at what lab measurements you have selected
meds_selected_df = pd.concat([frequent_meds.loc[selected_meds], long_meds.loc[selected_meds]], axis=1).rename({'patientunitstayid': 'icu_stays', 0: 'avg_length'}, axis=1)
print(meds_selected_df.to_string())

patients_med_selected = patients_med[patients_med['drugname'].isin(selected_meds)]

                                      icu_stays  avg_length
drugname                                                   
Heparin                                     763    1.888598
metoprolol tartrate                        1463    1.707450
Acetaminophen TAB                           736    1.550272
Sodium Chloride 0.9%                       1864    1.900751
Insulin Aspart                             1924    1.823805
pantoprazole                               3528    2.005952
bisacodyl                                  3065    1.425449
Magnesium Sulfate 2g/50mL SSO               473    2.255814
docusate sodium                            1320    1.594697
Potassium Chloride Inj 2 MEQ/ML VIAL        990    2.250505
aspirin                                    4988    1.658380


In [24]:
patients_med_selected['med_times'] = (patients_med_selected['drugorderoffset'])
med_dict=dict(zip(list(selected_meds),range(len(selected_meds))))
patients_med_selected['med_code']=patients_med_selected['drugname'].map(med_dict)
patients_med_selected_short = patients_med_selected[['patientunitstayid', 'med_times', 'med_code', 'dosage_r', 'unit']].rename({'patientunitstayid': 'id'}, axis=1)


In [25]:
patients_med_selected_short['dosage_r'] = patients_med_selected_short['dosage_r'].str.replace(',','')
patients_med_selected_short['dosage_r'] = patients_med_selected_short['dosage_r'].str.replace("Manual",'')

In [26]:
patients_med_selected_short['dosage_r'] = pd.to_numeric(patients_med_selected_short['dosage_r'],errors='coerce')

In [27]:
patients_med_selected_short_gb = patients_med_selected_short.groupby(['id', 'med_times', 'med_code'], as_index=False).mean()

In [28]:
patients_med_selected_short.to_csv('medication_processed.csv')