In [1]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# Load the crosstables: the procedure categories and drug codes that are kept
# as features. Both files are pickles despite their .txt extension.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open("../../crosstables/procedure_list.txt", "rb") as procedure_file, \
        open("../../crosstables/prescription_list.txt", "rb") as drug_file:
    procedure_list = pickle.load(procedure_file)
    drug_list = pickle.load(drug_file)

In [3]:
def _count_events_per_stay(events, category_col, allowed):
    """
    INPUT: events (dataframe with 'icustay_id' and `category_col` columns),
           category_col (name of the column holding the category label),
           allowed (iterable of category labels to keep)
    BEHAVIOR: drops rows whose category is not in `allowed` or whose
              'icustay_id' is missing, then counts the remaining rows per
              (icustay_id, category).
    OUTPUT: dataframe with an 'icustay_id' column and one float count column
            per entry of `allowed`; one row per stay that has at least one
            kept row. Categories that occur come first (sorted), followed by
            requested categories that never occur (all 0.0).
    """
    allowed = list(dict.fromkeys(allowed))  # de-duplicate, keep order
    keep = events[category_col].isin(allowed) & events['icustay_id'].notna()
    kept = events[keep]

    if kept.empty:
        # Nothing to count: start from an empty, float-keyed frame so the
        # caller's merge on 'icustay_id' still sees a numeric key.
        counts = pd.DataFrame(
            index=pd.Index([], dtype='float64', name='icustay_id'))
    else:
        counts = (
            kept.groupby(['icustay_id', category_col])
                .size()
                .unstack(category_col, fill_value=0)
        )

    # A requested category that never occurs gets no column from unstack();
    # add it as all-zero so the output schema does not depend on the data.
    unseen = [label for label in allowed if label not in counts.columns]
    counts = counts.reindex(columns=[*counts.columns, *unseen], fill_value=0)

    return counts.astype(float).rename_axis(columns=None).reset_index()


def feature_eng(admit, icu, proc, drug, procedures=None, drugs=None):
    """
    INPUT: admit, icu, proc, drug -- paths, URLs or file-like objects readable
               by pd.read_csv (admissions, icu stays, procedure events and
               prescriptions)
           procedures, drugs -- optional iterables of 'ordercategoryname'
               values and 'formulary_drug_cd' codes to count; when omitted the
               notebook-level procedure_list / drug_list are used
    BEHAVIOR: reads the four tables, left-joins the admissions that were
              discharged as 'DEAD/EXPIRED' onto the icu stays (by hadm_id),
              then left-joins per-stay drug counts and per-stay procedure
              counts (by icustay_id).
    OUTPUT: dataframe with one row per icu stay: the icu columns, the admission
            columns (null unless the admission ended in death), one count
            column per drug code and one per procedure category. Count columns
            are NaN for stays with no kept drug (resp. procedure) rows.
    """
    # Fall back to the lists loaded at notebook level, as the original did.
    if procedures is None:
        procedures = procedure_list
    if drugs is None:
        drugs = drug_list

    admit_fields = ["subject_id", "hadm_id", "admission_type", "admission_location",
                    "discharge_location", "insurance", "diagnosis", "hospital_expire_flag"]
    icu_fields = ["subject_id", "hadm_id", "icustay_id", "first_careunit", "last_careunit",
                  "first_wardid", "last_wardid", "los"]
    admitdf = pd.read_csv(admit, usecols=admit_fields)
    icudf = pd.read_csv(icu, usecols=icu_fields)

    # NOTE(review): only admissions that ended in death are joined, so every
    # admission column (admission_type, insurance, ...) is null for survivors.
    # If these columns are used as model features they reveal the outcome --
    # confirm this is intended.
    deaths = admitdf[admitdf['discharge_location'] == 'DEAD/EXPIRED']
    # subject_id is already present on the icu side; drop the duplicate before
    # merging instead of cleaning up _x/_y suffixes afterwards.
    icu_full = icudf.merge(deaths.drop(columns=['subject_id']),
                           how='left', on='hadm_id')

    # Only the stay key and the category column are needed from the event tables.
    procdf = pd.read_csv(proc, usecols=['icustay_id', 'ordercategoryname'])
    proc_counts = _count_events_per_stay(procdf, 'ordercategoryname', procedures)

    drugdf = pd.read_csv(drug, usecols=['icustay_id', 'formulary_drug_cd'])
    drug_counts = _count_events_per_stay(drugdf, 'formulary_drug_cd', drugs)

    analyticdf = (
        icu_full
        .merge(drug_counts, how='left', on='icustay_id')
        .merge(proc_counts, how='left', on='icustay_id')
    )

    return analyticdf

In [15]:
# Source CSVs from the MIMIC-III demo (v1.4) hosted on physionet.org, listed in
# the order feature_eng takes them.
# NOTE: despite the "df" suffix these names hold URL strings, not DataFrames.
admitdf = "https://physionet.org/files/mimiciii-demo/1.4/ADMISSIONS.csv?download"
icudf = "https://physionet.org/files/mimiciii-demo/1.4/ICUSTAYS.csv?download"
procdf = "https://physionet.org/files/mimiciii-demo/1.4/PROCEDUREEVENTS_MV.csv?download"
drugdf = "https://physionet.org/files/mimiciii-demo/1.4/PRESCRIPTIONS.csv?download"

In [25]:
# Build the analytic table (one row per ICU stay) from the four source CSVs.
testdat = feature_eng(
    admit=admitdf,
    icu=icudf,
    proc=procdf,
    drug=drugdf,
)

In [26]:
# Zero-fill the procedure/drug count columns: after the left merges in
# feature_eng, a stay with no matching events has NaN there instead of 0.
count_cols = list(dict.fromkeys([*procedure_list, *drug_list]))
# A listed category that never occurs in the data has no column at all, and
# selecting it would raise KeyError -- add such columns before filling.
absent_cols = [col for col in count_cols if col not in testdat.columns]
if absent_cols:
    testdat = testdat.reindex(columns=[*testdat.columns, *absent_cols])
if count_cols:
    testdat[count_cols] = testdat[count_cols].fillna(0)
# drop the identifier columns
dat = testdat.drop(columns=['subject_id', 'hadm_id', 'icustay_id'])
dat.head()

Unnamed: 0,first_careunit,last_careunit,first_wardid,last_wardid,los,deathtime,admission_type,admission_location,discharge_location,insurance,...,VANC1F,VANCOBASE,Dialysis,Imaging,Intubation/Extubation,Invasive Lines,Peripheral Lines,Procedures,Significant Events,Ventilation
0,MICU,MICU,52,52,1.6325,,,,,,...,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,MICU,MICU,15,15,13.8507,2126-08-28 18:59:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Private,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,MICU,MICU,15,15,2.6499,2125-10-07 15:13:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CCU,CCU,7,7,2.1436,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,MICU,MICU,15,15,1.2938,2163-05-15 12:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
