In [125]:
# Import necessary
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mt
import seaborn as sns
import numpy as np
import datetime as dt
import math

In [126]:
# Shows what each ICD9 code stands for 
D_ICD_Diagnoses = pd.read_csv('D_ICD_DIAGNOSES.csv')

# Shows all ICD9 codes for each patient marked by subject ID 
Diagnoses_ICD = pd.read_csv('DIAGNOSES_ICD.csv')

# List of all patients and their information
Patients = pd.read_csv('PATIENTS.csv')

# Additional information for each patient 
Admissions = pd.read_csv('ADMISSIONS.csv')

# Shows lab measurements with a corresponding ITEMID
lab = pd.read_csv('D_LABITEMS.csv')

# Shows results for each ITEMID lab measurements
eve = pd.read_csv('LABEVENTS.csv')

# Shows patient's own body measurements, heart rate, urine output....etc
item = pd.read_csv('D_ITEMS.csv')

# Shows results for patient's output measurements
out = pd.read_csv('OUTPUTEVENTS.csv')

In [127]:
# Calculating parameters for each patient(age of death, discharge..etc)

# Add Patient DOB in usable format and Merge with patient information
Patients_DOB = Patients['DOB']
Patients_DOB2 = []
for n in range(len(Patients_DOB)):
    prelim = Patients_DOB[n]
    Patients_DOB2.append(dt.datetime.strptime(prelim[0:10],'%Y-%m-%d').date())
Birth_Date_Series = pd.Series(Patients_DOB2)
Patients['DOB-2'] = Birth_Date_Series

# Add Data for Patient's Date of Death
Patients_DODeath = Patients['DOD']
Patients_DOD = []
for n in range(len(Patients_DODeath)):
    if pd.isnull(Patients_DODeath[n]) == True:  
        Patients_DOD.append(np.nan)
    else:
        prelim = Patients_DODeath[n]
        Patients_DOD.append(dt.datetime.strptime(prelim[0:10],'%Y-%m-%d').date())

# Add Data for Patient's Date of Admission
Patients_Admit = Admissions['ADMITTIME']
Patients_Admissions = []
for n in range(len(Patients_Admit)):
    if pd.isnull(Patients_Admit[n]) == True:  
        Patients_Admissions.append(np.nan)
    else:
        prelim = Patients_Admit[n]
        Patients_Admissions.append(dt.datetime.strptime(prelim[0:10],'%Y-%m-%d').date())

# Add Data for Patient's Date of Release
Patients_leave = Admissions['DISCHTIME']
Patients_Release = []
for n in range(len(Patients_leave)):
    if pd.isnull(Patients_leave[n]) == True:  
        Patients_Release.append(np.nan)
    else:
        prelim = Patients_leave[n]
        Patients_Release.append(dt.datetime.strptime(prelim[0:10],'%Y-%m-%d').date())
        
# Add Data to Series       
Birth_Date_Series = pd.Series(Patients_DOB2)
Death_Series = pd.Series(Patients_DOD)

Admit_Series = pd.Series(Patients_Admissions)
Release_Series = pd.Series(Patients_Release)

# Find Age at Death
Age_Death = (Death_Series - Birth_Date_Series)
death_age = []

for entry in range(len(Age_Death)):
    if type(Age_Death[entry]) == float:
        death_age.append(float('NAN'))
    else:
        death_age.append(Age_Death[entry].days)

Age_Death = pd.Series(death_age)

# Find Total Admission Times
Admissions_Time = (Release_Series - Admit_Series)

Admit_Time = []

for entry in range(len(Admissions_Time)):
    if type(Admissions_Time[entry]) == float:
        Admit_Time.append(float('NAN'))
    else:
        Admit_Time.append(Admissions_Time[entry].days)

Admissions_Time = pd.Series(Admit_Time)
        
# Add to Pandas Dataframe
Patients['DOB-2'] = Birth_Date_Series
Patients['DOD-2'] = Death_Series
Patients['AOD'] = Age_Death

Admissions['Admit'] = Admit_Series
Admissions['Release'] = Release_Series
Admissions['Total Admission Time'] = Admissions_Time 

Admissions_culled_Admits = Admissions_culled['ADMITTIME']

Admit_Ages = (Admit_Series - Birth_Date_Series)

# Combine admissions information and patient's information
Admissions_culled = Admissions.copy(deep = False)


Admissions_long = Admissions_culled.merge(Patients, on='SUBJECT_ID')
Admissions_long.head()
Admit_Series = pd.Series(Patients_DOD)
Admissions_culled_Admits = Admissions_culled['ADMITTIME']

Admit_times2 = pd.Series(Admissions_long['ADMITTIME'])
Atimes = []
for n in range(len(Admit_times2)):
    if pd.isnull(Admit_times2[n]) == True:  
        Atimes.append(np.nan)
    else:
        prelim = Admit_times2[n]
        Atimes.append(dt.datetime.strptime(prelim[0:10],'%Y-%m-%d').date())

DOB_Admit_times = pd.Series(Admissions_long['DOB'])
DOBAtimes = []
for n in range(len(DOB_Admit_times)):
    if pd.isnull(DOB_Admit_times[n]) == True:  
        DOBAtimes.append(np.nan)
    else:
        prelim = DOB_Admit_times[n]
        DOBAtimes.append(dt.datetime.strptime(prelim[0:10],'%Y-%m-%d').date())

Atimes = pd.Series(Atimes)
DOBAtimes = pd.Series(DOBAtimes)
        
admit_births = (Atimes - DOBAtimes)

Admit_Timet = []
for entry in range(len(admit_births)):
    if type(admit_births[entry]) == float:
        Admit_Timet.append(float('NAN'))
    else:
        Admit_Timet.append(admit_births[entry].days)

Admit_Timet = pd.Series(Admit_Timet)

# Find total number of visits for each subject ID
visit_count = pd.DataFrame(Admissions.SUBJECT_ID.value_counts())
visit_count = pd.DataFrame(Admissions.SUBJECT_ID.value_counts())
visit_count.reset_index(level=0, inplace=True)
visit_count.columns = ['SUBJECT_ID', 'ADMISSIONS']

Admissions_long = Admissions_long.merge(visit_count, on='SUBJECT_ID')

In [128]:
# Find causes of death for patients based on diagnosis
cdeath = []
subjectid = []
ldeath = []
ideath = []
edeath = []
tdeath = []
hamdiddeath = []
admitt = []

for entry in range(len(Admissions)):
    if Admissions['HOSPITAL_EXPIRE_FLAG'][entry] == 0:
        continue
    elif Admissions['HOSPITAL_EXPIRE_FLAG'][entry] == 1:
        subjectid.append(Admissions['SUBJECT_ID'][entry])
        cdeath.append(Admissions['DIAGNOSIS'][entry])
        ldeath.append(Admissions['ADMISSION_LOCATION'][entry])
        tdeath.append(Admissions['ADMISSION_TYPE'][entry])
        ideath.append(Admissions['INSURANCE'][entry])
        edeath.append(Admissions['ETHNICITY'][entry])
        hamdiddeath.append(Admissions['HADM_ID'][entry])
        admitt.append(Admissions['Total Admission Time'][entry])
        
cdeath = pd.Series(cdeath)
subjectid = pd.Series(subjectid)
ldeath = pd.Series(ldeath)
ideath = pd.Series(ideath)
edeath = pd.Series(edeath)
tdeath = pd.Series(tdeath)
hamdiddeath = pd.Series(hamdiddeath)
admitt = pd.Series(admitt)

Death = pd.DataFrame()
Death['SUBJECT_ID'] = subjectid

Death['CAUSE'] = cdeath

Death.head()

Unnamed: 0,SUBJECT_ID,CAUSE
0,31,STATUS EPILEPTICUS
1,56,HEAD BLEED
2,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA
3,67,SUBARACHNOID HEMORRHAGE
4,84,"GLIOBLASTOMA,NAUSEA"


In [129]:
kidney_failure_desc = Death[Death['CAUSE'].str.contains("SEPSIS|CREATININE|MYOCARDIAL|CARDIAC|HEART", na=False)==True]
cancer = Death[Death['CAUSE'].str.contains("CANCER", na=False)==True]
hypo = Death[Death['CAUSE'].str.contains("HYPOTENSION", na=False)==True]
shock = Death[Death['CAUSE'].str.contains("SEPTIC SHOCK", na=False)==True]
anemia = Death[Death['CAUSE'].str.contains("ANEMIA", na=False)==True]
hemo = Death[Death['CAUSE'].str.contains("HEMORRHAGE", na=False)==True]
acid = Death[Death['CAUSE'].str.contains("ACIDOSIS", na=False)==True]
liver = Death[Death['CAUSE'].str.contains("LIVER FAILURE", na=False)==True]
hyper = Death[Death['CAUSE'].str.contains("HYPERTENSION", na=False)==True]


del cancer['CAUSE']
cancer['CANCER_FLAG'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


In [130]:
# Merge admissions information and patients information for a more comprehensive demographics
demographics = pd.DataFrame()
demographics['SUBJECT_ID'] = Admissions_long['SUBJECT_ID']
demographics['GENDER'] = Admissions_long['GENDER']
demographics['HADM_ID'] = Admissions_long['HADM_ID']
demographics['DOB'] = Admissions_long['DOB-2']
demographics['DOD'] = Admissions_long['DOD-2']
demographics['DOA'] = Admissions_long['ADMITTIME']
demographics['ADMIT_AGE'] = Admit_Timet
demographics['ETHNICITY'] = Admissions_long['ETHNICITY']
demographics['MARITAL_STATUS'] = Admissions_long['MARITAL_STATUS']
demographics['LANGUAGE'] = Admissions_long['LANGUAGE']
demographics['RELIGION'] = Admissions_long['RELIGION']
demographics['INSURANCE'] = Admissions_long['INSURANCE']
demographics['ADMISSION_LOCATION'] = Admissions_long['ADMISSION_LOCATION']
demographics['#ADMISSIONS'] = Admissions_long['ADMISSIONS']
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,RELIGION,INSURANCE,ADMISSION_LOCATION,#ADMISSIONS
0,22,F,165315,2131-05-07,,2196-04-09 12:26:00,23714,WHITE,MARRIED,,UNOBTAINABLE,Private,EMERGENCY ROOM ADMIT,1
1,23,M,152223,2082-07-17,,2153-09-03 07:15:00,25980,WHITE,MARRIED,,CATHOLIC,Medicare,PHYS REFERRAL/NORMAL DELI,2
2,23,M,124321,2082-07-17,,2157-10-18 19:34:00,27486,WHITE,MARRIED,ENGL,CATHOLIC,Medicare,TRANSFER FROM HOSP/EXTRAM,2
3,24,M,161859,2100-05-31,,2139-06-06 16:14:00,14250,WHITE,SINGLE,,PROTESTANT QUAKER,Private,TRANSFER FROM HOSP/EXTRAM,1
4,25,M,129635,2101-11-21,,2160-11-02 02:06:00,21531,WHITE,MARRIED,,UNOBTAINABLE,Private,EMERGENCY ROOM ADMIT,1


In [131]:
# Merge Death and demographics on subject ID to add relevant data about patient deaths
demographics = pd.merge(Death, demographics, on= 'SUBJECT_ID', how = 'outer')

In [132]:
demographics['DOA'] = pd.to_datetime(demographics['DOA'])
demographics['DOD'] = pd.to_datetime(demographics['DOD'])

In [133]:
# Calculate time it takes for patients to die since admission in hours
demographics['delta'] = demographics['DOD'] - demographics['DOA']    
demographics['delta'] = demographics['delta'] / np.timedelta64(1,'h')

In [134]:
demographics.head()

Unnamed: 0,SUBJECT_ID,CAUSE,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,RELIGION,INSURANCE,ADMISSION_LOCATION,#ADMISSIONS,delta
0,31,STATUS EPILEPTICUS,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394,WHITE,MARRIED,,CATHOLIC,Medicare,TRANSFER FROM HOSP/EXTRAM,1,168.55
1,56,HEAD BLEED,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,109573,WHITE,,,NOT SPECIFIED,Medicare,EMERGENCY ROOM ADMIT,1,141.983333
2,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962,WHITE,MARRIED,,CATHOLIC,Private,PHYS REFERRAL/NORMAL DELI,2,5529.333333
3,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163,WHITE,MARRIED,,CATHOLIC,Private,CLINIC REFERRAL/PREMATURE,2,701.8
4,67,SUBARACHNOID HEMORRHAGE,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831,WHITE,SINGLE,,JEWISH,Medicare,PHYS REFERRAL/NORMAL DELI,2,24251.25


In [135]:
# Create a flag for patients outside of hospital where we don't have information on
death_cause = []
outside_death_cause = []
for entry in range(len(demographics)):
    if pd.isnull(demographics['DOD'][entry]) == False and pd.isnull(demographics['CAUSE'][entry]) == True:
        death_cause.append('Death Outside of Hospital')
        outside_death_cause.append(1)
    else: 
        death_cause.append(demographics['CAUSE'][entry])
        outside_death_cause.append(0)
        
death_cause = pd.Series(death_cause)
outside_death_flag = pd.Series(outside_death_cause)

demographics['CAUSE'] = death_cause
demographics['OUTSIDE_DEATH_FLAG'] = outside_death_flag
demographics.head()


# Add death flag within a 1 year frame
death_flag = []
for entry in range(len(demographics)):
    if demographics['delta'][entry] <= 17520:
        death_flag.append(1)
    else: 
        death_flag.append(0)


death_flag = pd.Series(death_flag)
demographics['DEATH_FLAG'] = death_flag

demographics['DEATH_FLAG'] = demographics['DEATH_FLAG'].fillna(0)

# Add too old flag
old_flag = []
for entry in range(len(demographics)):
    if demographics['ADMIT_AGE'][entry] > 32850:
        old_flag.append(1)
    else:
        old_flag.append(0)

old_flag = pd.Series(old_flag)
demographics['OLD_FLAG'] = old_flag

In [136]:
# Leave null for patients too old
age = []
for entry in range(len(demographics)):
     if demographics['OLD_FLAG'][entry] == 0:
            age.append(demographics['ADMIT_AGE'][entry])
     else: 
        age.append(np.nan)
age = pd.Series(age)
demographics['ADMIT_AGE'] = age
demographics.head()

Unnamed: 0,SUBJECT_ID,CAUSE,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,RELIGION,INSURANCE,ADMISSION_LOCATION,#ADMISSIONS,delta,OUTSIDE_DEATH_FLAG,DEATH_FLAG,OLD_FLAG
0,31,STATUS EPILEPTICUS,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,CATHOLIC,Medicare,TRANSFER FROM HOSP/EXTRAM,1,168.55,0,1,0
1,56,HEAD BLEED,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,NOT SPECIFIED,Medicare,EMERGENCY ROOM ADMIT,1,141.983333,0,1,1
2,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,CATHOLIC,Private,PHYS REFERRAL/NORMAL DELI,2,5529.333333,0,1,0
3,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,CATHOLIC,Private,CLINIC REFERRAL/PREMATURE,2,701.8,0,1,0
4,67,SUBARACHNOID HEMORRHAGE,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,JEWISH,Medicare,PHYS REFERRAL/NORMAL DELI,2,24251.25,0,0,0


In [137]:
# Add cardiovascular flag to data
demographics = pd.merge(demographics, cancer, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,CAUSE,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,RELIGION,INSURANCE,ADMISSION_LOCATION,#ADMISSIONS,delta,OUTSIDE_DEATH_FLAG,DEATH_FLAG,OLD_FLAG,CANCER_FLAG
0,31,STATUS EPILEPTICUS,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,CATHOLIC,Medicare,TRANSFER FROM HOSP/EXTRAM,1,168.55,0,1,0,
1,56,HEAD BLEED,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,NOT SPECIFIED,Medicare,EMERGENCY ROOM ADMIT,1,141.983333,0,1,1,
2,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,CATHOLIC,Private,PHYS REFERRAL/NORMAL DELI,2,5529.333333,0,1,0,
3,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,CATHOLIC,Private,CLINIC REFERRAL/PREMATURE,2,701.8,0,1,0,
4,67,SUBARACHNOID HEMORRHAGE,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,JEWISH,Medicare,PHYS REFERRAL/NORMAL DELI,2,24251.25,0,0,0,


In [138]:
cancer_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CANCER_FLAG'][entry]) == True:
        cancer_flag.append(1)
    else:
        cancer_flag.append(0)
    

cancer_flag = pd.Series(cancer_flag)
demographics['CANCER_FLAG'] = cancer_flag

In [139]:
# Test whether patients who died actually lived less than or equal to a year since admission
demographics[demographics['DEATH_FLAG'] == 1].head()

Unnamed: 0,SUBJECT_ID,CAUSE,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,RELIGION,INSURANCE,ADMISSION_LOCATION,#ADMISSIONS,delta,OUTSIDE_DEATH_FLAG,DEATH_FLAG,OLD_FLAG,CANCER_FLAG
0,31,STATUS EPILEPTICUS,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,CATHOLIC,Medicare,TRANSFER FROM HOSP/EXTRAM,1,168.55,0,1,0,1
1,56,HEAD BLEED,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,NOT SPECIFIED,Medicare,EMERGENCY ROOM ADMIT,1,141.983333,0,1,1,1
2,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,CATHOLIC,Private,PHYS REFERRAL/NORMAL DELI,2,5529.333333,0,1,0,1
3,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,CATHOLIC,Private,CLINIC REFERRAL/PREMATURE,2,701.8,0,1,0,1
5,67,SUBARACHNOID HEMORRHAGE,M,155252,2084-06-05,2157-12-02,2157-12-02 00:45:00,26842.0,WHITE,SINGLE,,JEWISH,Medicare,EMERGENCY ROOM ADMIT,2,-0.75,0,1,0,1


In [140]:
# Extracting all AKI patients
codes = ['5845', '5846', '5847', '5848', '5949', '66930', '66932', '66934']

aki_Diagnoses_ICD = Diagnoses_ICD.loc[Diagnoses_ICD['ICD9_CODE'].str.match('|'.join(codes), na=False)] # All AKI patients
aki_Diagnoses = pd.DataFrame(aki_Diagnoses_ICD.SUBJECT_ID)
aki_Diagnoses = aki_Diagnoses.drop_duplicates()
aki_Diagnoses['AKI_DIAGNOSIS_FLAG'] = 1

# Merge with demographics file:
demographics = pd.merge(demographics, aki_Diagnoses, on= 'SUBJECT_ID', how = 'outer')

In [141]:
aki_Diagnoses.head()

Unnamed: 0,SUBJECT_ID,AKI_DIAGNOSIS_FLAG
46,115,1
1961,294,1
2109,307,1
2228,321,1
2282,141,1


In [142]:
# Add AKI diagnosis flags to demographic data 
aki_flag = []
for entry in range(len(demographics)):
    if demographics['AKI_DIAGNOSIS_FLAG'][entry] == 1:
        aki_flag.append(1)
    else:
        aki_flag.append(0)
aki_flag = pd.Series(aki_flag)
demographics['AKI_DIAGNOSIS_FLAG'] = aki_flag

In [143]:
# Add kidney failure cause of death flag to demographic data
demographics = pd.merge(demographics, kidney_failure_desc, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,CAUSE_x,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,...,INSURANCE,ADMISSION_LOCATION,#ADMISSIONS,delta,OUTSIDE_DEATH_FLAG,DEATH_FLAG,OLD_FLAG,CANCER_FLAG,AKI_DIAGNOSIS_FLAG,CAUSE_y
0,31,STATUS EPILEPTICUS,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,...,Medicare,TRANSFER FROM HOSP/EXTRAM,1,168.55,0,1,0,1,0,
1,56,HEAD BLEED,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,...,Medicare,EMERGENCY ROOM ADMIT,1,141.983333,0,1,1,1,0,
2,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,...,Private,PHYS REFERRAL/NORMAL DELI,2,5529.333333,0,1,0,1,0,
3,61,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,...,Private,CLINIC REFERRAL/PREMATURE,2,701.8,0,1,0,1,0,
4,67,SUBARACHNOID HEMORRHAGE,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,...,Medicare,PHYS REFERRAL/NORMAL DELI,2,24251.25,0,0,0,1,0,


In [144]:
kidney_failure_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CAUSE_y'][entry]) == True:
        kidney_failure_flag.append(0)
    else:
        kidney_failure_flag.append(1)

kidney_failure_flag = pd.Series(kidney_failure_flag)
demographics['KIDNEY_FAILURE_FLAG'] = aki_flag

del demographics['CAUSE_y']

demographics['CAUSE'] = demographics['CAUSE_x']
del demographics['CAUSE_x']
demographics['KIDNEY_FAILURE_FLAG'] = kidney_failure_flag

In [145]:
# Add hypotension cause of death flag to demographic data
demographics = pd.merge(demographics, hypo, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,#ADMISSIONS,delta,OUTSIDE_DEATH_FLAG,DEATH_FLAG,OLD_FLAG,CANCER_FLAG,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,CAUSE_x,CAUSE_y
0,31,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,...,1,168.55,0,1,0,1,0,0,STATUS EPILEPTICUS,
1,56,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,...,1,141.983333,0,1,1,1,0,0,HEAD BLEED,
2,61,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,...,2,5529.333333,0,1,0,1,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
3,61,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,...,2,701.8,0,1,0,1,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
4,67,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,...,2,24251.25,0,0,0,1,0,0,SUBARACHNOID HEMORRHAGE,


In [146]:
hypo_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CAUSE_y'][entry]) == True:
        hypo_flag.append(0)
    else:
        hypo_flag.append(1)

hypo_flag = pd.Series(hypo_flag)
demographics['HYPO_FLAG'] = aki_flag

del demographics['CAUSE_y']

demographics['CAUSE'] = demographics['CAUSE_x']
del demographics['CAUSE_x']
demographics['HYPO_FLAG'] = hypo_flag

In [147]:
# Add septic shock cause of death flag to demographic data
demographics = pd.merge(demographics, shock, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,delta,OUTSIDE_DEATH_FLAG,DEATH_FLAG,OLD_FLAG,CANCER_FLAG,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,HYPO_FLAG,CAUSE_x,CAUSE_y
0,31,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,...,168.55,0,1,0,1,0,0,0,STATUS EPILEPTICUS,
1,56,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,...,141.983333,0,1,1,1,0,0,0,HEAD BLEED,
2,61,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,...,5529.333333,0,1,0,1,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
3,61,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,...,701.8,0,1,0,1,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
4,67,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,...,24251.25,0,0,0,1,0,0,0,SUBARACHNOID HEMORRHAGE,


In [148]:
shock_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CAUSE_y'][entry]) == True:
        shock_flag.append(0)
    else:
        shock_flag.append(1)

shock_flag = pd.Series(shock_flag)
demographics['SHOCK_FLAG'] = aki_flag

del demographics['CAUSE_y']

demographics['CAUSE'] = demographics['CAUSE_x']
del demographics['CAUSE_x']
demographics['SHOCK_FLAG'] = shock_flag

In [149]:
# Add anemia cause of death flag to demographic data
demographics = pd.merge(demographics, anemia, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,OUTSIDE_DEATH_FLAG,DEATH_FLAG,OLD_FLAG,CANCER_FLAG,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,HYPO_FLAG,SHOCK_FLAG,CAUSE_x,CAUSE_y
0,31,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,...,0,1,0,1,0,0,0,0,STATUS EPILEPTICUS,
1,56,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,...,0,1,1,1,0,0,0,0,HEAD BLEED,
2,61,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,...,0,1,0,1,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
3,61,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,...,0,1,0,1,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
4,67,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,...,0,0,0,1,0,0,0,0,SUBARACHNOID HEMORRHAGE,


In [150]:
anemia_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CAUSE_y'][entry]) == True:
        anemia_flag.append(0)
    else:
        anemia_flag.append(1)

anemia_flag = pd.Series(anemia_flag)
demographics['ANEMIA_FLAG'] = aki_flag

del demographics['CAUSE_y']

demographics['CAUSE'] = demographics['CAUSE_x']
del demographics['CAUSE_x']
demographics['ANEMIA_FLAG'] = anemia_flag

In [151]:
# Add hemorrhage cause of death flag to demographic data
demographics = pd.merge(demographics, hemo, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,DEATH_FLAG,OLD_FLAG,CANCER_FLAG,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,HYPO_FLAG,SHOCK_FLAG,ANEMIA_FLAG,CAUSE_x,CAUSE_y
0,31,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,...,1,0,1,0,0,0,0,0,STATUS EPILEPTICUS,
1,56,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,...,1,1,1,0,0,0,0,0,HEAD BLEED,
2,61,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,...,1,0,1,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
3,61,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,...,1,0,1,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
4,67,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,...,0,0,1,0,0,0,0,0,SUBARACHNOID HEMORRHAGE,SUBARACHNOID HEMORRHAGE


In [152]:
hemo_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CAUSE_y'][entry]) == True:
        hemo_flag.append(0)
    else:
        hemo_flag.append(1)

hemo_flag = pd.Series(hemo_flag)
demographics['HEMO_FLAG'] = aki_flag

del demographics['CAUSE_y']

demographics['CAUSE'] = demographics['CAUSE_x']
del demographics['CAUSE_x']
demographics['HEMO_FLAG'] = hemo_flag

In [153]:
# Add acidosis cause of death flag to demographic data
demographics = pd.merge(demographics, acid, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,OLD_FLAG,CANCER_FLAG,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,HYPO_FLAG,SHOCK_FLAG,ANEMIA_FLAG,HEMO_FLAG,CAUSE_x,CAUSE_y
0,31,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,...,0,1,0,0,0,0,0,0,STATUS EPILEPTICUS,
1,56,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,...,1,1,0,0,0,0,0,0,HEAD BLEED,
2,61,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,...,0,1,0,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
3,61,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,...,0,1,0,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
4,67,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,...,0,1,0,0,0,0,0,1,SUBARACHNOID HEMORRHAGE,


In [154]:
acid_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CAUSE_y'][entry]) == True:
        acid_flag.append(0)
    else:
        acid_flag.append(1)

acid_flag = pd.Series(acid_flag)
demographics['ACID_FLAG'] = aki_flag

del demographics['CAUSE_y']

demographics['CAUSE'] = demographics['CAUSE_x']
del demographics['CAUSE_x']
demographics['ACID_FLAG'] = acid_flag

In [155]:
# Add liver failure cause of death flag to demographic data
demographics = pd.merge(demographics, liver, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,CANCER_FLAG,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,HYPO_FLAG,SHOCK_FLAG,ANEMIA_FLAG,HEMO_FLAG,ACID_FLAG,CAUSE_x,CAUSE_y
0,31,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,...,1,0,0,0,0,0,0,0,STATUS EPILEPTICUS,
1,56,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,...,1,0,0,0,0,0,0,0,HEAD BLEED,
2,61,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,...,1,0,0,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
3,61,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,...,1,0,0,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
4,67,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,...,1,0,0,0,0,0,1,0,SUBARACHNOID HEMORRHAGE,


In [156]:
liver_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CAUSE_y'][entry]) == True:
        liver_flag.append(0)
    else:
        liver_flag.append(1)
        
liver_flag = pd.Series(liver_flag)
demographics['LIVER_FLAG'] = aki_flag

del demographics['CAUSE_y']

demographics['CAUSE'] = demographics['CAUSE_x']
del demographics['CAUSE_x']
demographics['LIVER_FLAG'] = liver_flag


In [157]:
# Add hypertension cause of death flag to demographic data
demographics = pd.merge(demographics, hyper, on= 'SUBJECT_ID', how = 'outer')
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,HYPO_FLAG,SHOCK_FLAG,ANEMIA_FLAG,HEMO_FLAG,ACID_FLAG,LIVER_FLAG,CAUSE_x,CAUSE_y
0,31,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,26394.0,WHITE,MARRIED,,...,0,0,0,0,0,0,0,0,STATUS EPILEPTICUS,
1,56,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,...,0,0,0,0,0,0,0,0,HEAD BLEED,
2,61,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,19962.0,WHITE,MARRIED,,...,0,0,0,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
3,61,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,20163.0,WHITE,MARRIED,,...,0,0,0,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,
4,67,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,25831.0,WHITE,SINGLE,,...,0,0,0,0,0,1,0,0,SUBARACHNOID HEMORRHAGE,


In [158]:
hyper_flag = []

for entry in range(len(demographics)):
    if pd.isnull(demographics['CAUSE_y'][entry]) == True:
        hyper_flag.append(0)
    else:
        hyper_flag.append(1)
        
hyper_flag = pd.Series(hyper_flag)
demographics['HYPER_FLAG'] = aki_flag

del demographics['CAUSE_y']

demographics['CAUSE'] = demographics['CAUSE_x']
del demographics['CAUSE_x']
demographics['HYPER_FLAG'] = hyper_flag

In [159]:
# Add age in years to simplify interpretation
demographics['ADMIT_AGE'] = demographics['ADMIT_AGE']/365

In [160]:
# Test
demographics[(demographics['HYPER_FLAG'] == 1) & (demographics['AKI_DIAGNOSIS_FLAG'] == 1) ]

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,HYPO_FLAG,SHOCK_FLAG,ANEMIA_FLAG,HEMO_FLAG,ACID_FLAG,LIVER_FLAG,HYPER_FLAG,CAUSE
2818,16421,M,101710,2043-05-31,2109-04-04,2109-03-28 23:19:00,65.868493,UNKNOWN/NOT SPECIFIED,MARRIED,,...,1,0,0,0,0,0,0,0,1,CHRONIC RENAL FAILURE;HYPERTENSION
3136,14999,M,132503,2083-05-07,2155-08-20,2155-08-15 11:43:00,72.320548,WHITE,MARRIED,,...,1,0,0,0,0,0,0,0,1,CORONARY ARTERY DISEASE;HYPERTENSION\CATH
6581,47493,F,149237,2124-10-23,2195-08-10,2195-07-27 20:14:00,70.805479,WHITE,SINGLE,ENGL,...,1,0,0,0,0,0,0,0,1,HYPERTENSION;GALLSTONE PANCREATITIS
7575,78697,M,121516,2030-08-28,2111-07-19,2111-07-19 00:01:00,80.942466,UNABLE TO OBTAIN,MARRIED,ENGL,...,1,0,0,0,0,0,0,0,1,ACUTE RENAL FAILURE;HYPERTENSION


In [161]:
demographics.head()

Unnamed: 0,SUBJECT_ID,GENDER,HADM_ID,DOB,DOD,DOA,ADMIT_AGE,ETHNICITY,MARITAL_STATUS,LANGUAGE,...,AKI_DIAGNOSIS_FLAG,KIDNEY_FAILURE_FLAG,HYPO_FLAG,SHOCK_FLAG,ANEMIA_FLAG,HEMO_FLAG,ACID_FLAG,LIVER_FLAG,HYPER_FLAG,CAUSE
0,31,M,128652,2036-05-17,2108-08-30,2108-08-22 23:27:00,72.312329,WHITE,MARRIED,,...,0,0,0,0,0,0,0,0,0,STATUS EPILEPTICUS
1,56,F,181711,1804-01-02,2104-01-08,2104-01-02 02:01:00,,WHITE,,,...,0,0,0,0,0,0,0,0,0,HEAD BLEED
2,61,M,176332,2063-10-21,2119-02-03,2118-06-17 14:40:00,54.690411,WHITE,MARRIED,,...,0,0,0,0,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA
3,61,M,189535,2063-10-21,2119-02-03,2119-01-04 18:12:00,55.241096,WHITE,MARRIED,,...,0,0,0,0,0,0,0,0,0,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA
4,67,M,186474,2084-06-05,2157-12-02,2155-02-25 12:45:00,70.769863,WHITE,SINGLE,,...,0,0,0,0,0,1,0,0,0,SUBARACHNOID HEMORRHAGE


In [162]:
demographics.to_csv('testing.csv')