### Split the admissions dataset into two sets: admissions of patients who survived and admissions of patients who died. Calculate length of stay for the survivors and time to death for those who died.

In [1]:
import pandas as pd
import datetime
import numpy as np

In [2]:
# Load the raw patients and admissions tables from the shared Resources folder.
patients, admissions = (
    pd.read_csv(csv_path)
    for csv_path in ('../Resources/PATIENTS.csv', '../Resources/ADMISSIONS.csv')
)

In [3]:
# Preview the first rows of the patients frame (head() shows five rows by default).
patients.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,GENDER,DOB,DOD,DOD_HOSP,DOD_SSN,EXPIRE_FLAG
0,234,249,F,2075-03-13 00:00:00,,,,0
1,235,250,F,2164-12-27 00:00:00,2188-11-22 00:00:00,2188-11-22 00:00:00,,1
2,236,251,M,2090-03-15 00:00:00,,,,0
3,237,252,M,2078-03-06 00:00:00,,,,0
4,238,253,F,2089-11-26 00:00:00,,,,0


In [4]:
# Preview the first rows of the admissions frame (head() shows five rows by default).
admissions.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,WHITE,,,CORONARY ARTERY DISEASE\CORONARY ARTERY BYPASS...,0,1
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,CATHOLIC,MARRIED,WHITE,,,BRAIN MASS,0,1
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,PROTESTANT QUAKER,SINGLE,WHITE,,,INTERIOR MYOCARDIAL INFARCTION,0,1
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,UNOBTAINABLE,MARRIED,WHITE,2160-11-02 01:01:00,2160-11-02 04:27:00,ACUTE CORONARY SYNDROME,0,1


Convert admit/discharge times to datetime format

In [5]:
# Parse the admission and discharge timestamps into datetime64 columns.
# pd.to_datetime is vectorized and also accepts values that are already
# datetimes, so this cell can be re-run on a live kernel without raising
# (a row-wise strptime fails with TypeError on the second run).
# The explicit format keeps parsing strict: malformed strings still raise,
# while missing values become NaT instead of raising.
admissions['ADMITTIME'] = pd.to_datetime(admissions['ADMITTIME'], format='%Y-%m-%d %H:%M:%S')
admissions['DISCHTIME'] = pd.to_datetime(admissions['DISCHTIME'], format='%Y-%m-%d %H:%M:%S')

Create new dataframes for admissions of patients who survived/died

In [6]:
# An admission counts as "died" when a DEATHTIME was recorded for it.
died_mask = admissions['DEATHTIME'].notna()

# Survivors keep DISCHTIME (DEATHTIME is empty for them, so it is dropped);
# the died subset keeps DEATHTIME and drops DISCHTIME instead.
admissions_survived = admissions.loc[~died_mask].drop(columns=['DEATHTIME'])
admissions_died = admissions.loc[died_mask].drop(columns=['DISCHTIME'])

Calculate length of stay for patients who survived

In [7]:
# Length of stay = discharge time minus admission time, in fractional days.
one_day = np.timedelta64(1, 'D')
stay_duration = admissions_survived['DISCHTIME'] - admissions_survived['ADMITTIME']
admissions_survived['LENGTH_OF_STAY'] = stay_duration / one_day
admissions_survived.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DISCHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA,LENGTH_OF_STAY
0,21,22,165315,2196-04-09 12:26:00,2196-04-10 15:54:00,EMERGENCY,EMERGENCY ROOM ADMIT,DISC-TRAN CANCER/CHLDRN H,Private,,UNOBTAINABLE,MARRIED,WHITE,2196-04-09 10:06:00,2196-04-09 13:24:00,BENZODIAZEPINE OVERDOSE,0,1,1.144444
1,22,23,152223,2153-09-03 07:15:00,2153-09-08 19:10:00,ELECTIVE,PHYS REFERRAL/NORMAL DELI,HOME HEALTH CARE,Medicare,,CATHOLIC,MARRIED,WHITE,,,CORONARY ARTERY DISEASE\CORONARY ARTERY BYPASS...,0,1,5.496528
2,23,23,124321,2157-10-18 19:34:00,2157-10-25 14:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME HEALTH CARE,Medicare,ENGL,CATHOLIC,MARRIED,WHITE,,,BRAIN MASS,0,1,6.768056
3,24,24,161859,2139-06-06 16:14:00,2139-06-09 12:48:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,HOME,Private,,PROTESTANT QUAKER,SINGLE,WHITE,,,INTERIOR MYOCARDIAL INFARCTION,0,1,2.856944
4,25,25,129635,2160-11-02 02:06:00,2160-11-05 14:55:00,EMERGENCY,EMERGENCY ROOM ADMIT,HOME,Private,,UNOBTAINABLE,MARRIED,WHITE,2160-11-02 01:01:00,2160-11-02 04:27:00,ACUTE CORONARY SYNDROME,0,1,3.534028


Convert deathtime to datetime format and calculate time to death for patients who did not survive

In [8]:
# Parse DEATHTIME into a datetime64 column. pd.to_datetime is vectorized and
# accepts values that are already datetimes, so re-running this cell on a live
# kernel does not raise (a row-wise strptime fails with TypeError on re-run).
admissions_died['DEATHTIME'] = pd.to_datetime(admissions_died['DEATHTIME'], format='%Y-%m-%d %H:%M:%S')

# Time from admission to death, expressed in fractional days.
admissions_died['DAYS_TO_DEATH'] = (admissions_died['DEATHTIME'] - admissions_died['ADMITTIME']) / np.timedelta64(1, 'D')
admissions_died.head()

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ADMITTIME,DEATHTIME,ADMISSION_TYPE,ADMISSION_LOCATION,DISCHARGE_LOCATION,INSURANCE,LANGUAGE,RELIGION,MARITAL_STATUS,ETHNICITY,EDREGTIME,EDOUTTIME,DIAGNOSIS,HOSPITAL_EXPIRE_FLAG,HAS_CHARTEVENTS_DATA,DAYS_TO_DEATH
9,30,31,128652,2108-08-22 23:27:00,2108-08-30 15:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,,CATHOLIC,MARRIED,WHITE,,,STATUS EPILEPTICUS,1,1,7.647917
57,55,56,181711,2104-01-02 02:01:00,2104-01-08 10:30:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,NOT SPECIFIED,,WHITE,2104-01-01 23:59:00,2104-01-02 03:33:00,HEAD BLEED,1,1,6.353472
63,61,61,189535,2119-01-04 18:12:00,2119-02-03 01:35:00,EMERGENCY,CLINIC REFERRAL/PREMATURE,DEAD/EXPIRED,Private,,CATHOLIC,MARRIED,WHITE,,,NON-HODGKINS LYMPHOMA;FEBRILE;NEUTROPENIA,1,1,29.307639
70,68,67,155252,2157-12-02 00:45:00,2157-12-02 03:55:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,,JEWISH,SINGLE,WHITE,2157-12-01 20:45:00,2157-12-02 00:55:00,SUBARACHNOID HEMORRHAGE,1,1,0.131944
88,86,84,166401,2196-04-14 04:02:00,2196-04-17 13:42:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Private,,OTHER,MARRIED,WHITE,2196-04-13 22:23:00,2196-04-14 04:31:00,"GLIOBLASTOMA,NAUSEA",1,1,3.402778


Export admissions dataframes to csv

In [9]:
# Write each derived frame next to the raw data; the row index is not exported.
export_targets = (
    ('../Resources/admissions_survived.csv', admissions_survived),
    ('../Resources/admissions_died.csv', admissions_died),
)
for csv_path, frame in export_targets:
    frame.to_csv(csv_path, index=False)

In [10]:
# Summarize the survived-admissions frame: row count, per-column non-null counts and dtypes.
admissions_survived.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 53122 entries, 0 to 58975
Data columns (total 19 columns):
ROW_ID                  53122 non-null int64
SUBJECT_ID              53122 non-null int64
HADM_ID                 53122 non-null int64
ADMITTIME               53122 non-null datetime64[ns]
DISCHTIME               53122 non-null datetime64[ns]
ADMISSION_TYPE          53122 non-null object
ADMISSION_LOCATION      53122 non-null object
DISCHARGE_LOCATION      53122 non-null object
INSURANCE               53122 non-null object
LANGUAGE                30603 non-null object
RELIGION                52761 non-null object
MARITAL_STATUS          43655 non-null object
ETHNICITY               53122 non-null object
EDREGTIME               26792 non-null object
EDOUTTIME               26792 non-null object
DIAGNOSIS               53098 non-null object
HOSPITAL_EXPIRE_FLAG    53122 non-null int64
HAS_CHARTEVENTS_DATA    53122 non-null int64
LENGTH_OF_STAY          53122 non-null float64
dtype

In [11]:
# Summarize the died-admissions frame: row count, per-column non-null counts and dtypes.
admissions_died.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5854 entries, 9 to 58972
Data columns (total 19 columns):
ROW_ID                  5854 non-null int64
SUBJECT_ID              5854 non-null int64
HADM_ID                 5854 non-null int64
ADMITTIME               5854 non-null datetime64[ns]
DEATHTIME               5854 non-null datetime64[ns]
ADMISSION_TYPE          5854 non-null object
ADMISSION_LOCATION      5854 non-null object
DISCHARGE_LOCATION      5854 non-null object
INSURANCE               5854 non-null object
LANGUAGE                3041 non-null object
RELIGION                5757 non-null object
MARITAL_STATUS          5193 non-null object
ETHNICITY               5854 non-null object
EDREGTIME               4085 non-null object
EDOUTTIME               4085 non-null object
DIAGNOSIS               5853 non-null object
HOSPITAL_EXPIRE_FLAG    5854 non-null int64
HAS_CHARTEVENTS_DATA    5854 non-null int64
DAYS_TO_DEATH           5854 non-null float64
dtypes: datetime64[ns](2)