In [1]:
import os, sys
import pandas as pd
import numpy as np
import pickle
import datetime

In [207]:
# Input CSV locations, resolved relative to the notebook's working directory.
patient_path, icustay_path = "PATIENTS.csv", "ICUSTAYS.csv"

# Read both tables with pandas' default parsing; the date/time columns are
# still plain strings here and are converted in later cells.
icus = pd.read_csv(icustay_path)
patients = pd.read_csv(patient_path)

In [208]:
# Keep only ICU stays whose first AND last care unit are both the MICU
# (equivalent to FIRST_CAREUNIT == LAST_CAREUNIT and LAST_CAREUNIT == 'MICU').
micu_only = (icus['FIRST_CAREUNIT'] == 'MICU') & (icus['LAST_CAREUNIT'] == 'MICU')
temp = icus[micu_only].drop(columns=['ROW_ID'])

# Parse the stay boundaries into datetime64 columns.
# `infer_datetime_format` is deprecated in pandas 2.x (format inference is the
# default and the argument only emits a warning), so it is no longer passed.
for stay_col in ('INTIME', 'OUTTIME'):
    temp[stay_col] = pd.to_datetime(temp[stay_col])

In [209]:
# Parse every date column of the patients table into datetime64.
# `infer_datetime_format` is deprecated in pandas 2.x (format inference is the
# default and the argument only emits a warning), so it is no longer passed.
for date_col in ('DOB', 'DOD', 'DOD_HOSP', 'DOD_SSN'):
    patients[date_col] = pd.to_datetime(patients[date_col])

# ROW_ID is not used downstream. errors='ignore' keeps this cell safe to
# re-run: on a second execution the column is already gone.
patients = patients.drop(columns=['ROW_ID'], errors='ignore')

In [210]:
# Patients that have at least one of the selected ICU stays.
has_selected_stay = patients['SUBJECT_ID'].isin(temp['SUBJECT_ID'])
small_patients = patients.loc[has_selected_stay]

# Attach the patient-level columns to every stay (left join keeps all stays).
temp = pd.merge(temp, small_patients, how='left', on='SUBJECT_ID')

In [211]:
# Age in completed calendar years at ICU admission.
# The previous `days // 365` approximation ignored leap days, so it rounded
# people up to their next birthday a few days early (e.g. someone 17 years and
# 361 days old was reported as 18 and slipped through the adult filter).
admit_time, birth_date = temp['INTIME'], temp['DOB']

# True where the birthday has not yet occurred in the admission year.
birthday_pending = (
    (admit_time.dt.month < birth_date.dt.month)
    | (
        (admit_time.dt.month == birth_date.dt.month)
        & (admit_time.dt.day < birth_date.dt.day)
    )
)

age = np.array(
    admit_time.dt.year - birth_date.dt.year - birthday_pending.astype('int64')
)
temp['age'] = age

In [212]:
# Restrict the stays to patients aged 18 or older.
temp = temp.loc[temp['age'] >= 18]

In [213]:
# Number of selected ICU stays recorded for each hospital admission.
readmit = temp.groupby('HADM_ID')['ICUSTAY_ID'].count()

# An admission is labelled 1 when it has more than one selected ICU stay, else 0.
readmit_labels = readmit.gt(1).astype('int64').rename('readmission').to_frame()

In [214]:
# Row label of the earliest-INTIME stay within each hospital admission.
first_stay_idx = temp.groupby('HADM_ID')['INTIME'].idxmin()

# One row per admission: its earliest stay.
small_temp = temp.loc[first_stay_idx]

In [215]:
# Attach the per-admission `readmission` label (indexed by HADM_ID) to the
# one-row-per-admission frame, matching on its HADM_ID column.
readmission_cohort = small_temp.join(readmit_labels, on='HADM_ID')

In [216]:
# Disabled spot checks left over from exploration (single admission lookup and
# count of stays shorter than one day):
#readmission_cohort[readmission_cohort.HADM_ID == 131488]
#(readmission_cohort['LOS'] < 1.).sum()
# Sanity check: display the (rows, columns) shape of the cohort.
readmission_cohort.shape

(19149, 19)

In [217]:
# `cohort` starts as another name for the same DataFrame object (no copy is
# made); later cells rebind `cohort` to new frames.
cohort = readmission_cohort

In [218]:
# Independent copy of the cohort rows that have a recorded DOD_HOSP value.
dead = cohort.loc[cohort['DOD_HOSP'].notna()].copy()

In [219]:
# Label a stay as an in-ICU death when the DOD_HOSP day falls within the stay,
# compared at calendar-day granularity (time of day is dropped on all sides).
# Both bounds are inclusive: the previous strict `>` on the admission day
# labelled patients who died on the same day they entered the ICU as survivors.
death_day = dead['DOD_HOSP'].dt.normalize()
admit_day = dead['INTIME'].dt.normalize()
discharge_day = dead['OUTTIME'].dt.normalize()

dead_labels = ((death_day >= admit_day) & (death_day <= discharge_day)).astype('int64')
dead['mortality'] = np.array(dead_labels)

In [255]:
# Select the merge key and label by NAME; the previous positional
# `iloc[:, [2, -1]]` silently picked the wrong columns if the column order of
# the frame ever changed.
mortality_by_stay = dead[['ICUSTAY_ID', 'mortality']]

# Drop any `mortality` column left from a previous run of this cell so that a
# re-run does not produce mortality_x / mortality_y and then fail below.
cohort = (
    cohort
    .drop(columns=['mortality'], errors='ignore')
    .merge(mortality_by_stay, on='ICUSTAY_ID', how='left')
)

# Stays with no matching row in `dead` get label 0.
cohort['mortality'] = cohort['mortality'].fillna(0).astype(int)

In [256]:
# Binary long-stay flags derived from the LOS column: 1 when LOS is strictly
# greater than the threshold, else 0 (missing LOS compares as False -> 0).
stay_length = cohort['LOS']
cohort['los>3day'] = stay_length.gt(3.0).astype('int64')
cohort['los>7day'] = stay_length.gt(7.0).astype('int64')

In [261]:
# Keep only stays whose LOS exceeds 0.5.
cohort12h = cohort.loc[cohort['LOS'].gt(0.5)]

In [265]:
# Diagnosis table location, relative to the notebook's working directory.
dxPath = 'DIAGNOSES_ICD.csv'
# Load it with pandas' default parsing.
dxDf = pd.read_csv(dxPath)

In [266]:
# Diagnosis rows belonging to admissions that are present in the filtered cohort.
cohort12hDx = dxDf.loc[dxDf['HADM_ID'].isin(cohort12h['HADM_ID'])]

In [267]:
# Collect each admission's ICD9_CODE values into one Python list, giving a
# single-column frame indexed by HADM_ID.
codes_by_admission = cohort12hDx.groupby('HADM_ID')['ICD9_CODE']
diagnosis = codes_by_admission.apply(list).to_frame()

In [273]:
# Attach the per-admission ICD9_CODE list (indexed by HADM_ID) to the filtered
# cohort, matching on the cohort's HADM_ID column.
tempdf = cohort12h.join(diagnosis, on='HADM_ID')

In [291]:
# Display the INTIME column to confirm it is a datetime64 Series before
# doing timestamp arithmetic on it.
tempdf.INTIME

0       2117-09-11 11:47:35
1       2150-04-17 15:35:42
2       2108-04-06 15:50:15
3       2188-05-24 13:07:20
4       2103-03-11 00:54:00
                ...        
19144   2181-05-15 18:20:51
19145   2111-11-01 14:19:22
19146   2181-04-07 10:53:54
19147   2178-09-02 21:13:22
19148   2188-07-07 18:49:00
Name: INTIME, Length: 18799, dtype: datetime64[ns]

In [292]:
# Timestamps at 12 and 24 hours after INTIME for every stay.
tempdf['12h_obs'] = tempdf['INTIME'] + pd.Timedelta(hours=12)
tempdf['24h_obs'] = tempdf['INTIME'] + pd.Timedelta(hours=24)

In [294]:
# Persist the cohort with the highest available pickle protocol (what the
# previous `-1` argument selected). The context manager guarantees the file is
# flushed and closed; the bare open() call left the handle dangling.
with open('mimic_cohort.pk', 'wb') as cohort_file:
    pickle.dump(tempdf, cohort_file, pickle.HIGHEST_PROTOCOL)

In [2]:
# Reload the cohort saved earlier in this notebook. The context manager closes
# the file handle, which the bare open() call never did.
# NOTE: unpickling executes arbitrary code -- only load pickle files that were
# produced by this notebook or come from an equally trusted source.
with open('mimic_cohort.pk', 'rb') as cohort_file:
    final_cohort = pickle.load(cohort_file)

In [3]:
# Display the reloaded cohort (pandas truncates the rendering of large frames).
final_cohort

Unnamed: 0,SUBJECT_ID,HADM_ID,ICUSTAY_ID,DBSOURCE,FIRST_CAREUNIT,LAST_CAREUNIT,FIRST_WARDID,LAST_WARDID,INTIME,OUTTIME,...,DOD_SSN,EXPIRE_FLAG,age,readmission,mortality,los>3day,los>7day,ICD9_CODE,12h_obs,24h_obs
0,58526,100001,275225,metavision,MICU,MICU,52,52,2117-09-11 11:47:35,2117-09-15 17:57:14,...,NaT,0,35,0,0,1,0,"[25013, 3371, 5849, 5780, V5867, 25063, 5363, ...",2117-09-11 23:47:35,2117-09-12 11:47:35
1,54610,100003,209281,metavision,MICU,MICU,50,50,2150-04-17 15:35:42,2150-04-19 14:12:52,...,2150-12-28,1,59,0,0,0,0,"[53100, 2851, 07054, 5715, 45621, 53789, 4019,...",2150-04-18 03:35:42,2150-04-18 15:35:42
2,9895,100006,291788,carevue,MICU,MICU,15,15,2108-04-06 15:50:15,2108-04-11 15:18:03,...,NaT,1,48,0,0,1,0,"[49320, 51881, 486, 20300, 2761, 7850, 3090, V...",2108-04-07 03:50:15,2108-04-07 15:50:15
3,68591,100016,217590,metavision,MICU,MICU,52,23,2188-05-24 13:07:20,2188-05-30 17:16:33,...,2188-07-06,1,55,0,0,1,0,"[5070, 51881, 25541, 47874, 7580, 34590, 2512,...",2188-05-25 01:07:20,2188-05-25 13:07:20
4,16229,100017,258320,carevue,MICU,MICU,15,15,2103-03-11 00:54:00,2103-03-11 17:31:00,...,NaT,0,27,0,0,0,0,"[9696, 51881, 78009, 2760, E9503, 29634, 30470...",2103-03-11 12:54:00,2103-03-12 00:54:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19144,64296,199949,275223,metavision,MICU,MICU,52,52,2181-05-15 18:20:51,2181-05-20 22:45:51,...,2181-05-20,1,61,0,1,1,0,"[1960, 34400, 51881, 1984, 1983, 3363, 1628, 1...",2181-05-16 06:20:51,2181-05-16 18:20:51
19145,2452,199951,296780,carevue,MICU,MICU,15,15,2111-11-01 14:19:22,2111-11-03 17:57:54,...,2113-01-06,1,57,0,0,0,0,"[486, 4241, 2765, 25051, 40391, 25041, 25061, ...",2111-11-02 02:19:22,2111-11-02 14:19:22
19146,97714,199957,253382,metavision,MICU,MICU,50,50,2181-04-07 10:53:54,2181-04-08 21:17:17,...,2181-07-11,1,81,0,0,0,0,"[25080, 262, 70723, 49392, 2867, V5867, E9323,...",2181-04-07 22:53:54,2181-04-08 10:53:54
19147,26705,199958,277656,metavision,MICU,MICU,50,50,2178-09-02 21:13:22,2178-09-03 18:24:38,...,NaT,0,45,0,0,0,0,"[49322, 53081, 32723, 3051, 7850, 30000, 79431...",2178-09-03 09:13:22,2178-09-03 21:13:22


In [7]:
# Show only the stays whose DBSOURCE value is 'metavision'.
final_cohort.loc[final_cohort['DBSOURCE'] == 'metavision']

Unnamed: 0,SUBJECT_ID,HADM_ID,ICUSTAY_ID,DBSOURCE,FIRST_CAREUNIT,LAST_CAREUNIT,FIRST_WARDID,LAST_WARDID,INTIME,OUTTIME,...,DOD_SSN,EXPIRE_FLAG,age,readmission,mortality,los>3day,los>7day,ICD9_CODE,12h_obs,24h_obs
0,58526,100001,275225,metavision,MICU,MICU,52,52,2117-09-11 11:47:35,2117-09-15 17:57:14,...,NaT,0,35,0,0,1,0,"[25013, 3371, 5849, 5780, V5867, 25063, 5363, ...",2117-09-11 23:47:35,2117-09-12 11:47:35
1,54610,100003,209281,metavision,MICU,MICU,50,50,2150-04-17 15:35:42,2150-04-19 14:12:52,...,2150-12-28,1,59,0,0,0,0,"[53100, 2851, 07054, 5715, 45621, 53789, 4019,...",2150-04-18 03:35:42,2150-04-18 15:35:42
3,68591,100016,217590,metavision,MICU,MICU,52,23,2188-05-24 13:07:20,2188-05-30 17:16:33,...,2188-07-06,1,55,0,0,1,0,"[5070, 51881, 25541, 47874, 7580, 34590, 2512,...",2188-05-25 01:07:20,2188-05-25 13:07:20
7,48539,100035,245719,metavision,MICU,MICU,50,50,2115-02-22 06:52:06,2115-03-04 19:00:50,...,NaT,0,36,0,0,1,1,"[49391, 4275, 34982, 51881, 78001, 4254, 5849,...",2115-02-22 18:52:06,2115-02-23 06:52:06
8,58947,100037,270105,metavision,MICU,MICU,23,23,2183-03-23 18:22:04,2183-03-25 14:27:12,...,NaT,0,58,1,0,0,0,"[20501, 5849, 430, 486, 4538, 6826, 2875, 2841...",2183-03-24 06:22:04,2183-03-24 18:22:04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19141,67735,199929,216610,metavision,MICU,MICU,50,50,2200-10-03 14:54:34,2200-10-06 19:12:05,...,NaT,0,40,0,0,1,0,"[03849, 5721, 4561, 5568, 2761, V427, 2639, 99...",2200-10-04 02:54:34,2200-10-04 14:54:34
19143,49225,199948,224174,metavision,MICU,MICU,52,52,2102-02-24 19:30:03,2102-02-26 14:29:42,...,NaT,0,74,0,0,0,0,"[5691, 51881, 2762, 5119, 5990, 3051, 56210, 9...",2102-02-25 07:30:03,2102-02-25 19:30:03
19144,64296,199949,275223,metavision,MICU,MICU,52,52,2181-05-15 18:20:51,2181-05-20 22:45:51,...,2181-05-20,1,61,0,1,1,0,"[1960, 34400, 51881, 1984, 1983, 3363, 1628, 1...",2181-05-16 06:20:51,2181-05-16 18:20:51
19146,97714,199957,253382,metavision,MICU,MICU,50,50,2181-04-07 10:53:54,2181-04-08 21:17:17,...,2181-07-11,1,81,0,0,0,0,"[25080, 262, 70723, 49392, 2867, V5867, E9323,...",2181-04-07 22:53:54,2181-04-08 10:53:54
