In [None]:
import pandas

In [None]:
# Mount Google Drive into the Colab runtime so the project files can be read
# and written through the local filesystem.
from google.colab import drive
drive.mount('/content/drive') 

# Root data folder: the source CSVs are read from here and the label files are
# written to its icu_labels/ subfolder.
directory = "/content/drive/MyDrive/cs598project/data"

Mounted at /content/drive


In [None]:
# FILTER PATIENTS UNDER 18
from tables.description import Time32Col
from dateutil.relativedelta import relativedelta
def filter_age(data):
    """Keep only the rows whose patient was at least 18 years old at admission.

    An ``AGE`` column holding the whole years elapsed between ``DOB`` and
    ``ADMITTIME`` is written onto ``data`` itself, so the caller's frame gains
    that column as a side effect.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with datetime ``ADMITTIME`` and ``DOB`` columns.

    Returns
    -------
    pandas.DataFrame
        The rows of ``data`` for which ``AGE >= 18``.
    """
    # Walk the two columns in lockstep instead of using a row-wise
    # DataFrame.apply: apply(axis=1) on an empty frame yields a DataFrame,
    # which cannot be assigned to a single column, and materialising a Series
    # per row is needlessly slow on large tables.
    data['AGE'] = [
        relativedelta(admitted, born).years
        for admitted, born in zip(data['ADMITTIME'], data['DOB'])
    ]

    return data[data['AGE'] >= 18]

In [None]:
# FILTER ROWS WHERE PATIENT DIED INSIDE ICU 
def filter_death_in_icu(data):
    """Drop the rows whose death time falls inside the ICU stay window.

    A row is removed when ``INTIME <= DEATHTIME <= OUTTIME`` (both bounds
    inclusive). Rows with a missing ``DEATHTIME`` never satisfy the
    comparisons and are therefore kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``DEATHTIME``, ``INTIME`` and ``OUTTIME`` columns.

    Returns
    -------
    pandas.DataFrame
        The rows of ``data`` that were not flagged as a death during the stay.
    """
    death_time = data['DEATHTIME']
    died_after_entry = death_time >= data['INTIME']
    died_before_exit = death_time <= data['OUTTIME']
    died_in_icu = died_after_entry & died_before_exit
    return data[~died_in_icu]


In [None]:
# Load the four source tables from the data folder.
admissions = pandas.read_csv(f"{directory}/ADMISSIONS.csv")
patients = pandas.read_csv(f"{directory}/PATIENTS.csv")
icu_stays = pandas.read_csv(f"{directory}/ICUSTAYS.csv")
transfers = pandas.read_csv(f"{directory}/TRANSFERS.csv")

# Show the columns available in each table.
for table_label, table in (
    ('ADMISSIONS:', admissions),
    ('PATIENTS:', patients),
    ('ICU_STAYS:', icu_stays),
    ('TRANSFERS:', transfers),
):
    print(table_label, table.columns)

# Datetime conversion: parse every timestamp column the later cells compare
# or sort on.
for table, time_columns in (
    (transfers, ['INTIME', 'OUTTIME']),
    (admissions, ['DISCHTIME', 'ADMITTIME', 'DEATHTIME']),
    (icu_stays, ['INTIME', 'OUTTIME']),
    (patients, ['DOB', 'DOD']),
):
    for time_column in time_columns:
        table[time_column] = pandas.to_datetime(table[time_column])

ADMISSIONS: Index(['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ADMITTIME', 'DISCHTIME',
       'DEATHTIME', 'ADMISSION_TYPE', 'ADMISSION_LOCATION',
       'DISCHARGE_LOCATION', 'INSURANCE', 'LANGUAGE', 'RELIGION',
       'MARITAL_STATUS', 'ETHNICITY', 'EDREGTIME', 'EDOUTTIME', 'DIAGNOSIS',
       'HOSPITAL_EXPIRE_FLAG', 'HAS_CHARTEVENTS_DATA'],
      dtype='object')
PATIENTS: Index(['ROW_ID', 'SUBJECT_ID', 'GENDER', 'DOB', 'DOD', 'DOD_HOSP', 'DOD_SSN',
       'EXPIRE_FLAG'],
      dtype='object')
ICU_STAYS: Index(['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'DBSOURCE',
       'FIRST_CAREUNIT', 'LAST_CAREUNIT', 'FIRST_WARDID', 'LAST_WARDID',
       'INTIME', 'OUTTIME', 'LOS'],
      dtype='object')
TRANSFERS: Index(['ROW_ID', 'SUBJECT_ID', 'HADM_ID', 'ICUSTAY_ID', 'DBSOURCE',
       'EVENTTYPE', 'PREV_CAREUNIT', 'CURR_CAREUNIT', 'PREV_WARDID',
       'CURR_WARDID', 'INTIME', 'OUTTIME', 'LOS'],
      dtype='object')


In [None]:
# Join tables of interest to allow for target assignment to ICU cases
# Apply filters described in paper

print('ICU pre filters: ', icu_stays.shape[0])

# One row per (patient, admission); columns that clash with PATIENTS get the
# '_adm' suffix on the ADMISSIONS side.
patient_admissions = patients.merge(admissions, how='inner', on='SUBJECT_ID', suffixes=('', '_adm'))
df = filter_age(patient_admissions)

# Keep ICU stays of patients that passed the age filter. The explicit .copy()
# makes icu_stays an independent frame: later cells add a 'target' column to
# it, and doing that on a boolean-mask slice raises SettingWithCopyWarning.
icu_stays = icu_stays[icu_stays['SUBJECT_ID'].isin(df['SUBJECT_ID'])].copy()

# One row per ICU stay, then drop the stays during which the patient died.
df = df.merge(icu_stays, how='inner', on=['SUBJECT_ID', 'HADM_ID'], suffixes=('', '_icu'))
df = filter_death_in_icu(df)

print('ICU post filters: ', len(df['ICUSTAY_ID'].unique())) # compared to 48,393 in the paper

ICU pre filters:  61532
ICU post filters:  48886


In [None]:
# Preview the transfers table; the notebook display elides the middle rows.
transfers

Unnamed: 0,ROW_ID,SUBJECT_ID,HADM_ID,ICUSTAY_ID,DBSOURCE,EVENTTYPE,PREV_CAREUNIT,CURR_CAREUNIT,PREV_WARDID,CURR_WARDID,INTIME,OUTTIME,LOS
0,657,111,192123,254245.0,carevue,transfer,CCU,MICU,7.0,23.0,2142-04-29 15:27:11,2142-05-04 20:38:33,125.19
1,658,111,192123,,carevue,transfer,MICU,,23.0,45.0,2142-05-04 20:38:33,2142-05-05 11:46:32,15.13
2,659,111,192123,,carevue,discharge,,,45.0,,2142-05-05 11:46:32,NaT,
3,660,111,155897,249202.0,metavision,admit,,MICU,,52.0,2144-07-01 04:13:59,2144-07-01 05:19:39,1.09
4,661,111,155897,,metavision,transfer,MICU,,52.0,32.0,2144-07-01 05:19:39,2144-07-01 06:28:29,1.15
...,...,...,...,...,...,...,...,...,...,...,...,...,...
261892,259671,98385,195599,,metavision,transfer,,,36.0,49.0,2108-10-06 11:27:11,2108-10-06 13:05:57,1.65
261893,259672,98385,195599,292167.0,metavision,transfer,,SICU,49.0,33.0,2108-10-06 13:05:57,2108-10-11 17:00:31,123.91
261894,259673,98385,195599,,metavision,discharge,SICU,,33.0,,2108-10-11 17:00:31,NaT,
261895,259674,98389,155368,,metavision,admit,,,,29.0,2153-10-14 22:12:58,2153-10-14 22:21:06,0.14


In [None]:
# case i) patients were transferred to low-level wards from ICU, but returned to ICU again (3,555 records); 
    #  consecutive ICU stays for same patient/admission where a transfer exists between them with no ICUSTAY_ID that is not a discharge
icu_stays['target'] = 0

# Transfers that are not a discharge and carry no ICUSTAY_ID - this would imply
# a transfer not to the ICU. The selection does not depend on the visit, so it
# is done once here instead of once per admission.
ward_transfers = transfers[(transfers['EVENTTYPE'] != 'discharge') & (transfers['ICUSTAY_ID'].isna())]

for _, visit in icu_stays[['SUBJECT_ID', 'HADM_ID']].drop_duplicates().iterrows():
    sid, hid = visit['SUBJECT_ID'], visit['HADM_ID']
    stays = icu_stays[(icu_stays['SUBJECT_ID'] == sid) & (icu_stays['HADM_ID'] == hid)][['ICUSTAY_ID', 'OUTTIME', 'INTIME']]

    if stays.shape[0] < 2:  # Ignore if no consecutive ICU stays in visit
        continue

    stays = stays.sort_values('OUTTIME')
    stay_ids = stays['ICUSTAY_ID'].tolist()

    # Non-ICU transfers belonging to this visit only
    visit_ward_transfers = ward_transfers[(ward_transfers['SUBJECT_ID'] == sid) & (ward_transfers['HADM_ID'] == hid)]

    # Walk consecutive (current, next) ICU stays in OUTTIME order. The loop
    # variable is deliberately not called `id`, which would shadow the builtin
    # for every later cell of the notebook.
    for stay_id, next_stay_id in zip(stay_ids, stay_ids[1:]):
        stay1 = stays[stays['ICUSTAY_ID'] == stay_id]
        stay1 = stay1[stay1['OUTTIME'] == stay1['OUTTIME'].min()].iloc[0]

        stay2 = stays[stays['ICUSTAY_ID'] == next_stay_id]
        stay2 = stay2[stay2['INTIME'] == stay2['INTIME'].max()].iloc[-1]

        # Transfer to lower ward between the current and next ICU stay
        ts = visit_ward_transfers[
            (visit_ward_transfers['INTIME'] >= stay1['OUTTIME'])
            & (visit_ward_transfers['OUTTIME'] <= stay2['INTIME'])
        ]
        if ts.shape[0] > 0:
            # If a transfer is found, mark current ICU stay with true label
            icu_stays.loc[icu_stays['ICUSTAY_ID'] == stay_id, 'target'] = 1

icu_stays[icu_stays['target'] == 1].shape[0] # 3636

3636

In [None]:
# Persist the ICU stays currently flagged as case i (id and target only).
(
    icu_stays
    .loc[icu_stays['target'] == 1, ['ICUSTAY_ID', 'target']]
    .to_csv(f"{directory}/icu_labels/case_i_labels.csv", index=False)
)  # 3636

In [None]:
# case ii) patients were transferred to low-level wards from ICU, and died later (1,974 records); 
    # there exists a transfer (not discharge) after ICU stay to a non-ICU ward, HOSPITAL_EXPIRE_FLAG=1 for admission
icu_stays['target'] = 0

# Distinct (patient, admission) pairs of the cohort whose admission is flagged
# with HOSPITAL_EXPIRE_FLAG == 1.
expired_visits = df.loc[df['HOSPITAL_EXPIRE_FLAG'] == 1, ['SUBJECT_ID', 'HADM_ID']].drop_duplicates()

for _, row in expired_visits.iterrows():
    sid = row['SUBJECT_ID']
    hid = row['HADM_ID']

    in_visit = (df['SUBJECT_ID'] == sid) & (df['HADM_ID'] == hid)
    stays = df.loc[in_visit, ['ICUSTAY_ID', 'OUTTIME', 'INTIME']].sort_values('OUTTIME')
    if stays.empty:
        continue

    # Only the ICU stay with the latest OUTTIME in the visit can be labelled.
    final_row = stays.iloc[-1]
    last_stay = final_row['ICUSTAY_ID']
    last_disttime = final_row['OUTTIME']

    # Transfers of this visit that are not a discharge and have no ICUSTAY_ID
    transfer_mask = (
        (transfers['SUBJECT_ID'] == sid)
        & (transfers['HADM_ID'] == hid)
        & (transfers['EVENTTYPE'] != 'discharge')
        & (transfers['ICUSTAY_ID'].isna())
    )

    # ...that start at or after the end of that last ICU stay
    ts = transfers[transfer_mask & (transfers['INTIME'] >= last_disttime)]
    if len(ts) > 0:
        icu_stays.loc[icu_stays['ICUSTAY_ID'] == last_stay, 'target'] = 1


len(icu_stays[icu_stays['target'] == 1]) # 1656

1656

In [None]:
# Persist the ICU stays currently flagged as case ii (id and target only).
(
    icu_stays
    .loc[icu_stays['target'] == 1, ['ICUSTAY_ID', 'target']]
    .to_csv(f"{directory}/icu_labels/case_ii_labels.csv", index=False)
)  # 1656

In [None]:
# case iii) patients were discharged, but returned to the ICU within the next 30 days (3,205 records); 
icu_stays['target'] = 0

for sid in df['SUBJECT_ID'].unique():
    patient_rows = df[df['SUBJECT_ID'] == sid]

    # Admissions of this patient, ordered by hospital discharge time
    hadms = patient_rows[['HADM_ID', 'DISCHTIME']].drop_duplicates().sort_values('DISCHTIME')
    if len(hadms) < 2:
        continue

    hadm_ids = hadms['HADM_ID'].unique().tolist()

    # Compare every admission with the one that follows it
    for hid, next_hid in zip(hadm_ids, hadm_ids[1:]):
        icu_ids = patient_rows.loc[patient_rows['HADM_ID'] == hid, ['DISCHTIME', 'ICUSTAY_ID', 'OUTTIME']]
        if icu_ids.empty:
            continue

        final_stay = icu_ids.sort_values('OUTTIME').iloc[-1]
        last_disc = final_stay['DISCHTIME']  # discharge time of the current admission
        last_icu = final_stay['ICUSTAY_ID']  # last ICU stay of the current admission

        # Earliest ICU entry of the following admission
        next_visit = patient_rows[patient_rows['HADM_ID'] == next_hid].sort_values('INTIME')
        next_icu_in = next_visit.iloc[0]['INTIME']

        # Flag the last ICU stay when the next ICU entry is no later than
        # 30 days after the discharge.
        readmission_deadline = last_disc + relativedelta(days=30)
        if readmission_deadline >= next_icu_in:
            icu_stays.loc[icu_stays['ICUSTAY_ID'] == last_icu, 'target'] = 1


len(icu_stays[icu_stays['target'] == 1]) # 2890

In [None]:
# Persist the ICU stays currently flagged as case iii (id and target only).
(
    icu_stays
    .loc[icu_stays['target'] == 1, ['ICUSTAY_ID', 'target']]
    .to_csv(f"{directory}/icu_labels/case_iii_labels.csv", index=False)
)

In [None]:
icu_stays['target'] = 0

# case iv) patients were discharged and died within the next 30 days 

for subject_id in df['SUBJECT_ID'].unique():
    # Admissions of this patient that carry a recorded date of death
    has_death_date = patient_admissions['DOD'].notna()
    admits = patient_admissions[(patient_admissions['SUBJECT_ID'] == subject_id) & has_death_date]
    if admits.empty:
        continue

    death_date = admits['DOD'].max()

    # Dont want a DISCHTIME that correspondes to death time, so only
    # admissions with HOSPITAL_EXPIRE_FLAG == 0 supply the discharge time.
    survived_admits = admits[admits['HOSPITAL_EXPIRE_FLAG'] == 0]
    if survived_admits.empty:
        continue
    last_discharge = survived_admits['DISCHTIME'].max()

    # Last ICU stay (by OUTTIME) of the patient within the filtered cohort.
    # NOTE(review): this looks across all of the patient's admissions in df, so
    # the stay is not necessarily from the admission that supplied
    # last_discharge - confirm this is intended.
    patient_stays = (
        df[df['SUBJECT_ID'] == subject_id]
        .sort_values('OUTTIME')
        .drop_duplicates(subset='ICUSTAY_ID', keep='last')
    )
    last_stay = patient_stays.iloc[-1]

    if death_date <= (last_discharge + relativedelta(days=30)):
        icu_stays.loc[icu_stays['ICUSTAY_ID'] == last_stay['ICUSTAY_ID'], 'target'] = 1


len(icu_stays[icu_stays['target'] == 1]) # 2139


In [None]:
# Persist the ICU stays currently flagged as case iv (id and target only).
# This write must stay enabled: the label-compilation cell reads
# case_iv_labels.csv back, so with the line commented out a fresh
# "Restart & Run All" either fails or silently picks up a stale file.
icu_stays[icu_stays['target'] == 1][['ICUSTAY_ID', 'target']].to_csv(f"{directory}/icu_labels/case_iv_labels.csv", index=False) 

In [None]:
# compile target vector

# Read back the four per-case label files written by the cells above.
case_label_paths = (
    f"{directory}/icu_labels/case_i_labels.csv",
    f"{directory}/icu_labels/case_ii_labels.csv",
    f"{directory}/icu_labels/case_iii_labels.csv",
    f"{directory}/icu_labels/case_iv_labels.csv",
)
labels1, labels2, labels3, labels4 = map(pandas.read_csv, case_label_paths)

labels = pandas.concat([labels1, labels2, labels3, labels4])

# A stay is positive (1) when its id appears in any of the case files, else 0.
in_any_case = icu_stays['ICUSTAY_ID'].isin(labels['ICUSTAY_ID'])
icu_stays['target'] = in_any_case.astype('int64')

In [None]:
# Number of ICU stays whose id appears in at least one of the four case label files.
icu_stays['target'].sum()

9438

In [None]:
# Write the final label vector: one row per ICU stay in icu_stays, holding its
# id and its 0/1 target.
(
    icu_stays
    .loc[:, ['ICUSTAY_ID', 'target']]
    .to_csv(f"{directory}/icu_labels/label_vector.csv", index=False)
)