In [1]:
import os, sys
import pandas as pd
import numpy as np

# Select heart disease + melanoma cohorts

In [27]:
def gather_cohort(adults=True, patient_weight=True, icd_diagnoses=[], min_los=1, max_los=8, verbose=False):
    cohort = pd.read_csv("data/mimic-iv-1.0/icu/icustays.csv")
    # Filter for adults with icustay length between 1 and 8 days
    cohort = cohort[cohort['los'] >= min_los]
    cohort = cohort[cohort['los'] <= max_los]
    cohort = cohort[['subject_id', 'stay_id', 'hadm_id', 'intime', 'outtime', 'los']]

    ages = pd.read_csv("data/mimic-iv-1.0/core/patients.csv")
    ages = ages[ages['anchor_age'] >= 18]
    ages = ages[['subject_id', 'gender', 'anchor_age']]

    admissions = pd.merge(cohort, ages, on=['subject_id'])

    # Admissions + Age information
    admissions = pd.merge(admissions, ages, on=['subject_id', 'gender', 'anchor_age'], how='inner')
    admissions
    if patient_weight:
        if verbose: print("Adding information about patientweight")
        # Add patient weight information
        weights = pd.read_csv("data/mimic-iv-1.0/icu/procedureevents.csv")
        weights = weights[['stay_id', 'hadm_id', 'patientweight']]
        admissions = pd.merge(admissions, weights, on=['hadm_id', 'stay_id']).drop_duplicates()
    admissions
    
    if len(icd_diagnoses) > 0:
        if verbose: print("Filtering for ICD diagnoses")
        diagnoses_icd = pd.read_csv("data/mimic-iv-1.0/hosp/diagnoses_icd.csv")
        diagnoses_icd = diagnoses_icd[diagnoses_icd[['icd_code', 'icd_version']].apply(tuple, axis=1).isin(icd_diagnoses)]
        diagnoses = diagnoses_icd[['hadm_id', 'icd_code', 'icd_version']]
        admissions = pd.merge(admissions, diagnoses, on=['hadm_id']).drop_duplicates()

    hadm_ids = admissions.hadm_id.unique()

    return hadm_ids, admissions

## Melanoma

In [28]:
diagnoses_icd = pd.read_csv("data/mimic-iv-1.0/hosp/d_icd_diagnoses.csv")
melanoma_codes = diagnoses_icd[diagnoses_icd['long_title'].str.contains('melanoma')]
melanoma = []
for c, v in zip(melanoma_codes['icd_code'], melanoma_codes['icd_version']):
    melanoma.append((c, v))
hadm_ids, admissions = gather_cohort(icd_diagnoses=melanoma, verbose=True)
admissions

Adding information about patientweight
Filtering for ICD diagnoses


Unnamed: 0,subject_id,stay_id,hadm_id,intime,outtime,los,gender,anchor_age,patientweight,icd_code,icd_version
0,12506390,30062923,21301912,2160-10-17 13:39:06,2160-10-18 14:19:00,1.027708,M,79,147.0,Z85820,10
1,10156486,30065290,25194307,2124-04-06 21:29:32,2124-04-09 17:40:47,2.841146,F,75,69.6,Z85820,10
2,14387076,30066446,28791915,2122-04-21 10:39:22,2122-04-27 19:10:46,6.355139,F,83,82.2,V1082,9
3,14387076,37658360,28791915,2122-04-16 13:41:58,2122-04-20 17:13:21,4.146794,F,83,82.2,V1082,9
4,14387076,39592074,28791915,2122-04-27 19:26:03,2122-04-29 18:46:52,1.972789,F,83,82.2,V1082,9
...,...,...,...,...,...,...,...,...,...,...,...
626,16658982,39111464,21313868,2126-04-26 08:51:17,2126-04-27 20:54:25,1.502176,F,80,73.3,Z85820,10
627,15882166,39370261,22894977,2140-02-10 11:00:15,2140-02-11 22:27:26,1.477211,F,79,56.0,V1082,9
628,18434869,39487652,25908639,2185-08-27 02:11:09,2185-08-31 17:23:17,4.633426,M,70,59.7,V1082,9
629,12068318,39499950,21876859,2173-08-14 09:07:27,2173-08-15 18:53:10,1.406748,M,70,78.0,V1082,9


## Heart disease

In [30]:
cad_names = ['Atherosclerotic heart disease of native coronary artery without angina pectoris',
'Atherosclerotic heart disease of native coronary artery with unstable angina pectoris', 
'Atherosclerotic heart disease of native coronary artery with angina pectoris with documented spasm', 
'Atherosclerotic heart disease of native coronary artery with other forms of angina pectoris', 
'Atherosclerotic heart disease of native coronary artery with unspecified angina pectoris']
diagnoses_icd = pd.read_csv("data/mimic-iv-1.0/hosp/d_icd_diagnoses.csv")
hd_codes = diagnoses_icd[diagnoses_icd['long_title'].isin(cad_names)]
hd = []
for c, v in zip(hd_codes['icd_code'], hd_codes['icd_version']):
    hd.append((c, v))
hadm_ids, admissions = gather_cohort(icd_diagnoses=hd, verbose=True)
admissions

Adding information about patientweight
Filtering for ICD diagnoses


Unnamed: 0,subject_id,stay_id,hadm_id,intime,outtime,los,gender,anchor_age,patientweight,icd_code,icd_version
0,10215159,34755606,24039782,2127-12-14 20:04:00,2127-12-20 21:47:17,6.071725,F,67,73.0,I2510,10
1,10215159,38137388,24039782,2127-12-22 06:57:18,2127-12-25 22:27:46,3.646157,F,67,73.0,I2510,10
2,12974563,32563675,29618057,2138-11-13 23:30:01,2138-11-15 16:25:19,1.705069,F,72,90.7,I2510,10
3,12974563,36274915,24320856,2139-04-15 16:32:41,2139-04-17 00:23:17,1.326806,F,72,850.0,I2510,10
4,12687112,37445058,26132667,2162-05-31 18:08:45,2162-06-04 10:16:13,3.671852,M,57,119.1,I2510,10
...,...,...,...,...,...,...,...,...,...,...,...
7461,14263294,39928603,27354668,2126-03-04 09:57:14,2126-03-05 11:50:07,1.078391,M,61,76.0,I2510,10
7462,10836444,39934059,25551438,2170-12-09 13:50:58,2170-12-10 17:23:26,1.147546,M,47,95.0,I2510,10
7463,16979986,39944977,29542651,2159-10-06 07:34:56,2159-10-07 22:34:31,1.624711,M,79,73.9,I2510,10
7464,19113885,39978206,25409746,2122-06-25 02:13:00,2122-06-27 16:35:53,2.599225,M,84,67.2,I2510,10
