In [1]:
import pandas as pd
import numpy as np
import sys, os
from tqdm.notebook import tqdm
from datetime import timedelta
import matplotlib.pyplot as plt
import pickle

if os.getcwd()[-4:] == "code":
    os.chdir('../')

icu = './data/mimic-iv-2.2-parquet/icu/'
hosp = './data/mimic-iv-2.2-parquet/hosp/'

In [2]:
labevents     = pd.read_parquet(hosp+'labevents.parquet')
d_labitems    = pd.read_parquet(hosp+'d_labitems.parquet')

patients      = pd.read_parquet(hosp+'patients.parquet')
admissions    = pd.read_parquet(hosp+'admissions.parquet')

diagnoses_icd = pd.read_parquet(hosp+'diagnoses_icd.parquet')
microbiology  = pd.read_parquet(hosp+'microbiologyevents.parquet')
prescriptions = pd.read_parquet(hosp+'prescriptions.parquet')

In [3]:
chartevents     = pd.read_parquet(icu+'chartevents.parquet')
d_items         = pd.read_parquet(icu+'d_items.parquet')
inputevents     = pd.read_parquet(icu+'inputevents.parquet')
procedureevents = pd.read_parquet(icu+'procedureevents.parquet')
icustays        = pd.read_parquet(icu+'icustays.parquet')

In [4]:
icd9_suspected_infection = ['001', '002', '003', '004', '005', '006', '007', '008', '009', '010', 
                           '011', '012', '013', '014', '015', '016', '017', '018', '019', '020', 
                           '021', '022', '023', '024', '025', '026', '027', '028', '029', '030', 
                           '031', '032', '033', '034', '035', '036', '037', '038', '039', '040', 
                           '041', '042', '043', '044', '045', '046', '047', '048', '049', '050', 
                           '051', '052', '053', '054', '055', '056', '057', '058', '059', '060', 
                           '061', '062', '063', '064', '065', '066', '067', '068', '069', '070', 
                           '071', '072', '073', '074', '075', '076', '077', '078', '079', '080', 
                           '081', '082', '083', '084', '085', '086', '087', '088', '089', '090', 
                           '091', '092', '093', '094', '095', '096', '097', '098', '099', '100', 
                           '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', 
                           '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', 
                           '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', 
                           '131', '132', '133', '134', '135', '136', '137', '138', '139', '320', 
                           '321', '322', '323', '324', '325', '326', '42090', '420', '421', '422', 
                           '4476', '460', '461', '462', '463', '464', '465', '482', '483', '484', 
                           '485', '486', '487', '488', '466', '523', '52800', '5283', '6820', '53110', 
                           '53120', '53160', '53210', '53220', '53250', '53410', '53420', '53450', 
                           '542', '541', '562', '566', '5695', '56983', '567', '99859', '572', 
                           '5740', '5741', '5742', '5743', '5744', '5745', '5746', '5747', '5748', 
                           '5750', '5751', '5754', '5759', '5761', '5763', '681', '682', '683', 
                           '684', '685', '686', '711', '590', '595', '597', '5990', '601', '604', 
                           '608', '611', '614', '615', '616', '646', '670', '647', '675', '672', 
                           '7806', '7806', '78559', '7855', '99592', '78552', '7919', '7920', '7929', 
                           '7924', '7922', '7929', '7907']

In [5]:
icd10_suspected_infection = ['A', 'B', 'G00', 'G01', 'G02', 'G03', 'G04', 'G05', 'G06', 'G07', 'G08',
                              'G09', 'I30', 'I32', 'I33', 'I40', 'I776', 'J00', 'J01', 'J02', 'J03', 
                              'J04', 'J05', 'J06', 'J09', 'J10', 'J11', 'J12', 'J13', 'J14', 'J15', 
                              'J16', 'J17', 'J18', 'J20', 'J21', 'J22', 'K05', 'K122', 'K251', 'K252', 
                              'K256', 'K261', 'K262', 'K265', 'K281', 'K282', 'K285', 'K35', 'K36', 'K37', 
                              'K57', 'K61', 'K630', 'K631', 'K65', 'K681', 'K750', 'K800', 'K801', 'K803', 
                              'K804', 'K806', 'K81', 'K822', 'K82A', 'K830', 'K832', 'L00', 'L01', 'L02', 
                              'L03', 'L04', 'L05', 'L06', 'L07', 'L08', 'M00', 'M01', 'M02', 'N10', 'N30', 
                              'N34', 'N390', 'N41', 'N45', 'N49', 'N61', 'N70', 'N71', 'N72', 'N73', 'N74', 
                              'N75', 'N76', 'N77', 'O23', 'O85', 'O86', 'O91', 'O98', 'R508', 'R509', 'R578', 
                              'R579', 'R652', 'R827', 'R835', 'R845', 'R855', 'R865', 'R875', 'R7881']

In [6]:
prescription_antibiotics = ['amikacin', 'gentamicin', 'kanamycin', 'netilmicin', 'tobramycin', 'paromomycin', 'spectinomycin', 'geldanamycin', 
                            'ertapenem', 'doripenem', 'imipenem', 'meropenem', 'cefadroxil', 'cefalexin', 'cefaclor', 'cefoxitin', 'cefprozil', 
                            'cefamandole', 'cefuroxime', 'cefixime', 'cefotaxime', 'cefpodoxime', 'ceftazidime', 'ceftriaxone', 'cefepime', 'vancomycin', 
                            'vanc', 'clindamycin', 'daptomycin', 'azithromycin', 'clarithromycin', 'erythromycin', 'telithromycin', 'aztreonam', 
                            'nitrofurantoin', 'linezolid', 'amoxicillin', 'ampicillin', 'dicloxacillin', 'flucloxacillin', 'methicillin', 'nafcillin', 
                            'oxacillin', 'penicillin', 'piperacillin', 'cefotetan', 'ticarcillin', 'timentin', 'colistin', 'bactrim', 'polymyxin', 
                            'ciprofloxacin', 'gatifloxacin', 'levofloxacin', 'moxifloxacin', 'nalidixic acid', 'norfloxacin', 'ofloxacin', 'trovafloxacin', 
                            'sulfadiazine', 'sulfamethoxazole', 'trimethoprim', 'TMP', 'doxycycline', 'minocycline', 'tetracycline', 'dapsone', 'ethambutol', 
                            'isoniazid', 'pyrazinamide', 'rifampicin', 'rifampin', 'rifabutin', 'streptomycin', 'chloramphenicol', 'synercid', 'fosfomycin', 
                            'metronidazole', 'mupirocin', 'quinupristin', 'tigecycline', 'unasyn']

In [7]:
d_antibiotics = d_items[(d_items['linksto']=='inputevents')&(d_items['category']=='Antibiotics')].itemid.unique()

In [8]:
diagnoses_icd9  = diagnoses_icd[diagnoses_icd['icd_version']==9]
diagnoses_icd10 = diagnoses_icd[diagnoses_icd['icd_version']==10]

In [9]:
# Suspected infection with ICD9 code
SI_icd9 = diagnoses_icd9[diagnoses_icd9['icd_code'].str.startswith(tuple(icd9_suspected_infection))].hadm_id.unique()

# Suspected infection with ICD10 code
SI_icd10 = diagnoses_icd10[diagnoses_icd10['icd_code'].str.startswith(tuple(icd10_suspected_infection))].hadm_id.unique()

#Blood culture - icu
SI_BC_icu = procedureevents[procedureevents['itemid'].isin([225401,225437])].hadm_id.unique()

#Blood culture - hosp
SI_BC_hosp = microbiology[microbiology['spec_itemid'].isin([70011,70012])].hadm_id.unique()

#Prescription - icu
prescriptions['drug'] = prescriptions['drug'].str.lower()
SI_AB_hosp = prescriptions[prescriptions['drug'].isin(prescription_antibiotics)].hadm_id.unique()

SI_AB_icu = inputevents[inputevents['itemid'].isin(d_antibiotics)].hadm_id.unique()

In [10]:
hadm_id = np.concatenate([SI_icd9,SI_icd10,SI_AB_hosp,SI_AB_icu,SI_BC_hosp,SI_BC_hosp])

In [11]:
len(hadm_id)

459530

In [12]:
hadm_id = np.unique(hadm_id)
len(hadm_id)

201518

In [13]:
icustays_SI = icustays[icustays['hadm_id'].isin(hadm_id)]

print("hadm_id : ",len(icustays_SI.hadm_id.unique()))
print("\nstay_id : ",len(icustays_SI.stay_id.unique()))

hadm_id :  52240

stay_id :  58820


In [16]:
with open('./data/data_pickle/SI_hadm_id.pickle', 'wb') as f:
    pickle.dump(icustays_SI.hadm_id.unique(), f)

with open('./data/data_pickle/SI_stay_id.pickle', 'wb') as f:
    pickle.dump(icustays_SI.stay_id.unique(), f)

In [17]:
len(icustays.hadm_id.unique())

66239

In [18]:
len(icustays.stay_id.unique())

73181