In [1]:
# Import libraries
import psycopg2
import getpass
import matplotlib.pyplot as plt
import numpy as np
import os.path
import time

In [2]:
# Load the cached admission tables written by an earlier step.
# The .npy file holds a pickled dict (hence allow_pickle=True and .tolist(),
# which hands back the stored object) - only load files you trust.
_data = np.load('res/admission.npy', allow_pickle=True).tolist()

# Pull the individual entries out of the dict in one go
adm_labels, all_adm, first_adm, adult_first_adm = (
    _data[key]
    for key in ('adm_labels', 'all_adm', 'first_adm', 'adult_first_adm')
)
print("Loaded!")

Loaded!


In [3]:
# Database connection settings (consumed by the connection cell below)
host, port = 'localhost', '5433'   # server address; the port is kept as a string
user, dbname = 'postgres', 'mimic' # login role and database name
schema = 'mimiciii'                # schema placed on the search_path after connecting

In [4]:
# Connect to the database
# The password is prompted for at run time with getpass instead of being
# written into the notebook. (The prompt has no placeholder, so the former
# no-op `.format(user)` call on it has been dropped.)
con = psycopg2.connect(dbname=dbname, user=user, host=host, port=port,
                       password=getpass.getpass(prompt='Password:'))
cur = con.cursor()

# Point unqualified table names in later queries at `schema`.
# `schema` is a constant defined in this notebook; do not build SQL with
# str.format from values that come from outside the notebook.
cur.execute('SET search_path TO {}'.format(schema))
con.commit()
print("Connected!")

Password: ··········


Connected!


In [5]:
# Wall-clock start, used for the run-time report printed at the end of this cell
start = time.time()

# Build the comorbidity query.
# The result has one row per hadm_id (GROUP BY hadm_id) with three 0/1 flags,
# each the MAX over that admission's diagnosis rows of a CASE expression:
#   AIDS - HIV and AIDS codes
#   HAEM - haematologic malignancy codes (lymphoma / leukemia ranges)
#   METS - metastatic cancer codes
# The inner sub-select casts icd9_code to char(5), so every BETWEEN / = test
# below is a string comparison on the code text, not a numeric one.
# NOTE(review): the HAEM branch '20302'..'20382' overlaps the two branches
# before it ('20250'..'20302' and '20310'..'20312'). All of them yield 1, so
# the result is unaffected, but confirm the intended lower bound.
query = \
"""
SELECT hadm_id,
    
    /* HIV and AIDS */
    MAX(CASE
    WHEN icd9_code BETWEEN '042' AND '0449' THEN 1
    ELSE 0
        END) AS AIDS,
    
    /* Haematologic malignancy */
    MAX(CASE
    WHEN icd9_code BETWEEN '20000' AND '20238' THEN 1 -- lymphoma
    WHEN icd9_code BETWEEN '20240' AND '20248' THEN 1 -- leukemia
    WHEN icd9_code BETWEEN '20250' AND '20302' THEN 1 -- lymphoma
    WHEN icd9_code BETWEEN '20310' AND '20312' THEN 1 -- leukemia
    WHEN icd9_code BETWEEN '20302' AND '20382' THEN 1 -- lymphoma
    WHEN icd9_code BETWEEN '20400' AND '20522' THEN 1 -- chronic leukemia
    WHEN icd9_code BETWEEN '20580' AND '20702' THEN 1 -- other myeloid leukemia
    WHEN icd9_code BETWEEN '20720' AND '20892' THEN 1 -- other myeloid leukemia
    WHEN icd9_code = '2386 ' THEN 1 -- lymphoma
    WHEN icd9_code = '2733 ' THEN 1 -- lymphoma
    ELSE 0
        END) AS HAEM,
    
    /* Metastatic cancer */
    MAX(CASE
    WHEN icd9_code BETWEEN '1960 ' AND '1991 ' THEN 1
    WHEN icd9_code BETWEEN '20970' AND '20975' THEN 1
    WHEN icd9_code = '20979' THEN 1
    WHEN icd9_code = '78951' THEN 1
    ELSE 0
        END) AS METS

FROM (
    SELECT hadm_id, seq_num
    , CAST(icd9_code AS char(5)) AS icd9_code
    FROM diagnoses_icd
    ) icd

GROUP BY hadm_id
"""

# Run the comorbidity query, commit, then read back every result row
cur.execute(query)
con.commit()
data = cur.fetchall()

# Turn the fetched rows into a numpy array for the per-admission selection below
raw_aids_haem_mets = np.array(data)

# Report how long the query and fetch took
end = time.time()
elapsed_seconds = round(end - start, 2)
print("This query took {} seconds".format(elapsed_seconds))

This query took 1.92 seconds


In [6]:
# Allocate the zero-filled output table: one row per entry of adult_first_adm
# and four columns (filled in below as hadm_id followed by the three flags)
m = len(adult_first_adm) # number of patients
aids_haem_mets = np.zeros(shape=(m, 4), dtype=float)

In [7]:
# Select aids_haem_mets for every patient we're interested in
start = time.time()

# Index the query result by hadm_id (column 0) once, so that each admission
# below is a single dictionary lookup instead of a boolean-mask scan over
# every row of raw_aids_haem_mets.
flags_by_hadm_id = {row[0]: row[1:] for row in raw_aids_haem_mets}

# Number of admissions that have no row in the query result
n_missing = 0

for i in range(m):

    # Define hospital admission ID number
    admission_id = adult_first_adm[i,1]

    # Look up the flag columns for this hospital admission ID
    flags = flags_by_hadm_id.get(admission_id)

    # Store in aids_haem_mets
    aids_haem_mets[i,0] = admission_id
    if flags is None:
        # The query returned nothing for this admission. Record the flags as
        # 0 (the same value the query's ELSE branches give when no code
        # matches) instead of failing on an empty selection, and count it so
        # the gap is reported below.
        aids_haem_mets[i,1:] = 0
        n_missing += 1
    else:
        aids_haem_mets[i,1:] = flags

    # Print progress
    if (i+1) % 1000 == 0:
        print("{}% complete ({}/{})".format(round((i+1)/m*100,2), i+1, m))

end = time.time()

# Evaluate run time
print("")
if n_missing:
    print("Warning: {} admissions had no rows in the query result; their flags were set to 0".format(n_missing))
print("Finished! This took {} minutes".format(round((end-start)/60,2)))

2.59% complete (1000/38549)
5.19% complete (2000/38549)
7.78% complete (3000/38549)
10.38% complete (4000/38549)
12.97% complete (5000/38549)
15.56% complete (6000/38549)
18.16% complete (7000/38549)
20.75% complete (8000/38549)
23.35% complete (9000/38549)
25.94% complete (10000/38549)
28.54% complete (11000/38549)
31.13% complete (12000/38549)
33.72% complete (13000/38549)
36.32% complete (14000/38549)
38.91% complete (15000/38549)
41.51% complete (16000/38549)
44.1% complete (17000/38549)
46.69% complete (18000/38549)
49.29% complete (19000/38549)
51.88% complete (20000/38549)
54.48% complete (21000/38549)
57.07% complete (22000/38549)
59.66% complete (23000/38549)
62.26% complete (24000/38549)
64.85% complete (25000/38549)
67.45% complete (26000/38549)
70.04% complete (27000/38549)
72.63% complete (28000/38549)
75.23% complete (29000/38549)
77.82% complete (30000/38549)
80.42% complete (31000/38549)
83.01% complete (32000/38549)
85.61% complete (33000/38549)
88.2% complete (34000/3

In [8]:
# Column names for aids_haem_mets, in column order
aids_haem_mets_labels = ["hadm_id", "AIDS", "Haematologic malignancy", "Metastatic cancer"]

# Save to aids_haem_mets.npy
# exist_ok=True creates ./res when it is absent and does nothing when it is
# already there, replacing the separate exists-then-create check.
os.makedirs('./res', exist_ok=True)

# The labels and the array are bundled in one dict. np.save stores a dict as
# a pickled object, so the file has to be read back with
# np.load(..., allow_pickle=True).
tosave = {'aids_haem_mets_labels': aids_haem_mets_labels, 'aids_haem_mets': aids_haem_mets}
np.save('res/aids_haem_mets.npy',tosave)
print("Saved!")

Saved!
