# Collect mortality labels

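We use this script to collect the timestamps needed for the mortality labels (stored in folder './admdata_times') and to calculate the labels themselves (stored in folder './admdata_timelabels'). Labels generated here will be used in later steps of pre-processing.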

In [1]:
from __future__ import print_function

import psycopg2
import datetime
import sys
from operator import itemgetter, attrgetter, methodcaller
import numpy as np
import itertools
import os.path
import matplotlib.pyplot as plt
import math
from multiprocessing import Pool, cpu_count
import re
import traceback
import random
import shutil

from utils import getConnection
from utils import parseUnitsMap
from utils import parseNum
from utils import sparsify

random.seed(0)

%matplotlib inline

In [2]:
# Admission ids are recovered from the per-admission file names
# ('adm-<id>.npy') found in admdir and kept in ascending order.
admdir = 'admdata/'
admaids = sorted(
    int(hit.group(1))
    for hit in map(re.compile(r'adm\-(\d+)\.npy').match, os.listdir(admdir))
    if hit is not None
)

In [3]:
# Sanity check: the sample admission used further below should be present.
print(any(aid == 179979 for aid in admaids))

True


## Generate mortality labels

Here we collect all timestamps related to mortality labels.
A label is set to 1 in the following situations:
- in-hospital mortality: deathtime is not null
- 24/48/72-hour mortality: deathtime - starttime <= 24/48/72 hrs, where starttime is the first ICU intime (or admittime if the admission has no ICU stay)
- 30d/1yr mortality: dod - dischtime <= 30d/1yr

In [4]:
# Timestamps needed to derive the mortality labels
# (dob, dod, admittime, first ICU intime, ...), grouped by source table:
#   admissions: admittime, dischtime, deathtime
#   patients:   dob, dod
#   icustays:   intime, outtime
timedir = 'admdata_times/'
if not os.path.exists(timedir):
    os.makedirs(timedir)


def collect_timestamps(aid):
    """Fetch the timestamps of one admission and save them under ``timedir``.

    Runs three queries (admissions, patients, icustays) and stores the result
    as a pickled dict in ``<timedir>/adm-<aid>.npy`` with the keys ``dob``,
    ``dod``, ``admittime``, ``dischtime``, ``deathtime`` and ``icustays``
    (a list of ``(intime, outtime)`` rows ordered by ``intime``).

    Args:
        aid: hospital admission id (``hadm_id``); coerced with ``int()``.

    Raises:
        ValueError: if no admission row, or no patient row for the
            admission's subject, is found.
    """
    aid = int(aid)
    # NOTE(review): assumes getConnection() returns a fresh psycopg2
    # connection owned by this call, so closing it here is safe -- confirm.
    conn = getConnection()
    try:
        cur = conn.cursor()
        # Values are passed as query parameters instead of being formatted
        # into the SQL text.
        cur.execute(
            'select subject_id, admittime, dischtime, deathtime '
            'from mimiciii.admissions where hadm_id=%s',
            (aid,))
        admission = cur.fetchone()
        if admission is None:
            raise ValueError('no admission found for hadm_id={0}'.format(aid))
        subject_id, admittime, dischtime, deathtime = admission

        cur.execute(
            'select dob, dod from mimiciii.patients where subject_id=%s',
            (subject_id,))
        patient = cur.fetchone()
        if patient is None:
            raise ValueError(
                'no patient found for subject_id={0}'.format(subject_id))
        dob, dod = patient

        cur.execute(
            'select intime, outtime from mimiciii.icustays '
            'where hadm_id=%s order by intime',
            (aid,))
        icutimepairs = cur.fetchall()
    finally:
        # Release the connection even when a query fails; this function is
        # called once per admission.
        conn.close()

    data = {
        'dob': dob,
        'dod': dod,
        'admittime': admittime,
        'dischtime': dischtime,
        'deathtime': deathtime,
        'icustays': icutimepairs
    }
    np.save(os.path.join(timedir, 'adm-%.6d.npy' % aid), data)

# Output directory for the per-admission label files.
timelabeldir = 'admdata_timelabels/'
if not os.path.exists(timelabeldir):
    os.makedirs(timelabeldir)

np_load_old = np.load


def _np_load_allow_pickle(*args, **kwargs):
    """Call the original ``np.load`` with ``allow_pickle`` defaulting to True.

    All other positional and keyword arguments are forwarded unchanged, and
    an explicit ``allow_pickle=...`` from the caller is respected.
    """
    kwargs.setdefault('allow_pickle', True)
    return np_load_old(*args, **kwargs)


# modify the default parameters of np.load: the files written by this
# notebook hold pickled Python objects (dicts / None)
np.load = _np_load_allow_pickle

def _elapsed_seconds(later, earlier):
    """Return ``later - earlier`` in seconds, or None if either one is None."""
    if later is None or earlier is None:
        return None
    return (later - earlier).total_seconds()


def parse_labels(aid):
    """Derive the mortality labels of one admission from its saved timestamps.

    Reads ``<timedir>/adm-<aid>.npy`` and writes ``<timelabeldir>/adm-<aid>.npy``.
    The saved object is a dict of 0/1 labels:

    - ``mor``: ``deathtime`` is not None (in-hospital death)
    - ``mor24`` / ``mor48`` / ``mor72``: ``deathtime`` is at most 24/48/72
      hours after the start time
    - ``mor30d`` / ``mor1y``: ``dod`` is at most 30 days / one year after
      ``dischtime``

    The start time is the ``intime`` of the first ICU stay, or ``admittime``
    when the admission has no ICU stay. If the start time is None, ``None``
    is saved instead of a label dict. A label whose timestamps are missing
    stays 0.

    Args:
        aid: hospital admission id used in the file names.
    """
    fname = 'adm-%.6d.npy' % aid
    # The file holds a pickled dict, so allow_pickle is requested explicitly
    # rather than relying on a patched np.load.
    times = np.load(os.path.join(timedir, fname), allow_pickle=True).tolist()
    dod = times['dod']
    admittime = times['admittime']
    dischtime = times['dischtime']
    deathtime = times['deathtime']
    icustays = times['icustays']

    # choose starttime, here choose first icustay time in priority
    starttime = icustays[0][0] if icustays else admittime
    if starttime is None:
        np.save(os.path.join(timelabeldir, fname), None)
        return

    # in-hospital and short-term mortality, measured from starttime
    mor = int(deathtime is not None)
    mor24, mor48, mor72 = 0, 0, 0
    tlen = _elapsed_seconds(deathtime, starttime)
    if tlen is not None:
        mor24 = int(tlen <= 24 * 60 * 60)
        mor48 = int(tlen <= 48 * 60 * 60)
        mor72 = int(tlen <= 72 * 60 * 60)

    # post-discharge mortality, measured from dischtime
    mor30d, mor1y = 0, 0
    livelen = _elapsed_seconds(dod, dischtime)
    if livelen is not None:
        mor30d = int(livelen <= 30 * 24 * 60 * 60)
        # NOTE(review): 365.245 days is kept so existing labels do not change;
        # 365.2425 or 365.25 may have been intended -- confirm.
        mor1y = int(livelen <= 365.245 * 24 * 60 * 60)

    data = {
        'mor': mor,
        'mor24': mor24,
        'mor48': mor48,
        'mor72': mor72,
        'mor30d': mor30d,
        'mor1y': mor1y
    }
    np.save(os.path.join(timelabeldir, fname), data)

# Try both steps on a single admission before processing everything.
sample_aid = 179979
collect_timestamps(sample_aid)
parse_labels(sample_aid)

In [5]:
# Run the two stages one after the other: first collect the timestamps of
# every admission, then derive the labels of every admission.
# (Each stage could instead be dispatched with a multiprocessing Pool:
#  p = Pool(); p.apply_async(stage, args=(aid,)) per admission; p.close(); p.join())
for stage in (collect_timestamps, parse_labels):
    for aid in admaids:
        stage(aid)