# Metrics

This script calculates metrics from the saved results of SuperLearner (Python version), FFN, and FFN+LSTM.

In [1]:
from sklearn.metrics import roc_auc_score, average_precision_score
import numpy as np
import matplotlib.pyplot as plt
import re
import os

%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('pdf')

## Translate the name of a dataset to its path

Given the name of a dataset, return the paths to: the folder holding the results of the FFN/FFN+LSTM models, the folder holding temporal data, the folder holding non-temporal data, the file holding the indices of each fold, and the imputed data file.

In [2]:
# Root folder of the saved FFN / FFN+LSTM results.
OUTPUTDIR = '../InterpretableMimicLearning/output/'
# Root folder of the extracted datasets.
DATADIR = '../../Data/'

def gen_paths_from_datasetname(datasetname):
    """Translate a dataset name into the paths of its result and data files.

    ``datasetname`` must have the form ``<dataname>_<featurename>_<hrs>h``,
    e.g. ``'mimic3_17f_raw_24h'``, where ``dataname`` is ``'mimic3'`` or
    ``'mimic2'``, ``featurename`` is ``'17f'``, ``'17f_raw'`` or ``'99p_raw'``
    and ``hrs`` is ``'24'`` or ``'48'``.

    Returns a 5-tuple ``(resultfold, datafold, non_serial_datafold,
    foldfilepath, impfilepath)``:

    * ``resultfold`` -- folder with the saved model results,
    * ``datafold`` -- folder with the temporal ('series') data,
    * ``non_serial_datafold`` -- folder with the non-temporal data,
    * ``foldfilepath`` -- path of the ``5-folds.npz`` file,
    * ``impfilepath`` -- path of the imputed data file.

    Raises ``ValueError`` when the name is malformed or names an unknown
    dataset, feature set or time window.
    """
    # fullmatch (not match) so that trailing garbage after the 'h' is rejected.
    parsed = re.fullmatch(r'(.+?)_(.+)_(\d+)h', datasetname)
    if parsed is None:
        raise ValueError('malformed dataset name: %r' % (datasetname,))
    dataname, featurename, hrs = parsed.groups()
    if dataname not in ('mimic3', 'mimic2'):
        raise ValueError('unknown dataset %r in %r' % (dataname, datasetname))
    if featurename not in ('17f', '17f_raw', '99p_raw'):
        raise ValueError('unknown feature set %r in %r' % (featurename, datasetname))
    if hrs not in ('24', '48'):
        raise ValueError('unknown time window %r in %r' % (hrs, datasetname))

    resultfold = os.path.join(OUTPUTDIR, datasetname, 'imputed-normed-ep_1_%s' % hrs)
    rawflag = '_raw' if featurename.endswith('_raw') else ''
    # mimic2 data is read from the 'cv' subfolder; for mimic3 the empty
    # component leaves a trailing separator on datafold.
    sub = 'cv' if dataname == 'mimic2' else ''
    featureroot = os.path.join(DATADIR, 'admdata_%s' % featurename.split('_')[0], '%shrs' % hrs + rawflag)
    datafold = os.path.join(featureroot, 'series', sub)
    non_serial_datafold = os.path.join(featureroot, 'non_series')
    foldfilepath = os.path.join(datafold, '5-folds.npz')
    impfilepath = os.path.join(datafold, 'imputed-normed-ep_1_%s.npz' % hrs)
    return resultfold, datafold, non_serial_datafold, foldfilepath, impfilepath

gen_paths_from_datasetname('mimic2_99p_raw_24h')

('../InterpretableMimicLearning/output/mimic2_99p_raw_24h/imputed-normed-ep_1_24',
 '../../Data/admdata_99p/24hrs_raw/series/cv',
 '../../Data/admdata_99p/24hrs_raw/non_series',
 '../../Data/admdata_99p/24hrs_raw/series/cv/5-folds.npz',
 '../../Data/admdata_99p/24hrs_raw/series/cv/imputed-normed-ep_1_24.npz')

## Define parsers

Here we define result parsers for different models.

In [10]:
# File-name suffixes of the saved model results. calc_metrics_dl1 and
# calc_metrics_ffn prepend 'Mortality' or 'ICD9_<labelnum>' to build the name
# of the .npz file to load. The key=value pairs appear to encode the training
# configuration of each run -- TODO confirm against the training script.
# DL1NAME is used for the 'dl1' results (plotted as 'FFN+LSTM' below);
# the FFNNAME_* variants are picked by feature set ('17f' vs '99p').
DL1NAME = '_MMDL1_output_dim=2_ffn_depth=1_merge_depth=0_batch_size=100_nb_epoch=250_EarlyStopping=True_BestWeight_EarlyStopping_patience=20_batch_normalization=True_learning_rate=0.001_dropout=0.1.npz'
FFNNAME_17f = '_MMDL2_output_dim=2_ffn_depth=1_merge_depth=0_batch_size=100_nb_epoch=250_EarlyStopping=True_BestWeight_EarlyStopping_patience=20_batch_normalization=True_learning_rate=0.001_dropout=0.1.npz'
FFNNAME_99p = '_MMDL2_output_dim=4_ffn_depth=4_merge_depth=0_batch_size=100_nb_epoch=250_EarlyStopping=True_BestWeight_EarlyStopping_patience=20_batch_normalization=True_learning_rate=0.001_dropout=0.1.npz'

def calc_metrics_dl(datasetname, taskname, labelnum, metricslist, modelname, foldn=5):
    """Compute per-fold metrics from the saved results of a deep-learning model.

    Parameters:
        datasetname: dataset name understood by ``gen_paths_from_datasetname``.
        taskname: ``'mor'``, ``'los'`` or ``'icd9'``.
        labelnum: label column to evaluate; also the name of the result subfolder.
        metricslist: callables invoked as ``metric(y_true, y_score)``.
        modelname: file name of the saved result inside the result subfolder.
        foldn: unused; kept for backward compatibility.

    Returns:
        One ``[mean, std]`` pair per metric (NaN-ignoring, taken over the folds),
        in the order of ``metricslist``.

    Raises:
        ValueError: if ``taskname`` is not one of the supported tasks.
    """
    # taskname -> (label key in the imputed data file,
    #              fold key in the fold file,
    #              first index into the fold array)
    # NOTE(review): 'los' reuses the mortality folds ('folds_ep_mor'), exactly
    # as the original code did -- confirm that this is intended.
    task_keys = {
        'mor': ('adm_labels_all', 'folds_ep_mor', labelnum),
        'los': ('y_los', 'folds_ep_mor', labelnum),
        'icd9': ('y_icd9', 'folds_ep_icd9_multi', 0),
    }
    if taskname not in task_keys:
        raise ValueError("unsupported taskname %r: expected 'mor', 'los' or 'icd9'" % (taskname,))
    label_key, fold_key, fold_index = task_keys[taskname]

    resultfold, _, _, foldfilepath, impfilepath = gen_paths_from_datasetname(datasetname)
    resultfilepath = os.path.join(resultfold, str(labelnum), modelname)

    # Context managers close the underlying .npz archives once the needed
    # arrays have been read into memory.
    with np.load(resultfilepath, encoding='bytes') as resultfile:
        y_soft_arr = resultfile['y_soft_arr'][0]
    with np.load(impfilepath) as impfile:
        y_true = impfile[label_key][:, labelnum]
    with np.load(foldfilepath) as foldfile:
        # Last column of the fold table holds the test indices of each fold.
        testset = foldfile[fold_key][fold_index][0][:, -1]

    resarr = [[] for _ in metricslist]
    for ti, test in enumerate(testset):
        y_soft_test, y_true_test = y_soft_arr[ti][test], y_true[test]
        for mi, metric in enumerate(metricslist):
            resarr[mi].append(metric(y_true_test, y_soft_test))
    return [[np.nanmean(x), np.nanstd(x)] for x in resarr]
    
def calc_metrics_dl1(datasetname, taskname, labelnum, metricslist, foldn=5):
    """Compute metrics for the saved 'DL1' model results.

    Builds the result file name from the task ('Mortality' for ``'mor'``,
    'ICD9_<labelnum>' for ``'icd9'``) plus ``DL1NAME`` and delegates to
    ``calc_metrics_dl``.

    Returns whatever ``calc_metrics_dl`` returns: one ``[mean, std]`` pair per
    metric in ``metricslist``.

    Raises:
        ValueError: if ``taskname`` is neither ``'mor'`` nor ``'icd9'``.
    """
    if taskname == 'mor':
        prefix = 'Mortality'
    elif taskname == 'icd9':
        prefix = 'ICD9_%d' % labelnum
    else:
        raise ValueError("unsupported taskname %r: expected 'mor' or 'icd9'" % (taskname,))
    # Forward the caller's foldn instead of silently resetting it to 5.
    return calc_metrics_dl(datasetname, taskname, labelnum, metricslist, prefix + DL1NAME, foldn=foldn)

def calc_metrics_ffn(datasetname, taskname, labelnum, metricslist, foldn=5):
    """Compute metrics for the saved FFN model results.

    Builds the result file name from the task ('Mortality' for ``'mor'``,
    'ICD9_<labelnum>' for ``'icd9'``) plus the suffix matching the feature set
    named in ``datasetname`` (``FFNNAME_17f`` or ``FFNNAME_99p``) and delegates
    to ``calc_metrics_dl``.

    Returns whatever ``calc_metrics_dl`` returns: one ``[mean, std]`` pair per
    metric in ``metricslist``.

    Raises:
        ValueError: if ``taskname`` is neither ``'mor'`` nor ``'icd9'``, or if
            ``datasetname`` contains neither ``'17f'`` nor ``'99p'``.
    """
    if taskname == 'mor':
        prefix = 'Mortality'
    elif taskname == 'icd9':
        prefix = 'ICD9_%d' % labelnum
    else:
        raise ValueError("unsupported taskname %r: expected 'mor' or 'icd9'" % (taskname,))

    if '17f' in datasetname:
        suffix = FFNNAME_17f
    elif '99p' in datasetname:
        suffix = FFNNAME_99p
    else:
        raise ValueError("no FFN result name known for dataset %r" % (datasetname,))
    # Forward the caller's foldn instead of silently resetting it to 5.
    return calc_metrics_dl(datasetname, taskname, labelnum, metricslist, prefix + suffix, foldn=foldn)

def calc_metrics_slpy(datasetname, taskname, labelnum, metricslist, sltype='sl2', foldn=5):
    """Compute per-fold metrics from the saved SuperLearner (Python) results.

    Parameters:
        datasetname: dataset name understood by ``gen_paths_from_datasetname``.
        taskname: ``'mor'`` or ``'icd9'``.
        labelnum: label number, part of the result file name.
        metricslist: callables invoked as ``metric(y_true, y_score)``.
        sltype: SuperLearner variant, part of the result file name
            (the notebook uses ``'sl1'`` and ``'sl2'``).
        foldn: unused; kept for backward compatibility.

    Returns:
        One ``[mean, std]`` pair per metric (NaN-ignoring, taken over the folds),
        in the order of ``metricslist``.

    Raises:
        ValueError: if ``taskname`` is neither ``'mor'`` nor ``'icd9'``.
    """
    if taskname not in ('mor', 'icd9'):
        raise ValueError("unsupported taskname %r: expected 'mor' or 'icd9'" % (taskname,))

    _, datafold, non_serial_datafold, _, _ = gen_paths_from_datasetname(datasetname)
    subname = 'cv' if datafold.endswith('cv') else 'all'
    resultfilepath = os.path.join(
        non_serial_datafold,
        'pyslresults-%s-%s-%s-%s.npz' % (taskname, str(labelnum), subname, sltype))
    # Context manager closes the .npz archive once both arrays are in memory.
    with np.load(resultfilepath) as result:
        y_soft_arr, y_true_arr = result['y_pred_cv'], result['y_true_cv']

    resarr = [[] for _ in metricslist]
    for y_soft, y_true in zip(y_soft_arr, y_true_arr):
        # Only the last prediction column is scored
        # (presumably the ensemble output -- TODO confirm).
        last_column = y_soft[:, -1]
        for mi, metric in enumerate(metricslist):
            resarr[mi].append(metric(y_true, last_column))
    return [[np.nanmean(x), np.nanstd(x)] for x in resarr]
    
# Example: AUROC and AUPRC ([mean, std] each) of the FFN model for ICD-9
# label 0 on the mimic3 / 99p_raw / 24h dataset.
metricslist = [roc_auc_score, average_precision_score]
calc_metrics_ffn('mimic3_99p_raw_24h', 'icd9', 0, metricslist)

[[0.76425748141691374, 0.0047493374723502398],
 [0.58070660179871947, 0.0072299268906704144]]

## Mortality tasks

Print metrics (AUPRC and AUROC) of SuperLearner (Python version), FFN, and FFN+LSTM for all datasets.

In [11]:
# AUPRC
metricslist = [roc_auc_score, average_precision_score]
# Position in metricslist of the metric printed by this cell (1 -> AUPRC).
metric_index = 1
taskname = 'mor'


def _metrics_or_none(calc, *args, **kwargs):
    """Return ``calc(*args, **kwargs)``, or None when the computation fails.

    Some dataset/model combinations have no usable saved results (they show up
    as 'None' in the printed rows), so a failure must not abort the whole table.
    """
    try:
        return calc(*args, **kwargs)
    except Exception:
        # Deliberately not a bare ``except:`` so that interrupting the kernel
        # (KeyboardInterrupt) still stops the loop.
        return None


def _format_row(results, index):
    """Join the 'mean, std' pair at ``index`` of every result with '&'."""
    return '&'.join(
        ', '.join('%.4f' % t for t in x[index]) if x is not None else 'None'
        for x in results)


for dn1 in ['mimic3', 'mimic2']:
    for dn2 in ['17f', '17f_raw', '99p_raw']:
        for dn3 in ['24h', '48h']:
            datasetname = '_'.join([dn1, dn2, dn3])
            # The 48h datasets are evaluated on one label fewer than the 24h ones.
            if dn3 == '24h':
                labelnums = [0,2,3,4,5]
            elif dn3 == '48h':
                labelnums = [0,3,4,5]
            sl1s = [_metrics_or_none(calc_metrics_slpy, datasetname, taskname, labelnum, metricslist, sltype='sl1')
                    for labelnum in labelnums]
            sl2s = [_metrics_or_none(calc_metrics_slpy, datasetname, taskname, labelnum, metricslist, sltype='sl2')
                    for labelnum in labelnums]
            dl1s = [_metrics_or_none(calc_metrics_dl1, datasetname, taskname, labelnum, metricslist)
                    for labelnum in labelnums]
            ffns = [_metrics_or_none(calc_metrics_ffn, datasetname, taskname, labelnum, metricslist)
                    for labelnum in labelnums]
            print(datasetname)
            for title, results in [('SL1', sl1s), ('SL2', sl2s), ('DL1', dl1s), ('FFN', ffns)]:
                print(title)
                print(_format_row(results, metric_index))
            print()

mimic3_17f_24h
SL1
0.4358, 0.0138&0.2004, 0.0195&0.2246, 0.0257&0.5057, 0.0171&0.6260, 0.0072
SL2
0.4996, 0.0107&0.2502, 0.0120&0.2799, 0.0380&0.5654, 0.0186&0.6610, 0.0090
DL1
0.4868, 0.0166&0.2569, 0.0331&0.2865, 0.0201&0.5488, 0.0187&0.6487, 0.0098
FFN
0.4637, 0.0074&0.2462, 0.0326&0.2468, 0.0315&0.5371, 0.0199&0.6456, 0.0080

mimic3_17f_48h
SL1
0.4199, 0.0124&0.1398, 0.0255&0.4903, 0.0139&0.6172, 0.0088
SL2
0.4431, 0.0179&0.2003, 0.0279&0.5244, 0.0172&0.6460, 0.0087
DL1
0.4722, 0.0173&0.2171, 0.0334&0.5428, 0.0164&0.6423, 0.0115
FFN
0.4098, 0.0169&0.1720, 0.0321&0.4967, 0.0153&0.6274, 0.0116

mimic3_17f_raw_24h
SL1
None&None&None&None&None
SL2
0.4117, 0.0119&0.1812, 0.0314&0.2195, 0.0263&0.4931, 0.0089&0.6330, 0.0100
DL1
0.4771, 0.0108&0.2509, 0.0358&0.1895, 0.0268&0.5463, 0.0229&0.6459, 0.0082
FFN
0.3819, 0.0135&0.1528, 0.0345&0.1882, 0.0149&0.4710, 0.0182&0.6150, 0.0071

mimic3_17f_raw_48h
SL1
None&None&None&None
SL2
0.4063, 0.0154&0.1262, 0.0279&0.4897, 0.0197&0.6300, 0.0067
DL1

In [12]:
# AUROC
metricslist = [roc_auc_score, average_precision_score]
# Position in metricslist of the metric printed by this cell (0 -> AUROC).
metric_index = 0
taskname = 'mor'


def _metrics_or_none(calc, *args, **kwargs):
    """Return ``calc(*args, **kwargs)``, or None when the computation fails.

    Some dataset/model combinations have no usable saved results (they show up
    as 'None' in the printed rows), so a failure must not abort the whole table.
    """
    try:
        return calc(*args, **kwargs)
    except Exception:
        # Deliberately not a bare ``except:`` so that interrupting the kernel
        # (KeyboardInterrupt) still stops the loop.
        return None


def _format_row(results, index):
    """Join the 'mean, std' pair at ``index`` of every result with '&'."""
    return '&'.join(
        ', '.join('%.4f' % t for t in x[index]) if x is not None else 'None'
        for x in results)


for dn1 in ['mimic3', 'mimic2']:
    for dn2 in ['17f', '17f_raw', '99p_raw']:
        for dn3 in ['24h', '48h']:
            datasetname = '_'.join([dn1, dn2, dn3])
            # The 48h datasets are evaluated on one label fewer than the 24h ones.
            if dn3 == '24h':
                labelnums = [0,2,3,4,5]
            elif dn3 == '48h':
                labelnums = [0,3,4,5]
            sl1s = [_metrics_or_none(calc_metrics_slpy, datasetname, taskname, labelnum, metricslist, sltype='sl1')
                    for labelnum in labelnums]
            sl2s = [_metrics_or_none(calc_metrics_slpy, datasetname, taskname, labelnum, metricslist, sltype='sl2')
                    for labelnum in labelnums]
            dl1s = [_metrics_or_none(calc_metrics_dl1, datasetname, taskname, labelnum, metricslist)
                    for labelnum in labelnums]
            # A failed FFN lookup is recorded as None in ffns itself, so the
            # FFN row always has one entry per label.
            ffns = [_metrics_or_none(calc_metrics_ffn, datasetname, taskname, labelnum, metricslist)
                    for labelnum in labelnums]
            print(datasetname)
            for title, results in [('SL1', sl1s), ('SL2', sl2s), ('DL1', dl1s), ('FFN', ffns)]:
                print(title)
                print(_format_row(results, metric_index))
            print()

mimic3_17f_24h
SL1
0.8448, 0.0038&0.8808, 0.0063&0.8627, 0.0079&0.8384, 0.0031&0.8260, 0.0019
SL2
0.8701, 0.0053&0.8851, 0.0105&0.8770, 0.0094&0.8620, 0.0063&0.8467, 0.0022
DL1
0.8665, 0.0063&0.8815, 0.0102&0.8725, 0.0063&0.8585, 0.0059&0.8450, 0.0019
FFN
0.8496, 0.0047&0.8673, 0.0069&0.8493, 0.0128&0.8475, 0.0050&0.8390, 0.0019

mimic3_17f_48h
SL1
0.8465, 0.0057&0.8675, 0.0046&0.8364, 0.0033&0.8222, 0.0047
SL2
0.8588, 0.0039&0.8710, 0.0063&0.8532, 0.0054&0.8414, 0.0028
DL1
0.8737, 0.0045&0.8596, 0.0124&0.8612, 0.0059&0.8418, 0.0049
FFN
0.8375, 0.0041&0.8466, 0.0186&0.8385, 0.0061&0.8309, 0.0048

mimic3_17f_raw_24h
SL1
None&None&None&None&None
SL2
0.8411, 0.0061&0.8667, 0.0097&0.8535, 0.0128&0.8395, 0.0031&0.8347, 0.0046
DL1
0.8730, 0.0065&0.8716, 0.0083&0.8294, 0.0188&0.8613, 0.0086&0.8441, 0.0025
FFN
0.8225, 0.0064&0.8345, 0.0083&0.8244, 0.0151&0.8256, 0.0063&0.8259, 0.0047

mimic3_17f_raw_48h
SL1
None&None&None&None
SL2
0.8471, 0.0036&0.8448, 0.0162&0.8427, 0.0071&0.8360, 0.0057
DL1

In [96]:
# Per-fold predictions and ground truth saved by the SuperLearner run
# (mortality label 0, 99p raw features, 48 hours).
x = np.load('../../Data/admdata_99p/48hrs_raw/non_series/pyslresults-mor-0-cv-sl2.npz')
y_preds = x['y_pred_cv']
y_true = x['y_true_cv']
def roc_prc_parser(y_preds, y_true):
    """Print per-fold and aggregated AUROC/AUPRC for every prediction column.

    ``y_preds[fn]`` is indexed as a 2-D array whose columns are scored
    independently against ``y_true[fn]`` (presumably one column per learner --
    TODO confirm). For each column this prints the per-fold score arrays, the
    column index, then the NaN-ignoring mean and std of AUROC and of AUPRC.

    Returns None.
    """
    algon = y_preds[0].shape[1]
    foldn = len(y_preds)
    for an in range(algon):
        aurocs = []
        auprcs = []
        for fn in range(foldn):
            fold_scores = y_preds[fn][:, an]
            aurocs.append(roc_auc_score(y_true[fn], fold_scores))
            try:
                auprcs.append(average_precision_score(y_true[fn], fold_scores))
            except Exception:
                # NaN (not None) keeps the array numeric, so the nanmean/nanstd
                # calls below skip the failed fold instead of raising TypeError.
                auprcs.append(np.nan)
        aurocs = np.array(aurocs)
        auprcs = np.array(auprcs)
        print(aurocs, auprcs)
        print(an)
        print(np.nanmean(aurocs), np.nanstd(aurocs))
        print(np.nanmean(auprcs), np.nanstd(auprcs))
        print()
roc_prc_parser(y_preds, y_true)

[ 0.86028553  0.83713665  0.85191225  0.84212915  0.85709113] [ 0.4205454   0.4004649   0.42451007  0.40883301  0.40970173]
0
0.849710941969 0.00879467251219
0.412811021761 0.00865572034833

[ 0.5  0.5  0.5  0.5  0.5] [ 0.55253716  0.55242246  0.5524359   0.5524359   0.5524359 ]
1
0.5 0.0
0.552453461704 4.2171921122e-05

[ 0.88015604  0.87490093  0.87408418  0.87447569  0.87264772] [ 0.49210546  0.47979785  0.48157535  0.48468386  0.485055  ]
2
0.875252910463 0.00256599067168
0.484643503227 0.00421297215709

[ 0.77875423  0.80632714  0.80193498  0.77842255  0.80777185] [ 0.33377285  0.35928818  0.37537046  0.32677323  0.37221866]
3
0.794642148895 0.0132485109005
0.35348467733 0.0198276942231

[ 0.80528106  0.80587028  0.79131073  0.79190044  0.79385062] [ 0.37400728  0.36449547  0.38150895  0.38069191  0.3473286 ]
4
0.797642628041 0.00653429127758
0.369606442416 0.0126992040444

[ 0.78822815  0.7938813   0.80700285  0.77715383  0.78371313] [ 0.339992    0.37236961  0.37081736  0.348684

## ICD-9 tasks

Calculate AUROC and AUPRC for ICD-9 prediction tasks.

In [13]:
def fetch_icd9_scores(mname, method):
    """Collect AUROC/AUPRC results of one method for the 20 ICD-9 labels.

    ``method`` selects the parser: any name starting with 'sl' goes to
    ``calc_metrics_slpy`` (with ``method`` as the SuperLearner type), 'dl1' to
    ``calc_metrics_dl1`` and 'ffn' to ``calc_metrics_ffn``.

    Returns a list of ``(label number starting at 1, metrics)`` tuples; the
    list is empty for any other ``method``.
    """
    metricslist = [roc_auc_score, average_precision_score]

    if method.startswith('sl'):
        def score(label):
            return calc_metrics_slpy(mname, 'icd9', label, metricslist, method)
    elif method == 'dl1':
        def score(label):
            return calc_metrics_dl1(mname, 'icd9', label, metricslist)
    elif method == 'ffn':
        def score(label):
            return calc_metrics_ffn(mname, 'icd9', label, metricslist)
    else:
        return []

    return [(label + 1, score(label)) for label in range(20)]

# Datasets to evaluate (24-hour variants); for the 48-hour variants use
# 'mimic3_17f_48h', 'mimic3_17f_raw_48h' and 'mimic3_99p_raw_48h' instead.
mnames = ['mimic3_17f_24h', 'mimic3_17f_raw_24h', 'mimic3_99p_raw_24h']

# reslist collects ('<dataset>_<method>', per-label scores) tuples.
reslist = []
for mname in mnames:
    # 'sl1' is only evaluated on the non-raw feature set.
    methods = ['sl2', 'ffn', 'dl1'] if 'raw' in mname else ['sl1', 'sl2', 'ffn', 'dl1']
    for method in methods:
        scores = fetch_icd9_scores(mname, method)
        reslist.append(('%s_%s' % (mname, method), scores))

reslist

[('mimic3_17f_24h_sl1',
  [(1,
    [[0.73711391276088867, 0.002315333539288355],
     [0.53557736826746338, 0.0027226964341727274]]),
   (2,
    [[0.83420940685412792, 0.0090533310581387225],
     [0.72901316798235771, 0.0140606510538433]]),
   (3,
    [[0.68347980082827919, 0.0072537501532267809],
     [0.80953114781727165, 0.0056901137475941221]]),
   (4,
    [[0.67136405240369001, 0.0051624174480989727],
     [0.54542047069214772, 0.0071840070708375911]]),
   (5,
    [[0.65401913737266149, 0.0035869243897161803],
     [0.47143635349864932, 0.0047346772861897694]]),
   (6,
    [[0.62220468317187339, 0.0027891189005378606],
     [0.41115131312665998, 0.0046456083151853039]]),
   (7,
    [[0.82700615530881694, 0.0071381258501261131],
     [0.95466864168965326, 0.0025868254517693681]]),
   (8,
    [[0.69763892886309287, 0.0049295379379891381],
     [0.69595118367290043, 0.0060601301353914424]]),
   (9,
    [[0.65177849731079185, 0.0063180711614122705],
     [0.58139895339914993, 0.00801

## Draw plot for ICD-9

In [16]:
# draw plots of icd9
N = len(reslist[0][1])
ind = np.arange(N)  # the x locations for the groups
width = 0.15       # the width of the bars
yr = 1             # metric to plot: index into scorestrs (0 = AUROC, 1 = AUPRC)
scorestrs = ['AUROC', 'AUPRC']
featurenames = ['17 Processed', '17 Raw', '136 Raw']
# Legend label for the method suffix of every '<dataset>_<method>' entry in reslist.
methodlabels = {'sl1': 'SL-1', 'sl2': 'SL-2', 'ffn': 'FFN', 'dl1': 'FFN+LSTM'}

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(20, 15))
for xr in range(3):
    ax = axes[xr]
    # One (name, [[mean, std] per ICD-9 label]) series per method of this dataset.
    datalist = [(x[0], [y[1][yr] for y in x[1]]) for x in reslist if x[0].startswith(mnames[xr])]
    rectss = []
    for r, (mname, icd) in enumerate(datalist):
        icdaucmeans = [x[0] for x in icd]
        icdaucerr = [x[1] for x in icd]
        rects = ax.bar(ind + r * width, icdaucmeans, width, yerr=icdaucerr)
        rectss.append(rects)

    ax.set_xlabel('ICD9 Code')
    ax.set_ylabel(scorestrs[yr])
    ax.set_title('{0} scores of {1} features'.format(scorestrs[yr], featurenames[xr]))
    ax.set_xticks(ind + (len(datalist) - 1) * width / 2)
    ax.set_xticklabels(np.arange(N) + 1)
    # Labels follow the methods actually present, so they stay correct when
    # the list of evaluated methods changes.
    ax.legend(rectss, [methodlabels[name.rsplit('_', 1)[-1]] for name, _ in datalist])
    # Set the range on every subplot; plt.ylim would only touch the current
    # (last) axes.
    ax.set_ylim(0.0, 1.0)


def autolabel(rects, ax=None):
    """
    Attach a text label above each bar displaying its height

    ``ax`` is the axes to draw on; it defaults to the current axes.
    """
    target = ax if ax is not None else plt.gca()
    for rect in rects:
        height = rect.get_height()
        target.text(rect.get_x() + rect.get_width()/2., 1.01*height,
                    '%.4f' % height,
                    ha='center', va='bottom')

# Optional: annotate the bars of the last subplot with their values.
# for rects in rectss:
#     autolabel(rects)
plt.show()

<matplotlib.figure.Figure at 0x7f5ac6c554a8>

## Positive ratio of ICD-9 codes

In [137]:
# Data folders to summarise, one printed row each.
statdirs = [
    '../../Data/admdata_17f/24hrs/series/mv',
    '../../Data/admdata_17f/24hrs/series/cv',
    '../../Data/admdata_17f/24hrs/series/',
]
for statdir in statdirs:
    npzpath = os.path.join(statdir, 'imputed-normed-ep_1_24.npz')
    icd9all = np.load(npzpath)['y_icd9']
    # Column-wise mean of the label matrix, i.e. one value per ICD-9 label.
    ratios = np.mean(icd9all, axis=0)
    print('&'.join('%.3f' % ratio for ratio in ratios))

0.302&0.201&0.765&0.445&0.416&0.424&0.846&0.504&0.461&0.461&0.003&0.119&0.266&0.042&0.411&0.115&0.050&0.453&0.634&0.429
0.225&0.151&0.629&0.311&0.244&0.195&0.820&0.468&0.339&0.352&0.005&0.090&0.133&0.032&0.251&0.064&0.016&0.448&0.362&0.263
0.258&0.172&0.688&0.369&0.318&0.294&0.831&0.484&0.391&0.399&0.004&0.102&0.190&0.036&0.320&0.086&0.030&0.450&0.479&0.335


## Histogram of length of stay

In [153]:
statdirs = [
    '../../Data/admdata_17f/24hrs/series/mv',
    '../../Data/admdata_17f/24hrs/series/cv',
    '../../Data/admdata_17f/24hrs/series/',
]
# Subplot titles, in the same order as statdirs.
ds = [
    'Metavision (2008-2012)',
    'Carevue (2001-2008)',
    'All sources (2001-2012)'
]

fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(20, 15))

for si, statdir in enumerate(statdirs):
    # Divided by 60 to plot hours (presumably y_los is stored in minutes -- TODO confirm).
    losall = np.load(os.path.join(statdir, 'imputed-normed-ep_1_24.npz'))['y_los']/(60)
    ax = axes[si]
    # density=True replaces the 'normed' argument, which newer matplotlib
    # releases no longer accept.
    ax.hist(losall, 200, density=True)
    ax.set_xbound(0, 2000)
    ax.set_xlabel('Length of stay (hours)')
    ax.set_ylabel('Ratio')
    ax.set_title('Datasource: %s' % ds[si])
plt.show()

<matplotlib.figure.Figure at 0x7fe6748af128>

In [155]:
# Mapping from feature name to its column index.
featurelist = {
    'ALANINE AMINOTRANSFERASE (ALT)': 66,
    'ALBUMIN': 69,
    'ALKALINE PHOSPHATASE': 68,
    'ANION GAP': 46,
    'ASPARATE AMINOTRANSFERASE (AST)': 67,
    'AlarmsOn': 79,
    'Albumin 5%': 11,
    'ArterialBloodPressurediastolic': 76,
    'ArterialBloodPressuremean': 77,
    'Aspirin': 70,
    'BASE EXCESS': 61,
    'BASOPHILS': 58,
    'Bisacodyl': 71,
    'CALCIUM, TOTAL': 50,
    'CALCULATED TOTAL CO2': 62,
    'CHLORIDE': 45,
    'CREATININE': 47,
    'Calcium Gluconate': 14,
    'CentralVenousPressure': 87,
    'Chest Tube #1': 5,
    'Chest Tube #2': 6,
    'D5 1/2NS': 27,
    'Docusate Sodium': 72,
    'EOSINOPHILS': 59,
    'Fecal Bag': 4,
    'Fresh Frozen Plasma': 12,
    'Furosemide (Lasix)': 17,
    'GLUCOSE': 48,
    'GT Flush': 34,
    'Gastric Gastric Tube': 0,
    'Gastric Meds': 26,
    'Glucosefingerstick': 91,
    'HEMATOCRIT': 37,
    'HEMOGLOBIN': 39,
    'HeartRateAlarm-Low': 92,
    'Humulin-R Insulin': 73,
    'Hydralazine': 18,
    'INR(PT)': 52,
    'Insulin - Regular': 22,
    'Jackson Pratt #1': 7,
    'KCL (Bolus)': 35,
    'LACTATE': 65,
    'LR': 28,
    'LYMPHOCYTES': 55,
    'Lorazepam (Ativan)': 13,
    'MAGNESIUM': 49,
    'MCH': 41,
    'MCHC': 40,
    'MCV': 42,
    'MONOCYTES': 56,
    'Magnesium Sulfate': 20,
    'Magnesium Sulfate (Bolus)': 36,
    'MeanAirwayPressure': 86,
    'Metoprolol Tartrate': 74,
    'Midazolam (Versed)': 15,
    'MinuteVolume': 83,
    'MinuteVolumeAlarm-High': 85,
    'MinuteVolumeAlarm-Low': 80,
    'Morphine Sulfate': 23,
    'NEUTROPHILS': 57,
    'Nitroglycerin': 21,
    'Norepinephrine': 19,
    'O2Flow': 90,
    'OR Crystalloid Intake': 32,
    'OR EBL': 8,
    'PCO2': 63,
    'PEEPset': 82,
    'PH': 60,
    'PHOSPHATE': 51,
    'PLATELET COUNT': 38,
    'PO Intake': 33,
    'PT': 53,
    'PTT': 54,
    'Packed Red Blood Cells': 25,
    'Pantoprazole': 75,
    'Peakinsp.Pressure': 81,
    'Phenylephrine': 16,
    'Piggyback': 31,
    'Potassium Chloride': 24,
    'Pre-Admission': 9,
    'PulmonaryArteryPressurediastolic': 95,
    'PulmonaryArteryPressuremean': 89,
    'PulmonaryArteryPressuresystolic': 93,
    'RDW': 44,
    'RED BLOOD CELLS': 43,
    'RespAlarm-High': 97,
    'RespiratoryRate': 78,
    'RespiratoryRate(Set)': 88,
    'SPECIFIC GRAVITY': 64,
    'SkinCare': 98,
    'Solution': 29,
    'SpO2DesatLimit': 96,
    'Sterile Water': 30,
    'Stool Out Stool': 1,
    'TF Residual': 10,
    'TidalVolume(observed)': 84,
    'TidalVolume(set)': 94,
    'Ultrafiltrate Ultrafiltrate': 3,
    'Urine Out Incontinent': 2,
    'arterial_pressure_mean': 116,
    'bilirubin_level': 113,
    'body_temperature': 104,
    'chloride': 124,
    'creatinine': 125,
    'diastolic_blood_pressure_mean': 115,
    'dopamine': 130,
    'epinephrine': 127,
    'fentanyl': 132,
    'fio2': 106,
    'gcseyes': 101,
    'gcsmotor': 100,
    'gcsverbal': 99,
    'glucose': 119,
    'heart_rate': 103,
    'height': 121,
    'hgb': 122,
    'ie_ratio_mean': 114,
    'midazolam': 131,
    'norepinephrine': 126,
    'pao2': 105,
    'peep': 134,
    'ph': 135,
    'phenylephrine': 128,
    'platelet': 123,
    'potassium_level_mean': 112,
    'propofol': 133,
    'respiratory_rate': 117,
    'serum_bicarbonate_level_mean': 110,
    'serum_urea_nitrogen_level': 108,
    'sodium_level_mean': 111,
    'spo2_peripheral': 118,
    'systolic_blood_pressure_abp_mean': 102,
    'urinary_output_sum': 107,
    'vasopressin': 129,
    'weight': 120,
    'white_blood_cells_count_mean': 109,
}

# (index, name) pairs ordered by index, printed one 'index,name' line each.
features = sorted(((index, name) for name, index in featurelist.items()), key=lambda pair: pair[0])
for index, name in features:
    print('{0},{1}'.format(index, name))

0,Gastric Gastric Tube
1,Stool Out Stool
2,Urine Out Incontinent
3,Ultrafiltrate Ultrafiltrate
4,Fecal Bag
5,Chest Tube #1
6,Chest Tube #2
7,Jackson Pratt #1
8,OR EBL
9,Pre-Admission
10,TF Residual
11,Albumin 5%
12,Fresh Frozen Plasma
13,Lorazepam (Ativan)
14,Calcium Gluconate
15,Midazolam (Versed)
16,Phenylephrine
17,Furosemide (Lasix)
18,Hydralazine
19,Norepinephrine
20,Magnesium Sulfate
21,Nitroglycerin
22,Insulin - Regular
23,Morphine Sulfate
24,Potassium Chloride
25,Packed Red Blood Cells
26,Gastric Meds
27,D5 1/2NS
28,LR
29,Solution
30,Sterile Water
31,Piggyback
32,OR Crystalloid Intake
33,PO Intake
34,GT Flush
35,KCL (Bolus)
36,Magnesium Sulfate (Bolus)
37,HEMATOCRIT
38,PLATELET COUNT
39,HEMOGLOBIN
40,MCHC
41,MCH
42,MCV
43,RED BLOOD CELLS
44,RDW
45,CHLORIDE
46,ANION GAP
47,CREATININE
48,GLUCOSE
49,MAGNESIUM
50,CALCIUM, TOTAL
51,PHOSPHATE
52,INR(PT)
53,PT
54,PTT
55,LYMPHOCYTES
56,MONOCYTES
57,NEUTROPHILS
58,BASOPHILS
59,EOSINOPHILS
60,PH
61,BASE EXCESS
62,CALCULATED TOTAL CO2
63,