In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sys
import os
import datetime as dt

from copy import deepcopy

import sklearn
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# logistic regression is our favourite model ever
from sklearn import linear_model

# used to calculate AUROC/accuracy
from sklearn import metrics

import seaborn as sns

# local utils
import utils

import xgboost as xgb

import pickle

# default colours/marker/linestyles for prettier plots
# `col` is a list of eight RGB triplets with components in [0, 1].
col = [[0.9047, 0.1918, 0.1988],
    [0.2941, 0.5447, 0.7494],
    [0.3718, 0.7176, 0.3612],
    [1.0000, 0.5482, 0.1000],
    [0.4550, 0.4946, 0.4722],
    [0.6859, 0.4035, 0.2412],
    [0.9718, 0.5553, 0.7741],
    [0.5313, 0.3359, 0.6523]];
# seven marker codes (one fewer than the colour / line-style lists)
marker = ['v','o','d','^','s','o','+']
# NOTE(review): 's' is a matplotlib marker code rather than a line style, so
# ls[5] looks like a typo - confirm the intended style before using this list.
ls = ['-','-','-','-','-','s','--','--']

# global matplotlib look: ggplot theme with enlarged fonts
plt.style.use('ggplot')
plt.rcParams.update({'font.size': 20, 'xtick.labelsize': 16, 'ytick.labelsize': 16})

# use viridis because color theory reasons
sns.set_palette(sns.color_palette('viridis'))

## Load data

In [None]:
# Seed the global NumPy RNG so the random hospital split below is reproducible.
np.random.seed(3231)

# ---- MIMIC ----------------------------------------------------------------
print('=== MIMIC ===')
df_mimic = pd.read_csv('X_mimic_day1.csv.gz', sep=',', index_col=0)

# identifier / outcome columns that are not model inputs
var_other = ['hospitalid', 'death', 'hosp_los', 'ventdays']

# outcome vector and feature matrix for the full MIMIC cohort
y_mimic = df_mimic['death'].values
X_mimic = df_mimic.drop(var_other, axis=1).values

mimic_summary = '{:5s} - {} observations. Outcome rate: {:2.2f}%.'
print(mimic_summary.format('mimic', X_mimic.shape[0], 100.0*np.mean(y_mimic)))

# within MIMIC, hospitalid 0 is reported as CareVue and 1 as MetaVision
for system_code, system_summary in [
        (0, 'CareVue - {} observations. Outcome rate: {:2.2f}%.'),
        (1, 'MetaVision - {} observations. Outcome rate: {:2.2f}%.')]:
    idx = df_mimic['hospitalid']==system_code
    print(system_summary.format(idx.sum(), 100.0*np.mean(df_mimic.loc[idx, 'death'])))

# ---- eICU -----------------------------------------------------------------
print('=== eICU ===')
df_eicu = pd.read_csv('X_eicu_day1.csv.gz', sep=',', index_col=0)

# hospitals with fewer patients than this are dropped
MIN_PAT = 500

hospital_counts = df_eicu['hospitalid'].value_counts()
hospital_list = hospital_counts[hospital_counts>=MIN_PAT].index.values
print('Retaining {} of {} hospitals with at least {} patients.'.format(
    len(hospital_list), df_eicu['hospitalid'].nunique(), MIN_PAT))

df_eicu = df_eicu.loc[df_eicu['hospitalid'].isin(hospital_list), :]

# randomly assign half of the retained hospitals (rounded down) to training
shuffled_positions = np.random.permutation(len(hospital_list))
n_train_hospitals = len(shuffled_positions) // 2
hospitals_train = hospital_list[shuffled_positions[:n_train_hospitals]]

# the remaining hospitals form the test set; the split is by hospital, not by patient
in_train_hospital = df_eicu['hospitalid'].isin(hospitals_train)
df_eicu_test = df_eicu.loc[~in_train_hospital, :]
df_eicu_train = df_eicu.loc[in_train_hospital, :]

split_summary = '{} observations. Outcome rate: {:2.2f}%.'
print('{} observations.'.format(df_eicu.shape[0]))
print('eICU - Train', split_summary.format(
    df_eicu_train.shape[0], 100.0*np.mean(df_eicu_train['death'])))
print('eICU - Test', split_summary.format(
    df_eicu_test.shape[0], 100.0*np.mean(df_eicu_test['death'])))

In [None]:
# Show the column names of the eICU frame (after the hospital-size filter).
df_eicu.columns

In [None]:
# NOTE(review): development-only IPython introspection (`?`). `tableone` is
# first imported in the following cell, so on a fresh kernel this lookup runs
# before the import - consider removing this cell.
tableone.TableOne?

In [None]:
import tableone

# Build a summary table of the eICU cohort and print it as LaTeX.
df_tmp = df_eicu[['hosp_los', 'is_female',  'age', 'electivesurgery']].copy()

# Collapse the race indicator columns into one categorical column.  Every row
# starts as 'White'; later entries in this list overwrite earlier ones when
# more than one indicator is set.
df_tmp['Race'] = 'White'
for race_flag, race_label in [('race_black', 'African Americans'),
                              ('race_hispanic', 'Hispanic'),
                              ('race_asian', 'Asian'),
                              ('race_other', 'Other (non-Caucasian)')]:
    df_tmp.loc[df_eicu[race_flag] == 1, 'Race'] = race_label

df_tmp = df_tmp.rename(columns={'hosp_los': 'Hospital LOS (days)'})

categorical_cols = ['is_female', 'electivesurgery', 'Race']
nonnormal_cols = ['Hospital LOS (days)', 'age']
t1 = tableone.TableOne(df_tmp, categorical=categorical_cols, nonnormal=nonnormal_cols)
print(t1.to_latex())

In [None]:
def prep_data(df, target='death', features=None):
    """
    Split a dataframe into a feature matrix and an outcome vector.

    `features` lists the columns to use; when omitted, every column of `df`
    is a candidate. The `target` column is always removed from the features.

    Returns a tuple `(X, y)` of numpy arrays.
    """
    y = df[target].values

    candidate_cols = df.columns if features is None else features
    # never let the outcome leak into the design matrix
    feature_cols = [name for name in candidate_cols if name != target]

    return df[feature_cols].values, y

def train_model(df, target='death', features=None):
    """
    Fit the default mortality model on a dataframe and return it.

    The model is a scikit-learn Pipeline: mean imputation of missing values,
    standardisation, then an L2-penalised logistic regression (lbfgs solver).
    By default all columns except `target` are used; pass `features` to
    restrict the inputs.
    """
    X, y = prep_data(df, target=target, features=features)

    steps = [
        ("imputer", SimpleImputer(missing_values=np.nan, strategy="mean")),
        ("scaler", StandardScaler()),
        ('model', linear_model.LogisticRegression(penalty='l2', solver='lbfgs')),
    ]

    # Pipeline.fit returns the fitted pipeline itself
    return Pipeline(steps).fit(X, y)

def predict_with_model(mdl, df, target='death', features=None):
    """
    Return the fitted model's probabilities for the rows of a dataframe.

    `features` selects the input columns (default: all columns but `target`).
    The result is the second column of `mdl.predict_proba`.
    """
    # the outcome vector is not needed for prediction
    X, _ = prep_data(df, target=target, features=features)
    return mdl.predict_proba(X)[:, 1]

def hospital_stats(x, outcome='death'):
    """
    Easy way to calculate a few stats on a group by of a dataframe.
    Requires predictions be in column called 'prob'.

    Parameters
    ----------
    x : DataFrame holding the `outcome` column (0/1) and a 'prob' column.
    outcome : name of the binary outcome column.

    Returns
    -------
    pd.Series with keys 'count', 'outcome', 'smr', 'mse', 'mae', 'logloss'
    and 'auc'. 'auc' is NaN when the group contains a single outcome class.
    """
    y_true = x[outcome]
    y_prob = x['prob']

    d = {}
    d['count'] = y_true.count()
    d['outcome'] = y_true.sum()

    # SMR = observed outcomes / expected outcomes (sum of predicted probabilities)
    d['smr'] = y_true.sum() / y_prob.sum()
    d['mse'] = metrics.mean_squared_error(y_true, y_prob)
    d['mae'] = metrics.mean_absolute_error(y_true, y_prob)
    # Pass the label set explicitly: without it log_loss raises when a group
    # contains only one class, which made the AUROC guard below unreachable.
    d['logloss'] = metrics.log_loss(y_true, y_prob, labels=[0, 1])
    if len(np.unique(y_true)) <= 1:
        # AUROC is undefined without both classes
        d['auc'] = np.nan
    else:
        d['auc'] = metrics.roc_auc_score(y_true, y_prob)
    return pd.Series(d)

def evaluate_predictions(prob, df, target='death'):
    """
    Given a set of predictions, evaluate their quality.

    Parameters
    ----------
    prob : array of predicted probabilities, aligned with the rows of `df`.
    df : DataFrame containing the binary (0/1) outcome column `target`.
    target : name of the outcome column.

    Returns
    -------
    dict with keys 'count', 'outcome', 'smr', 'mse', 'mae', 'logloss' and
    'auc'. 'auc' is NaN when only one outcome class is present.
    """
    y = df[target]

    d = dict()

    d['count'] = y.count()
    d['outcome'] = y.sum()

    # SMR = observed outcomes / expected outcomes (sum of predicted probabilities)
    d['smr'] = y.sum() / prob.sum()
    d['mse'] = metrics.mean_squared_error(y, prob)
    d['mae'] = metrics.mean_absolute_error(y, prob)
    # Pass the label set explicitly: without it log_loss raises when `y`
    # contains only one class, which made the AUROC guard below unreachable.
    d['logloss'] = metrics.log_loss(y, prob, labels=[0, 1])
    if len(np.unique(y)) <= 1:
        # AUROC is undefined without both classes
        d['auc'] = np.nan
    else:
        d['auc'] = metrics.roc_auc_score(y, prob)
    return d

In [None]:
from sklearn.model_selection import cross_validate

def smr(y_true, y_prob):
    """Standardised mortality ratio: observed outcomes over expected outcomes."""
    observed = np.sum(y_true)
    expected = np.sum(y_prob)
    return observed / expected

def cv_model(df, target='death', features=None, mdl=None, cv=5):
    """
    Cross-validate a model on a dataframe and return its mean performance.

    Parameters
    ----------
    df : DataFrame with the outcome column and the feature columns.
    target : name of the outcome column.
    features : optional list of feature columns (default: all but `target`).
    mdl : optional estimator to evaluate. When None, the default pipeline is
        used: mean imputation, standardisation, L2 logistic regression.
    cv : cross-validation setting forwarded to `cross_validate`
        (default 5, i.e. 5-fold CV as before).

    Returns
    -------
    dict with the mean test 'auc' and mean test 'smr' across folds.
    """
    X, y = prep_data(df, target=target, features=features)

    # if no input model is given, do a logistic regression
    if mdl is None:
        base_mdl = linear_model.LogisticRegression(penalty='l2', solver='lbfgs')

        mdl = Pipeline([("imputer", SimpleImputer(missing_values=np.nan,
                                          strategy="mean")),
                      ("scaler", StandardScaler()),
                      ('model', base_mdl)])

    # NOTE(review): `needs_proba` is deprecated in recent scikit-learn releases
    # in favour of `response_method='predict_proba'` - confirm against the
    # pinned scikit-learn version before upgrading.
    scoring = {'auc': 'roc_auc', 'smr': metrics.make_scorer(smr, needs_proba=True)}

    # fit and score the model on each fold
    scores = cross_validate(mdl, X, y, cv=cv, scoring=scoring)

    return {'auc': np.mean(scores['test_auc']), 'smr': np.mean(scores['test_smr'])}

# Experiments

In [None]:
# Column names of the eICU frame, displayed again for reference.
df_eicu.columns

In [None]:
# define features: every MIMIC column except identifiers and outcomes
features = [x for x in df_mimic.columns if x not in ['hospitalid', 'death', 'hosp_los', 'ventdays']]

# drop race/elective surgery
features = [x for x in features if not x.startswith('race_')]
# compare by equality: `x not in ('electivesurgery')` tested membership in a
# *string* (the parentheses do not make a tuple), which would also drop any
# column whose name is a substring of 'electivesurgery'
features = [x for x in features if x != 'electivesurgery']

# drop min/max
features = [x for x in features if not x.endswith('_max')]
features = [x for x in features if not x.endswith('_min')]
# drop blood gases
#features = [x for x in features if not x.startswith('bg_')]

print(len(features))

## 1. Time (CareVue -> MetaVision)

1. Train model on CareVue
2. Calculate predictions on MetaVision
3. Evaluate performance

In [None]:
# CareVue rows (hospitalid 0) train the model; MetaVision rows (hospitalid 1) test it
is_carevue = df_mimic['hospitalid']==0
is_metavision = df_mimic['hospitalid']==1
df_train = df_mimic.loc[is_carevue, :]
df_test = df_mimic.loc[is_metavision, :]

mdl = train_model(df_train, target='death', features=features)
prob = predict_with_model(mdl, df_test, target='death', features=features)
mimic_perf = evaluate_predictions(prob, df_test, target='death')

print('Train on CareVue, evaluate on MetaVision.')
for stat_name, stat_value in mimic_perf.items():
    print('{:20s}: {}'.format(stat_name, stat_value))

## 2. Time and space (MIMIC CareVue -> eICU)

In [None]:
# training dataframe: MIMIC CareVue patients (hospitalid 0)
df_train = df_mimic.loc[df_mimic['hospitalid']==0, :]

mdl = train_model(df_train, target='death', features=features)

np.random.seed(56534)

# Evaluate the CareVue model on each held-out eICU hospital.  Results are
# collected in a list and turned into a dataframe once, instead of growing a
# dataframe with pd.concat on every iteration.
perf_rows = []
perf_hospitals = []
for h in hospital_list:
    if h in hospitals_train:
        continue
    df_test = df_eicu.loc[df_eicu['hospitalid'] == h, :]

    prob = predict_with_model(mdl, df_test, target='death', features=features)
    perf = evaluate_predictions(prob, df_test, target='death')

    perf_rows.append(perf)
    perf_hospitals.append(h)

# one row per held-out hospital, indexed by hospital id
mimic_to_eicu = pd.DataFrame(perf_rows, index=perf_hospitals)
mimic_to_eicu.head()

Plot the performance of a MIMIC-III CareVue model on eICU hospitals.

In [None]:
# create a bubble plot AUROC vs SMR

# blue circles for performance of MIMIC model on eICU
# (x/y are passed by keyword: recent seaborn releases reject positional x/y)
plt.figure(figsize=[10,6])
sns.scatterplot(x='auc', y='smr', data=mimic_to_eicu,
            s=np.ceil(mimic_to_eicu['count']),
            marker='o', edgecolors=None, alpha=0.5, color='#348ABD')

# red hatch for performance of carevue (MIMIC) model on metavision (MIMIC)
plt.scatter(mimic_perf['auc'], mimic_perf['smr'],
            s=(df_mimic['hospitalid']==1).sum()/10, hatch='+',
            marker='o', edgecolors='k', alpha=0.5, color='#E24A33',
            label='MIMIC-III')

# lines for mean performance
plt.plot( (mimic_to_eicu['auc']).mean()*np.ones(2), [0,1], '--', color=[0.1,0.1,0.1],alpha=0.7)
plt.plot( [0.6,1.0], (mimic_to_eicu['smr']).mean()*np.ones(2), '--', color=[0.1,0.1,0.1],alpha=0.7)


plt.yticks(np.linspace(0, 2, 5))
plt.xticks(np.linspace(0.6, 1, 5))
plt.ylim([-0.2,1.2])
plt.xlim([0.55,1.05])
plt.ylabel('SMR')
plt.xlabel('AUROC')
plt.savefig('mimic-to-eicu.png')
plt.show()

print('Average AUROC: {:0.3f}'.format((mimic_to_eicu['auc']).mean()))
print('Average SMR: {:0.3f}'.format((mimic_to_eicu['smr']).mean()))

In [None]:
# Re-use the CareVue-trained pipeline, but re-fit only its scaler on each
# held-out eICU hospital before predicting (imputer and classifier untouched).
mdl_recalib = deepcopy(mdl)
prob = np.zeros(df_eicu.shape[0])

for h in hospital_list:
    if h in hospitals_train:
        continue
    idxKeep = (df_eicu['hospitalid'] == h).values

    # get current hosp data and impute missing data
    X_current, y_current = prep_data(df_eicu.loc[idxKeep, :], target='death', features=features)
    X_current_t = mdl_recalib.named_steps['imputer'].transform(X_current)

    # refit the scaler on this hospital's data, then standardise with it
    hospital_scaler = mdl_recalib.named_steps['scaler']
    X_current_t = hospital_scaler.fit(X_current_t).transform(X_current_t)

    # get prob on new dataset
    curr_prob = mdl_recalib.named_steps['model'].predict_proba(X_current_t)[:,1]
    prob[idxKeep] = curr_prob

# add predictions to dataframe (training hospitals keep prob=0 and are dropped)
df_prob = df_eicu[['hospitalid', 'death']].copy()
df_prob['prob'] = prob
df_prob = df_prob.loc[~df_prob['hospitalid'].isin(hospitals_train), :]

# re-calculate hospital stats - scatter plot grouped by hospital
h_stats_normed = df_prob.groupby('hospitalid').apply(hospital_stats)

# overall auroc
auc = metrics.roc_auc_score(df_prob['death'], df_prob['prob'])
print('eICU overall AUROC: {:0.3f}'.format(auc))
print('eICU overall SMR:   {:0.3f}'.format(df_prob['death'].sum() / df_prob['prob'].sum()))

# create a bubble plot AUROC vs SMR
# (figures noted from an earlier run)
#Average AUROC: 0.856
#Average SMR: 0.964

# blue circles for performance of the re-scaled MIMIC model on eICU
# (x/y are passed by keyword: recent seaborn releases reject positional x/y)
plt.figure(figsize=[10,6])
sns.scatterplot(x='auc', y='smr', data=h_stats_normed,
            s=np.ceil(h_stats_normed['count']),
            marker='o', edgecolors=None, alpha=0.5, color='#348ABD')

# red hatch for performance of carevue (MIMIC) model on metavision (MIMIC).
# This cell previously read `eicu_cv_perf`, which is only created in the
# later eICU section, so it failed on a fresh top-to-bottom run.
plt.scatter(mimic_perf['auc'], mimic_perf['smr'],
            s=(df_mimic['hospitalid']==1).sum()/10, hatch='+',
            marker='o', edgecolors='k', alpha=0.5, color='#E24A33',
            label='MIMIC-III')

# lines for mean performance
plt.plot( (h_stats_normed['auc']).mean()*np.ones(2), [0.2, 1.8], '--', color=[0.1,0.1,0.1],alpha=0.7)
plt.plot( [0.75, 0.95], (h_stats_normed['smr']).mean()*np.ones(2), '--', color=[0.1,0.1,0.1],alpha=0.7)


plt.yticks(np.linspace(0, 2, 5))
plt.xticks(np.linspace(0.6, 1, 5))
plt.ylim([0, 2])
plt.xlim([0.7,1.0])
plt.ylabel('SMR')
plt.xlabel('AUROC')
plt.show()

print('Average AUROC: {:0.3f}'.format((h_stats_normed['auc']).mean()))
print('Average SMR: {:0.3f}'.format((h_stats_normed['smr']).mean()))

## 3. Space (eICU <-> eICU)

In [None]:
# get cross-validation AUROC / SMR on the training hospitals
eicu_cv_perf = cv_model(df_eicu_train, target='death', features=features)

# train model using entire training set
mdl = train_model(df_eicu_train, target='death', features=features)

np.random.seed(56534)

# Evaluate the eICU model on each held-out eICU hospital.  Results are
# collected in a list and turned into a dataframe once, instead of growing a
# dataframe with pd.concat on every iteration.
perf_rows = []
perf_hospitals = []
for h in hospital_list:
    if h in hospitals_train:
        continue
    df_test = df_eicu.loc[df_eicu['hospitalid'] == h, :]

    prob = predict_with_model(mdl, df_test, target='death', features=features)
    perf = evaluate_predictions(prob, df_test, target='death')

    perf_rows.append(perf)
    perf_hospitals.append(h)

# one row per held-out hospital, indexed by hospital id
eicu_to_eicu = pd.DataFrame(perf_rows, index=perf_hospitals)
eicu_to_eicu.head()

In [None]:
# How often does a held-out hospital score below the cross-validated AUROC?
print(eicu_cv_perf)
below_cv = eicu_to_eicu['auc'] < eicu_cv_perf['auc']
print('{:3.2f}% of hospitals had AUROC lower than in CV.'.format(100*below_cv.mean()))

In [None]:
# Per-hospital results ordered from lowest to highest AUROC.
eicu_to_eicu.sort_values('auc')

In [None]:
# create a bubble plot AUROC vs SMR

# blue circles for performance of eICU model on eICU
# (x/y are passed by keyword: recent seaborn releases reject positional x/y)
plt.figure(figsize=[10,6])
sns.scatterplot(x='auc', y='smr', data=eicu_to_eicu,
            s=np.ceil(eicu_to_eicu['count']),
            marker='o', edgecolors=None, alpha=0.5, color='#348ABD')

# red hatch for the cross-validated performance on the eICU training hospitals
# (this point was previously mislabelled as MIMIC-III)
plt.scatter(eicu_cv_perf['auc'], eicu_cv_perf['smr'],
            s=df_eicu_train.shape[0]/10, hatch='+',
            marker='o', edgecolors='k', alpha=0.5, color='#E24A33',
            label='eICU (cross-validation)')

# lines for mean performance
plt.plot( (eicu_to_eicu['auc']).mean()*np.ones(2), [0.2, 1.8], '--', color=[0.1,0.1,0.1],alpha=0.7)
plt.plot( [0.75, 0.95], (eicu_to_eicu['smr']).mean()*np.ones(2), '--', color=[0.1,0.1,0.1],alpha=0.7)


plt.yticks(np.linspace(0, 2, 5))
plt.xticks(np.linspace(0.6, 1, 5))
plt.ylim([0, 2])
plt.xlim([0.7,1.0])
plt.ylabel('SMR')
plt.xlabel('AUROC')
plt.show()

print('Average AUROC: {:0.3f}'.format((eicu_to_eicu['auc']).mean()))
print('Average SMR: {:0.3f}'.format((eicu_to_eicu['smr']).mean()))

### eICU -> eICU, recalibrating

In [None]:
# Preview the first rows of the eICU frame.
df_eicu.head()

In [None]:
# Re-use the eICU-trained pipeline, but re-fit only its scaler on each
# held-out hospital before predicting (imputer and classifier untouched).
mdl_recalib = deepcopy(mdl)
prob = np.zeros(df_eicu.shape[0])

for h in hospital_list:
    if h in hospitals_train:
        continue
    idxKeep = (df_eicu['hospitalid'] == h).values

    # get current hosp data and impute missing data
    X_current, y_current = prep_data(df_eicu.loc[idxKeep, :], target='death', features=features)
    X_current_t = mdl_recalib.named_steps['imputer'].transform(X_current)

    # refit the scaler on this hospital's data, then standardise with it
    hospital_scaler = mdl_recalib.named_steps['scaler']
    X_current_t = hospital_scaler.fit(X_current_t).transform(X_current_t)

    # get prob on new dataset
    curr_prob = mdl_recalib.named_steps['model'].predict_proba(X_current_t)[:,1]
    prob[idxKeep] = curr_prob

# add predictions to dataframe (training hospitals keep prob=0 and are dropped)
df_prob = df_eicu[['hospitalid', 'death']].copy()
df_prob['prob'] = prob
df_prob = df_prob.loc[~df_prob['hospitalid'].isin(hospitals_train), :]

# re-calculate hospital stats - scatter plot grouped by hospital
h_stats_normed = df_prob.groupby('hospitalid').apply(hospital_stats)

# overall auroc
auc = metrics.roc_auc_score(df_prob['death'], df_prob['prob'])
print('eICU overall AUROC: {:0.3f}'.format(auc))
print('eICU overall SMR:   {:0.3f}'.format(df_prob['death'].sum() / df_prob['prob'].sum()))

display(h_stats_normed)

# create a bubble plot AUROC vs SMR
# (figures noted from an earlier run)
#Average AUROC: 0.856
#Average SMR: 0.964

# blue circles for performance of the re-scaled eICU model on eICU
# (x/y are passed by keyword: recent seaborn releases reject positional x/y)
plt.figure(figsize=[10,6])
sns.scatterplot(x='auc', y='smr', data=h_stats_normed,
            s=np.ceil(h_stats_normed['count']),
            marker='o', edgecolors=None, alpha=0.5, color='#348ABD')

# red hatch for the cross-validated performance on the eICU training hospitals
# (this point was previously mislabelled as MIMIC-III)
plt.scatter(eicu_cv_perf['auc'], eicu_cv_perf['smr'],
            s=df_eicu_train.shape[0]/10, hatch='+',
            marker='o', edgecolors='k', alpha=0.5, color='#E24A33',
            label='eICU (cross-validation)')

# lines for mean performance
plt.plot( (h_stats_normed['auc']).mean()*np.ones(2), [0.2, 1.8], '--', color=[0.1,0.1,0.1],alpha=0.7)
plt.plot( [0.75, 0.95], (h_stats_normed['smr']).mean()*np.ones(2), '--', color=[0.1,0.1,0.1],alpha=0.7)


plt.yticks(np.linspace(0, 2, 5))
plt.xticks(np.linspace(0.6, 1, 5))
plt.ylim([0, 2])
plt.xlim([0.7,1.0])
plt.ylabel('SMR')
plt.xlabel('AUROC')
plt.show()

print('Average AUROC: {:0.3f}'.format((h_stats_normed['auc']).mean()))
print('Average SMR: {:0.3f}'.format((h_stats_normed['smr']).mean()))

In [None]:
#Average AUROC: 0.844
#Average SMR: 0.954

In [None]:
# First rows of the per-hospital results, ordered by hospital id (the index).
eicu_to_eicu.sort_index().head()

In [None]:
# First rows of the per-hospital results obtained with the re-fitted scaler.
h_stats_normed.head()

In [None]:
# Pair each hospital's baseline result with its re-scaled result; columns of
# the re-scaled frame get the '_norm' suffix.
df_h_s = eicu_to_eicu.merge(h_stats_normed, how='inner', left_index=True, right_index=True, suffixes=('', '_norm'))
df_h_s['auc_diff'] = df_h_s['auc_norm'] - df_h_s['auc']
df_h_s['smr_diff'] = df_h_s['smr_norm'] - df_h_s['smr']

# create histograms of the per-hospital change.  plt.hist plots counts, which
# is what the y-axis label promises (distplot drew a density by default).
plt.figure(figsize=[10,6])
plt.hist(df_h_s['auc_diff'].dropna(), bins=np.linspace(-0.1, 0.1, 21))
plt.xticks(np.linspace(-0.1, 0.1, 5))
plt.xlabel('AUROC improvement ')
plt.ylabel('Number of hospitals')
plt.show()

plt.figure(figsize=[10,6])
plt.hist(df_h_s['smr_diff'].dropna(), bins=np.linspace(-0.5, 0.5, 21))
# ticks now span the same range as the bins (they previously ran from -1 to 1)
plt.xticks(np.linspace(-0.5, 0.5, 5))
plt.xlabel('SMR improvement ')
plt.ylabel('Number of hospitals')
plt.show()

# Recalibrate on a single hospital

In [None]:
# List the retained hospitals that were not assigned to the training split.
[h for h in hospital_list if h not in hospitals_train]

In [None]:
# dictionary to store all results
# per-hospital result stores, keyed by hospital id
imp_local, imp_excl = {}, {}

In [None]:
# each sample will increment by N_PT patients
N_PT = 100

for h in [264, 443, 252]:
    idx = df_eicu['hospitalid'] == h
    df_hosp = df_eicu.loc[idx, :].copy()
    n_hosp = df_hosp.shape[0]
    # report the hospital size from df_hosp (`pt` does not exist yet here)
    print('{} patients in hospital {}.'.format(n_hosp, h))

    # stratified sampling avoids issues around fluctuating deaths + low sample sizes
    pt_lived = df_hosp.loc[df_hosp['death']==0, :].index.values
    pt_died  = df_hosp.loc[df_hosp['death']==1, :].index.values

    # ensure that a death happens at idxDied: deaths are spread evenly over
    # the ordering and survivors fill every remaining position
    idxDied  = np.linspace(0, n_hosp-1, len(pt_died)).astype(int)
    idxLived = np.setdiff1d(np.arange(n_hosp), idxDied)

    # training-set sizes to evaluate; the last 500 patients are the test set
    xi = list(range(N_PT, n_hosp-500, N_PT))
    M = len(xi)

    seed = 1292
    np.random.seed(seed)

    B = 50
    improvement_curve_local = list()
    improvement_curve_excl = dict()
    for b in range(B):
        # reshuffle patients; `pt` holds index labels, so it uses the index
        # dtype rather than the float64 that np.zeros would give
        idx0 = np.random.permutation(pt_died.shape[0])
        idx1 = np.random.permutation(pt_lived.shape[0])
        pt = np.empty(n_hosp, dtype=pt_died.dtype)
        pt[idxDied] = pt_died[idx0]
        pt[idxLived] = pt_lived[idx1]

        # fix a test set
        pt_test = pt[-500:]

        # evaluate on all patients in this bin
        df_test  = df_hosp.loc[pt_test, :]

        # evaluate the pre-trained model
        prob = predict_with_model(mdl, df_test, target='death', features=features)
        test_perf = evaluate_predictions(prob, df_test, target='death')
        improvement_curve_excl[b] = test_perf

        for i in range(M):
            df_train = df_hosp.loc[pt[0:xi[i]], :]

            # fit a model with patients up to i
            mdl_custom = train_model(df_train, target='death', features=features)
            prob = predict_with_model(mdl_custom, df_test, target='death', features=features)
            p = evaluate_predictions(prob, df_test, target='death')
            # number of training patients used (a multiple of N_PT)
            p['patient'] = xi[i]
            p['b'] = b
            # 0-based step index, matching the 'iteration' column of the
            # reference curve below
            p['iteration'] = i
            improvement_curve_local.append(p)
        print('{}..'.format(b), end=' ')

    improvement_curve_local = pd.DataFrame.from_records(improvement_curve_local)
    improvement_curve_excl = pd.DataFrame.from_dict(improvement_curve_excl, orient='index')

    # reshape imp_excl for seaborn: duplicate each bootstrap result at the
    # first and last iteration so it plots as a flat reference line
    improvement_curve_excl_tmp = improvement_curve_excl.copy()
    improvement_curve_excl_tmp['iteration'] = 0
    improvement_curve_excl_tmp2 = improvement_curve_excl_tmp.copy()
    improvement_curve_excl_tmp2['iteration'] = M-1
    improvement_curve_excl_tmp = pd.concat([improvement_curve_excl_tmp,
                                            improvement_curve_excl_tmp2],
                                           axis=0, ignore_index=True)

    # save result
    imp_local[h] = improvement_curve_local
    imp_excl[h] = improvement_curve_excl_tmp

In [None]:
# plot the result
# NOTE(review): this cell used h = 73, but the bootstrap loop only fills
# imp_local / imp_excl for hospitals 264, 443 and 252, so 73 raised a KeyError.
# 252 is the one hospital from that loop without a plot - confirm the intent.
h = 252
improvement_curve_local = imp_local[h].copy()
improvement_curve_excl = imp_excl[h].copy()

# The local curve already stores its training-set size in 'patient'.  The
# reference curve only carries 'iteration', so convert it to patients
# (100 patients per step).
improvement_curve_excl['patient']  = (improvement_curve_excl['iteration']+1)*100

# x/y are passed by keyword: recent seaborn releases reject positional x/y
plt.figure(figsize=[12, 10])
sns.lineplot(x='patient', y='auc', data=improvement_curve_excl, label='Train on development hospitals')
sns.lineplot(x='patient', y='auc', data=improvement_curve_local, label='Train on local hospital')
plt.xlabel('Number of patients')
plt.ylabel('AUROC')
plt.show()

In [None]:
# plot the result
h = 264
improvement_curve_local = imp_local[h].copy()
improvement_curve_excl = imp_excl[h].copy()

# The local curve already stores its training-set size in 'patient' (it has
# no 'iteration' column to derive it from).  The reference curve only carries
# 'iteration', so convert it to patients (100 patients per step).
improvement_curve_excl['patient']  = (improvement_curve_excl['iteration']+1)*100

# x/y are passed by keyword: recent seaborn releases reject positional x/y
plt.figure(figsize=[12, 10])
sns.lineplot(x='patient', y='auc', data=improvement_curve_excl, label='Train on development hospitals')
sns.lineplot(x='patient', y='auc', data=improvement_curve_local, label='Train on local hospital')
plt.xlabel('Number of patients')
plt.ylabel('AUROC')
plt.show()

In [None]:
# plot the result
h = 443
improvement_curve_local = imp_local[h].copy()
improvement_curve_excl = imp_excl[h].copy()

# The local curve already stores its training-set size in 'patient' (it has
# no 'iteration' column to derive it from).  The reference curve only carries
# 'iteration', so convert it to patients (100 patients per step).
improvement_curve_excl['patient']  = (improvement_curve_excl['iteration']+1)*100

# x/y are passed by keyword: recent seaborn releases reject positional x/y
plt.figure(figsize=[12, 10])
sns.lineplot(x='patient', y='auc', data=improvement_curve_excl, label='Train on development hospitals')
sns.lineplot(x='patient', y='auc', data=improvement_curve_local, label='Train on local hospital')
plt.xlabel('Number of patients')
plt.ylabel('AUROC')
plt.show()

## Recalibrate the models

In [None]:
# per-hospital result stores for the recalibration experiment, keyed by hospital id
imp_local_recalib, imp_excl_recalib = {}, {}

In [None]:
# each sample will increment by N_PT patients
N_PT = 200
h = 73

idx = df_eicu['hospitalid'] == h
df_hosp = df_eicu.loc[idx, :].copy()
n_hosp = df_hosp.shape[0]
# report this hospital's size (previously read from `pt`, a leftover of an
# earlier cell that is undefined on a fresh kernel)
print('{} patients in hospital {}.'.format(n_hosp, h))

# stratified sampling avoids issues around fluctuating deaths + low sample sizes
pt_lived = df_hosp.loc[df_hosp['death']==0, :].index.values
pt_died  = df_hosp.loc[df_hosp['death']==1, :].index.values

# ensure that a death happens at idxDied: deaths are spread evenly over the
# ordering and survivors fill every remaining position
idxDied  = np.linspace(0, n_hosp-1, len(pt_died)).astype(int)
idxLived = np.setdiff1d(np.arange(n_hosp), idxDied)

# training-set sizes to evaluate; the last 500 patients are the test set
xi = list(range(N_PT, n_hosp-500, N_PT))
M = len(xi)

seed = 3321
np.random.seed(seed)

# keeps probabilities strictly inside (0, 1) so the logit below stays finite
eps = np.finfo(float).eps

B = 50
improvement_curve_local = list()
improvement_curve_excl  = list()
for b in range(B):
    # reshuffle patients; `pt` holds index labels, so it uses the index dtype
    # rather than the float64 that np.zeros would give
    idx0 = np.random.permutation(pt_died.shape[0])
    idx1 = np.random.permutation(pt_lived.shape[0])
    pt = np.empty(n_hosp, dtype=pt_died.dtype)
    pt[idxDied] = pt_died[idx0]
    pt[idxLived] = pt_lived[idx1]

    # fix a test set
    pt_test = pt[-500:]

    # evaluate on all patients in this bin
    df_test  = df_hosp.loc[pt_test, :]

    for i in range(M):
        df_train = df_hosp.loc[pt[0:xi[i]], :]

        # recalibrate the pre-trained model: regress the outcome on the logit
        # of its predictions.  The previous transform, log(1/(1-p)), is not
        # the logit, so an already calibrated model could not be recovered.
        prob = predict_with_model(mdl, df_train, target='death', features=features)
        prob = np.clip(prob, eps, 1 - eps)
        prob = np.log(prob / (1 - prob))
        mdl_recalib = linear_model.LogisticRegression(solver='lbfgs').fit(prob.reshape([-1, 1]), df_train['death'].values)

        # evaluate the recalibrated model on the fixed test set
        prob = predict_with_model(mdl, df_test, target='death', features=features)
        prob = np.clip(prob, eps, 1 - eps)
        prob = np.log(prob / (1 - prob))
        prob = mdl_recalib.predict_proba(prob.reshape([-1, 1]))[:, 1]
        test_perf = evaluate_predictions(prob, df_test, target='death')
        test_perf['patient'] = (i+1)*N_PT
        test_perf['b'] = b

        improvement_curve_excl.append(test_perf)

        # fit a model with patients up to i
        mdl_custom = train_model(df_train, target='death', features=features)
        prob = predict_with_model(mdl_custom, df_test, target='death', features=features)
        p = evaluate_predictions(prob, df_test, target='death')
        p['patient'] = (i+1)*N_PT
        p['b'] = b
        improvement_curve_local.append(p)
    print('{}..'.format(b), end=' ')

improvement_curve_local = pd.DataFrame.from_records(improvement_curve_local)
improvement_curve_excl  = pd.DataFrame.from_records(improvement_curve_excl)

# save result
imp_local_recalib[h] = improvement_curve_local
imp_excl_recalib[h] = improvement_curve_excl

In [None]:
# Mean of every metric across bootstrap repetitions, per training-set size.
improvement_curve_local.groupby('patient').mean()

In [None]:
# plot the result
h = 73
improvement_curve_local = imp_local_recalib[h]
improvement_curve_excl = imp_excl_recalib[h]

# AUROC of the first stored recalibration result
print('AUROC: {}'.format(improvement_curve_excl.loc[0, 'auc']))

# x/y are passed by keyword: recent seaborn releases reject positional x/y
plt.figure(figsize=[12, 10])
p = sns.lineplot(x='patient', y='auc', data=improvement_curve_excl,
                  color='#E24A33', label='Recalibrate training set model')
sns.lineplot(x='patient', y='auc', data=improvement_curve_local,
              color='#348ABD', label='Retrain using only local hospital')

# SMR curves share the colours of the AUROC curves but carry markers; the
# underscore-prefixed label keeps them out of the legend
sns.lineplot(x='patient', y='smr', data=improvement_curve_excl, marker='o', markersize=10,
                  color='#E24A33', label='__no_legend__')
sns.lineplot(x='patient', y='smr', data=improvement_curve_local, marker='^', markersize=10,
              color='#348ABD', label='__no_legend__')
plt.ylabel('AUROC / SMR')
plt.xlabel('Number of patients')
plt.show()

In [None]:
# each sample will increment by N_PT patients
N_PT = 200
h = 443

idx = df_eicu['hospitalid'] == h
df_hosp = df_eicu.loc[idx, :].copy()
n_hosp = df_hosp.shape[0]
# report this hospital's size (previously read from `pt`, a leftover of an
# earlier cell that describes a different hospital)
print('{} patients in hospital {}.'.format(n_hosp, h))

# stratified sampling avoids issues around fluctuating deaths + low sample sizes
pt_lived = df_hosp.loc[df_hosp['death']==0, :].index.values
pt_died  = df_hosp.loc[df_hosp['death']==1, :].index.values

# ensure that a death happens at idxDied: deaths are spread evenly over the
# ordering and survivors fill every remaining position
idxDied  = np.linspace(0, n_hosp-1, len(pt_died)).astype(int)
idxLived = np.setdiff1d(np.arange(n_hosp), idxDied)

# training-set sizes to evaluate; the last 500 patients are the test set
xi = list(range(N_PT, n_hosp-500, N_PT))
M = len(xi)

seed = 3321
np.random.seed(seed)

# keeps probabilities strictly inside (0, 1) so the logit below stays finite
eps = np.finfo(float).eps

B = 50
improvement_curve_local = list()
improvement_curve_excl  = list()
for b in range(B):
    # reshuffle patients; `pt` holds index labels, so it uses the index dtype
    # rather than the float64 that np.zeros would give
    idx0 = np.random.permutation(pt_died.shape[0])
    idx1 = np.random.permutation(pt_lived.shape[0])
    pt = np.empty(n_hosp, dtype=pt_died.dtype)
    pt[idxDied] = pt_died[idx0]
    pt[idxLived] = pt_lived[idx1]

    # fix a test set
    pt_test = pt[-500:]

    # evaluate on all patients in this bin
    df_test  = df_hosp.loc[pt_test, :]

    for i in range(M):
        df_train = df_hosp.loc[pt[0:xi[i]], :]

        # recalibrate the pre-trained model: regress the outcome on the logit
        # of its predictions.  The previous transform, log(1/(1-p)), is not
        # the logit, so an already calibrated model could not be recovered.
        prob = predict_with_model(mdl, df_train, target='death', features=features)
        prob = np.clip(prob, eps, 1 - eps)
        prob = np.log(prob / (1 - prob))
        mdl_recalib = linear_model.LogisticRegression(solver='lbfgs').fit(prob.reshape([-1, 1]), df_train['death'].values)

        # evaluate the recalibrated model on the fixed test set
        prob = predict_with_model(mdl, df_test, target='death', features=features)
        prob = np.clip(prob, eps, 1 - eps)
        prob = np.log(prob / (1 - prob))
        prob = mdl_recalib.predict_proba(prob.reshape([-1, 1]))[:, 1]
        test_perf = evaluate_predictions(prob, df_test, target='death')
        test_perf['patient'] = (i+1)*N_PT
        test_perf['b'] = b

        improvement_curve_excl.append(test_perf)

        # fit a model with patients up to i
        mdl_custom = train_model(df_train, target='death', features=features)
        prob = predict_with_model(mdl_custom, df_test, target='death', features=features)
        p = evaluate_predictions(prob, df_test, target='death')
        p['patient'] = (i+1)*N_PT
        p['b'] = b
        improvement_curve_local.append(p)
    print('{}..'.format(b), end=' ')

improvement_curve_local = pd.DataFrame.from_records(improvement_curve_local)
improvement_curve_excl  = pd.DataFrame.from_records(improvement_curve_excl)

# save result
imp_local_recalib[h] = improvement_curve_local
imp_excl_recalib[h] = improvement_curve_excl

In [None]:
# plot the result
h = 443
improvement_curve_local = imp_local_recalib[h]
improvement_curve_excl = imp_excl_recalib[h]

# x/y are passed by keyword: recent seaborn releases reject positional x/y
plt.figure(figsize=[12, 10])
p = sns.lineplot(x='patient', y='auc', data=improvement_curve_excl,
                  color='#E24A33', label='Recalibrate training set model')
sns.lineplot(x='patient', y='auc', data=improvement_curve_local,
              color='#348ABD', label='Retrain using only local hospital')

# SMR curves share the colours of the AUROC curves but carry markers; the
# underscore-prefixed label keeps them out of the legend
sns.lineplot(x='patient', y='smr', data=improvement_curve_excl, marker='o',
                  color='#E24A33', label='__no_legend__')
sns.lineplot(x='patient', y='smr', data=improvement_curve_local, marker='o',
              color='#348ABD', label='__no_legend__')
plt.ylabel('AUROC / SMR')
plt.xlabel('Number of patients')
plt.show()