In [1]:
import pandas as pd
import numpy as np

import os
import re

from pycox.evaluation import EvalSurv

from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, precision_score, recall_score, plot_confusion_matrix

import matplotlib.pyplot as plt

import seaborn as sns

import sqlite3

# Prepare the Evaluation Data
## Add the test set data to the DB

## Add the test data outcomes

In [2]:
# Name of the outcome column holding the observed time; used below to pull the
# time array out of `test_outcomes` for every C-Index calculation.
time_var = 'REMAINING_SURVIVAL_TIME'

In [12]:
# Build the outcome table in one chain: keep only the default flag and the
# remaining survival time, turn the CSV row position into a 1-based ID, and
# rename DEFAULT to EVENT.
eval_data = (
    pd.read_csv('data/loans_labelled_test.csv')[['DEFAULT', 'REMAINING_SURVIVAL_TIME']]
    .reset_index()
    .rename(columns={'index': 'ID', 'DEFAULT': 'EVENT'})
    .assign(ID=lambda frame: frame.ID + 1)
)
eval_data

Unnamed: 0,ID,EVENT,REMAINING_SURVIVAL_TIME
0,1,0,38
1,2,0,6
2,3,0,38
3,4,0,37
4,5,0,38
...,...,...,...
40756,40757,0,46
40757,40758,0,46
40758,40759,0,46
40759,40760,0,46


In [14]:
# Persist the test outcomes as table TB01_OUTCOMES (replacing any existing one).
db_path = './predictions/test_evaluation.sqlite'
con = sqlite3.connect(db_path)

# try/finally so the connection is released even when the write fails;
# otherwise a failed cell would leave the database file open in the kernel.
try:
    eval_data.to_sql('TB01_OUTCOMES', con, index=False, if_exists='replace')
finally:
    con.close()

## Import Predictions

In [3]:
# Load every model's predictions from TB02_PREDICTIONS.
con = sqlite3.connect('./predictions/test_evaluation.sqlite')
# try/finally so the connection is closed even if the query raises.
try:
    surv_predictions = pd.read_sql('SELECT * FROM TB02_PREDICTIONS', con)
finally:
    con.close()

# Convert ID to a numeric dtype so it can be matched against the outcome IDs.
surv_predictions['ID'] = pd.to_numeric(surv_predictions['ID'])

In [4]:
# Number of prediction rows stored per model.
surv_predictions['MODEL'].value_counts()

DeepHit_MC     1997289
DeepSurv_MC    1997289
COX_BAYES      1958448
Name: MODEL, dtype: int64

## C-Index

In [5]:
def get_c_index(surv, times, events):
    """Return ``concordance_td()`` of a pycox ``EvalSurv`` built from the inputs.

    Parameters
    ----------
    surv : survival predictions; the callers in this notebook pass a wide
        frame with one row per PERIOD and one column per ID.
    times : array of observed times, passed through to ``EvalSurv``.
    events : array of event indicators, passed through to ``EvalSurv``.

    The evaluator is always created with ``censor_surv='km'``.
    """
    evaluator = EvalSurv(surv, times, events, censor_surv='km')
    c_index = evaluator.concordance_td()
    return c_index

In [6]:
# Load the observed outcomes (ID, EVENT, survival time) of the test set.
con = sqlite3.connect('./predictions/test_evaluation.sqlite')
# try/finally so the connection is closed even if the query raises.
try:
    test_outcomes = pd.read_sql('SELECT * FROM TB01_OUTCOMES', con)
finally:
    con.close()

## Calculate Evaluation Metrics for all Models

In [7]:
models = ['DeepSurv_MC', 'DeepHit_MC', 'COX_BAYES']

# IDs of the test instances whose recorded outcome is an event (EVENT == 1).
defaults_ids = test_outcomes[test_outcomes.EVENT == 1].ID

# Observed times / event flags do not depend on the model: build them once
# instead of once per C-Index call.
outcome_times = np.array(test_outcomes[time_var])
outcome_events = np.array(test_outcomes.EVENT)

evaluation_results = None

# One single-row frame per model, concatenated once after the loop.
# (DataFrame.append is deprecated since pandas 1.4 and removed in pandas 2.0.)
per_model_results = []

for model in models:

    print(model)

    model_predictions = surv_predictions[surv_predictions.MODEL == model]
    # Row mask, within this model's predictions, of the instances that defaulted.
    is_default = model_predictions.ID.isin(defaults_ids)

    # transform into wide format for C-Index calculation
    # NOTE(review): the C-Index call relies on the pivot's columns (sorted by ID)
    # lining up with the row order of test_outcomes - the checks below only
    # compare the count and the sum of the IDs.
    wide_model_surv = model_predictions[['ID','PERIOD','PRED_MEAN']].pivot_table(index='PERIOD',columns='ID', values='PRED_MEAN')

    ## Sanity Checks
    print("Correct number instances?")
    print(wide_model_surv.shape[1] == test_outcomes.ID.max())

    print("Correct IDs?")
    print(sum(list(wide_model_surv.columns)) == test_outcomes.ID.sum())

    ## C-Index
    model_c_index = get_c_index(wide_model_surv, outcome_times, outcome_events)

    ## Prediction Interval
    # Distance of PI: mean width of the interval over all rows of this model
    pi_80_distance = (model_predictions.PRED_80_HIGH - model_predictions.PRED_80_LOW).mean()
    pi_95_distance = (model_predictions.PRED_95_HIGH - model_predictions.PRED_95_LOW).mean()

    # Quality of PI: C-Index of the most favourable predictions inside the
    # interval - the lower bound for defaulted instances and the upper bound
    # for all others - computed for the 80 % and the 95 % interval.
    best_case_c_index = {}
    for level in ('80', '95'):
        defaults = model_predictions.loc[is_default, ['ID', 'PERIOD', 'PRED_' + level + '_LOW']]
        non_defaults = model_predictions.loc[~is_default, ['ID', 'PERIOD', 'PRED_' + level + '_HIGH']]

        # Give both parts the same value column name so they can be stacked.
        defaults.columns = ['ID','PERIOD','PRED_MEAN']
        non_defaults.columns = ['ID','PERIOD','PRED_MEAN']

        opt_model_preds = pd.concat([defaults, non_defaults])
        opt_model_preds = opt_model_preds.sort_values(['ID','PERIOD'])

        opt_model_preds = opt_model_preds[['ID','PERIOD','PRED_MEAN']].pivot_table(index='PERIOD',columns='ID', values='PRED_MEAN')
        best_case_c_index[level] = get_c_index(opt_model_preds, outcome_times, outcome_events)

    opt_80_model_c_index = best_case_c_index['80']
    opt_95_model_c_index = best_case_c_index['95']

    model_results = pd.DataFrame({
        'Model': [model],
        'C-Index' : [model_c_index],
        'PI_80_Distance' : [pi_80_distance],
        'PI_95_Distance' : [pi_95_distance],
        'Best_C_Index_80' : [opt_80_model_c_index],
        'Best_C_Index_95' : [opt_95_model_c_index]
        })

    per_model_results.append(model_results)

# Stack the per-model rows; each row keeps its own index label (0), exactly as
# the former row-by-row DataFrame.append did.
if per_model_results:
    evaluation_results = pd.concat(per_model_results)


DeepSurv_MC
Correct number instances?
True
Correct IDs?
True
DeepHit_MC
Correct number instances?
True
Correct IDs?
True
COX_BAYES
Correct number instances?
True
Correct IDs?
True


In [5]:
# Index the summary table by model name for display.
# Guarded so that re-running this cell does not raise a KeyError once
# 'Model' has already been moved from the columns into the index.
if 'Model' in evaluation_results.columns:
    evaluation_results = evaluation_results.set_index('Model')
evaluation_results

Unnamed: 0_level_0,C-Index,PI_80_Distance,PI_95_Distance,Best_C_Index_80,Best_C_Index_95
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DeepSurv_MC,0.798084,0.016298,0.026575,0.944295,0.969935
DeepHit_MC,0.804931,0.026259,0.043158,0.962961,0.985239
COX_BAYES,0.783946,0.000606,0.000929,0.801664,0.810652


In [10]:
#evaluation_results.to_csv('results_table.csv')

## Subsample Predictions

### 10k Training Samples

In [53]:
# Suffix of the prediction database to evaluate; the next cell reads
# './predictions/test_evaluation_<subsample_experiment>.sqlite'.
subsample_experiment = '10k'

In [54]:
# import sample predictions
con = sqlite3.connect('./predictions/test_evaluation_'+subsample_experiment +'.sqlite')
# try/finally so the connection is closed even if the query raises.
try:
    surv_predictions = pd.read_sql('SELECT * FROM TB02_PREDICTIONS', con)
finally:
    con.close()

# Convert ID to a numeric dtype so it can be matched against the outcome IDs.
surv_predictions['ID'] = pd.to_numeric(surv_predictions['ID'])

In [60]:
models = ['DeepSurv_MC', 'DeepHit_MC', 'COX_BAYES']

# IDs of the test instances whose recorded outcome is an event (EVENT == 1).
defaults_ids = test_outcomes[test_outcomes.EVENT == 1].ID

# Observed times / event flags do not depend on the model: build them once
# instead of once per C-Index call.
outcome_times = np.array(test_outcomes[time_var])
outcome_events = np.array(test_outcomes.EVENT)

evaluation_results = None

# One single-row frame per model, concatenated once after the loop.
# (DataFrame.append is deprecated since pandas 1.4 and removed in pandas 2.0.)
per_model_results = []

for model in models:

    print(model)

    model_predictions = surv_predictions[surv_predictions.MODEL == model]
    # Row mask, within this model's predictions, of the instances that defaulted.
    is_default = model_predictions.ID.isin(defaults_ids)

    # transform into wide format for C-Index calculation
    # NOTE(review): the C-Index call relies on the pivot's columns (sorted by ID)
    # lining up with the row order of test_outcomes - the checks below only
    # compare the count and the sum of the IDs.
    wide_model_surv = model_predictions[['ID','PERIOD','PRED_MEAN']].pivot_table(index='PERIOD',columns='ID', values='PRED_MEAN')

    ## Sanity Checks
    print("Correct number instances?")
    print(wide_model_surv.shape[1] == test_outcomes.ID.max())

    print("Correct IDs?")
    print(sum(list(wide_model_surv.columns)) == test_outcomes.ID.sum())

    ## C-Index
    model_c_index = get_c_index(wide_model_surv, outcome_times, outcome_events)

    ## Prediction Interval
    # Distance of PI: mean width of the interval over all rows of this model
    pi_80_distance = (model_predictions.PRED_80_HIGH - model_predictions.PRED_80_LOW).mean()
    pi_95_distance = (model_predictions.PRED_95_HIGH - model_predictions.PRED_95_LOW).mean()

    # Quality of PI: C-Index of the most favourable predictions inside the
    # interval - the lower bound for defaulted instances and the upper bound
    # for all others - computed for the 80 % and the 95 % interval.
    best_case_c_index = {}
    for level in ('80', '95'):
        defaults = model_predictions.loc[is_default, ['ID', 'PERIOD', 'PRED_' + level + '_LOW']]
        non_defaults = model_predictions.loc[~is_default, ['ID', 'PERIOD', 'PRED_' + level + '_HIGH']]

        # Give both parts the same value column name so they can be stacked.
        defaults.columns = ['ID','PERIOD','PRED_MEAN']
        non_defaults.columns = ['ID','PERIOD','PRED_MEAN']

        opt_model_preds = pd.concat([defaults, non_defaults])
        opt_model_preds = opt_model_preds.sort_values(['ID','PERIOD'])

        opt_model_preds = opt_model_preds[['ID','PERIOD','PRED_MEAN']].pivot_table(index='PERIOD',columns='ID', values='PRED_MEAN')
        best_case_c_index[level] = get_c_index(opt_model_preds, outcome_times, outcome_events)

    opt_80_model_c_index = best_case_c_index['80']
    opt_95_model_c_index = best_case_c_index['95']

    model_results = pd.DataFrame({
        'Model': [model],
        'C-Index' : [model_c_index],
        'PI_80_Distance' : [pi_80_distance],
        'PI_95_Distance' : [pi_95_distance],
        'Best_C_Index_80' : [opt_80_model_c_index],
        'Best_C_Index_95' : [opt_95_model_c_index]
        })

    per_model_results.append(model_results)

# Stack the per-model rows; each row keeps its own index label (0), exactly as
# the former row-by-row DataFrame.append did.
if per_model_results:
    evaluation_results = pd.concat(per_model_results)

evaluation_results

DeepSurv_MC
Correct number instances?
True
Correct IDs?
True
DeepHit_MC
Correct number instances?
True
Correct IDs?
True
COX_BAYES
Correct number instances?
True
Correct IDs?
True


Unnamed: 0,Model,C-Index,PI_80_Distance,PI_95_Distance,Best_C_Index_80,Best_C_Index_95
0,DeepSurv_MC,0.786703,0.005541,0.008433,0.982588,0.985469
0,DeepHit_MC,0.79066,0.053302,0.09249,0.990375,0.996404
0,COX_BAYES,0.78484,0.003201,0.004896,0.872803,0.905446


### 1k Training Samples

In [62]:
# Suffix of the prediction database to evaluate; the next cell reads
# './predictions/test_evaluation_<subsample_experiment>.sqlite'.
subsample_experiment = '1k'

In [63]:
# Load the predictions of the models for the selected subsample experiment.
con = sqlite3.connect('./predictions/test_evaluation_'+subsample_experiment +'.sqlite')
# try/finally so the connection is closed even if the query raises.
try:
    surv_predictions = pd.read_sql('SELECT * FROM TB02_PREDICTIONS', con)
finally:
    con.close()

# Convert ID to a numeric dtype so it can be matched against the outcome IDs.
surv_predictions['ID'] = pd.to_numeric(surv_predictions['ID'])

In [65]:
models = ['DeepSurv_MC', 'DeepHit_MC', 'COX_BAYES']

# IDs of the test instances whose recorded outcome is an event (EVENT == 1).
defaults_ids = test_outcomes[test_outcomes.EVENT == 1].ID

# Observed times / event flags do not depend on the model: build them once
# instead of once per C-Index call.
outcome_times = np.array(test_outcomes[time_var])
outcome_events = np.array(test_outcomes.EVENT)

evaluation_results = None

# One single-row frame per model, concatenated once after the loop.
# (DataFrame.append is deprecated since pandas 1.4 and removed in pandas 2.0.)
per_model_results = []

for model in models:

    print(model)

    model_predictions = surv_predictions[surv_predictions.MODEL == model]
    # Row mask, within this model's predictions, of the instances that defaulted.
    is_default = model_predictions.ID.isin(defaults_ids)

    # transform into wide format for C-Index calculation
    # NOTE(review): the C-Index call relies on the pivot's columns (sorted by ID)
    # lining up with the row order of test_outcomes - the checks below only
    # compare the count and the sum of the IDs.
    wide_model_surv = model_predictions[['ID','PERIOD','PRED_MEAN']].pivot_table(index='PERIOD',columns='ID', values='PRED_MEAN')

    ## Sanity Checks
    print("Correct number instances?")
    print(wide_model_surv.shape[1] == test_outcomes.ID.max())

    print("Correct IDs?")
    print(sum(list(wide_model_surv.columns)) == test_outcomes.ID.sum())

    ## C-Index
    model_c_index = get_c_index(wide_model_surv, outcome_times, outcome_events)

    ## Prediction Interval
    # Distance of PI: mean width of the interval over all rows of this model
    pi_80_distance = (model_predictions.PRED_80_HIGH - model_predictions.PRED_80_LOW).mean()
    pi_95_distance = (model_predictions.PRED_95_HIGH - model_predictions.PRED_95_LOW).mean()

    # Quality of PI: C-Index of the most favourable predictions inside the
    # interval - the lower bound for defaulted instances and the upper bound
    # for all others - computed for the 80 % and the 95 % interval.
    best_case_c_index = {}
    for level in ('80', '95'):
        defaults = model_predictions.loc[is_default, ['ID', 'PERIOD', 'PRED_' + level + '_LOW']]
        non_defaults = model_predictions.loc[~is_default, ['ID', 'PERIOD', 'PRED_' + level + '_HIGH']]

        # Give both parts the same value column name so they can be stacked.
        defaults.columns = ['ID','PERIOD','PRED_MEAN']
        non_defaults.columns = ['ID','PERIOD','PRED_MEAN']

        opt_model_preds = pd.concat([defaults, non_defaults])
        opt_model_preds = opt_model_preds.sort_values(['ID','PERIOD'])

        opt_model_preds = opt_model_preds[['ID','PERIOD','PRED_MEAN']].pivot_table(index='PERIOD',columns='ID', values='PRED_MEAN')
        best_case_c_index[level] = get_c_index(opt_model_preds, outcome_times, outcome_events)

    opt_80_model_c_index = best_case_c_index['80']
    opt_95_model_c_index = best_case_c_index['95']

    model_results = pd.DataFrame({
        'Model': [model],
        'C-Index' : [model_c_index],
        'PI_80_Distance' : [pi_80_distance],
        'PI_95_Distance' : [pi_95_distance],
        'Best_C_Index_80' : [opt_80_model_c_index],
        'Best_C_Index_95' : [opt_95_model_c_index]
        })

    per_model_results.append(model_results)

# Stack the per-model rows; each row keeps its own index label (0), exactly as
# the former row-by-row DataFrame.append did.
if per_model_results:
    evaluation_results = pd.concat(per_model_results)

evaluation_results

DeepSurv_MC
Correct number instances?
True
Correct IDs?
True
DeepHit_MC
Correct number instances?
True
Correct IDs?
True
COX_BAYES
Correct number instances?
True
Correct IDs?
True


Unnamed: 0,Model,C-Index,PI_80_Distance,PI_95_Distance,Best_C_Index_80,Best_C_Index_95
0,DeepSurv_MC,0.759338,0.010341,0.016518,0.94304,0.964657
0,DeepHit_MC,0.514465,0.058752,0.092949,0.930547,0.942015
0,COX_BAYES,0.713491,0.003895,0.006199,0.891068,0.935592
