In [17]:
import numpy, pandas, pathlib

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from skops.io import load

from sklearn.utils import resample

from misc import construct_line

pandas.options.display.max_columns=100

from statsmodels.stats.weightstats import ztest as ztest

pathlib.Path('pdf/figure-3').mkdir(exist_ok=True)

number_of_bootstraps = 100

In [18]:
best_model = {}
for model in ['LR', 'NN', 'XB']:
    best_model[model] = load('models/'+model.lower()+'.skops', trusted=True)

# load the training dataset
X={}
Y={}
Z={}
X['train']={}
Y['train']={}
Z['train']={}
with open('data/ds-train.npy', 'rb') as f:
    Y['train']['input'] = numpy.load(f)
    X['train']['input'] = numpy.load(f)
    Z['train']['input'] = numpy.load(f, allow_pickle=True)

# load the results for the training dataset
results = pandas.read_csv('results-training.csv')
results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio_mean,diagnostic_odds_ratio_std
0,LR,train,78.766667,7.969804,84.635931,4.924239,82.734349,6.631325,178,40,47,199,"{""C"": 0.1, ""penalty"": ""l2"", ""solver"": ""libline...",18.841489,0
1,NN,train,77.7,9.539043,83.269372,3.173211,78.364529,5.500773,213,5,1,245,"{""activation"": ""relu"", ""alpha"": 1e-06, ""hidden...",10437.0,0
2,XB,train,79.266667,7.800356,85.649675,4.91675,84.261753,6.800008,196,22,26,220,"{""learning_rate"": 0.125, ""max_depth"": 4, ""min_...",75.384615,0
3,SP,train,97.96748,,95.412844,,,,208,10,5,241,,1002.56,0


In [19]:
for metric in ['sensitivity', 'specificity', 'roc_auc', 'diagnostic_odds_ratio']:
    colour='#888888'
    fig = plt.figure(figsize=(2.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=range(4)
    x=results[(results.model!='SP')].model
    y=results[(results.model!='SP')][metric+'_mean']
    # these are standard deviations so convert to standard error at 95% given number_of_bootstraps
    e=results[(results.model!='SP')][metric+'_std']*1.96/(number_of_bootstraps**0.5)
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color=colour,linewidth=2)
        for (i,j,k) in zip(x,y,e):
            axes.text(i,j+k+2,'%.1f' % j,ha='center',color=colour)

    else:
        for (i,j) in zip(x,y):
            axes.text(i,j+2,'%.1f' % j,ha='center',color=colour)

    fig.savefig('pdf/figure-3/fig-3-train-'+metric+'.pdf', ,bbox_inches='tight',transparent=True)
    plt.close()

In [20]:
for metric in ['sensitivity', 'specificity']:
    colour=['#888888','#888888','#888888','pink']
    fig = plt.figure(figsize=(3.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=results.model
    y=results[metric+'_mean']
    # these are standard deviations so convert to standard error at 95% given number_of_bootstraps
    e=results[metric+'_std']*1.96/(number_of_bootstraps**0.5)
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color='#888888',linewidth=2)
        for (i,j,k,c) in zip(x,y,e,colour):
            if k>0:
                axes.text(i,j+k+2,'%.1f' % j,ha='center',color=c)
            else:
                axes.text(i,j+2,'%.1f' % j,ha='center',color=c)

    else:
        for (i,j,c) in zip(x,y,colour):                
            axes.text(i,j+2,'%.1f' % j,ha='center',color=c)

    fig.savefig('pdf/figure-3/fig-3-train-'+metric+'-suspectpza.pdf',,bbox_inches='tight',transparent=True)
    plt.close()

In [21]:
for idx,row in results.iterrows():

    fig = plt.figure(figsize=(1.5, 1.5))
    axes = plt.gca()

    axes.add_patch(Rectangle((0,0),1,1,fc='#e41a1c',alpha=0.7))
    axes.add_patch(Rectangle((0,1),1,1,fc='#4daf4a',alpha=0.7))
    axes.add_patch(Rectangle((1,1),1,1,fc='#fc9272',alpha=0.7))
    axes.add_patch(Rectangle((1,0),1,1,fc='#4daf4a',alpha=0.7))

    axes.set_xlim([0,2])
    axes.set_ylim([0,2])

    axes.set_xticks([0.5,1.5],labels=['R','S'])
    axes.set_yticks([0.5,1.5],labels=['S','R'])

    axes.text(0.5,0.5,row['FN'],ha='center',va='center')
    axes.text(1.5,0.5,row['TN'],ha='center',va='center')
    axes.text(0.5,1.5,row['TP'],ha='center',va='center')
    axes.text(1.5,1.5,row['FP'],ha='center',va='center')

    fig.savefig('pdf/figure-3/truthtable-'+row['dataset']+'-'+row['model']+'.pdf',bbox_inches='tight',transparent=True)
    plt.close()

### Model validation on `test` set

We can now evaluate the trained models on the `test` dataset.

First let's load the trained models, the datasets and the results for Suspect-PZA


In [22]:
for model in ['LR', 'NN', 'XB']:
    best_model[model] = load('models/'+model.lower()+'.skops', trusted=True)

# load the test dataset
X['test']={}
Y['test']={}
Z['test']={}
with open('data/ds-test.npy', 'rb') as f:
    Y['test']['input'] = numpy.load(f)
    X['test']['input'] = numpy.load(f)
    Z['test']['input'] = numpy.load(f, allow_pickle=True)

# load in the results for SuspectPZA
suspectpza={}
for i in ['test']:
    suspectpza[i]={}
    with open('data/suspectpza-'+i+'.npy', 'rb') as f:
        suspectpza[i]['input'] = numpy.load(f)
        suspectpza[i]['predicted'] = numpy.load(f)
        suspectpza[i]['muts'] = numpy.load(f, allow_pickle=True)
line = []

def validate_model(line, best_model, model_name, X, Y):
        
    Y['test']['predicted'] = best_model.predict(X['test']['input'])
    Y['test']['scores'] = best_model.predict_proba(X['test']['input'])[:,1]
    row = construct_line(model_name , 'test', None, Y['test'], None)
    line.append(row)
    return(line)

for model in ['LR', 'NN', 'XB']:
    line = validate_model(line, best_model[model], model, X, Y)

line.append(construct_line('SP', 'test', None, suspectpza['test'], None))

# load the results for the training dataset
# results = pandas.read_csv('results-training.csv')
# results

In [23]:
def bootstrap_model(line, best_model, model_name, X, Y):
    x={}
    y={}
    for i in range(number_of_bootstraps):
        x['input'],y['input'] = resample(X['test']['input'], Y['test']['input'])
        y['predicted'] = best_model.predict(x['input'])
        y['scores'] = best_model.predict_proba(x['input'])[:,1]
        row = construct_line(model_name, 'test_'+str(i), None, y, None)
        line.append(row)
    return(line)

In [24]:
for model in ['LR', 'NN', 'XB']:
    line = bootstrap_model(line, best_model[model], model, X, Y)

test_results = pandas.DataFrame(line, columns=['model', 'dataset', 'sensitivity_mean', 'sensitivity_std', 'specificity_mean', 'specificity_std' ,'roc_auc_mean', 'roc_auc_std','TN','FP','FN','TP', 'model_parameters'])
test_results[:3]

In [26]:
line = []
for model in ['LR', 'NN', 'XB']:
    sens_mean = test_results[(test_results.model==model) & (test_results.dataset!='test')].sensitivity_mean.mean()
    sens_std = 1.96*test_results[(test_results.model==model) & (test_results.dataset!='test')].sensitivity_mean.std()/(number_of_bootstraps**0.5)
    spec_mean = test_results[(test_results.model==model) & (test_results.dataset!='test')].specificity_mean.mean()
    spec_std = 1.96*test_results[(test_results.model==model) & (test_results.dataset!='test')].specificity_mean.std()/(number_of_bootstraps**0.5)
    roc_mean = test_results[(test_results.model==model) & (test_results.dataset!='test')].roc_auc_mean.mean()
    row = [model, 'bootstrapped', sens_mean, sens_std, spec_mean, spec_std, roc_mean, None, None, None, None, None, None ]
    line.append(row)

extra_rows = pandas.DataFrame(line, columns=['model', 'dataset', 'sensitivity_mean', 'sensitivity_std', 'specificity_mean', 'specificity_std' ,'roc_auc_mean', 'roc_auc_std','TN','FP','FN','TP', 'model_parameters'])

test_results  = pandas.concat([test_results, extra_rows])

In [27]:
test_results['diagnostic_odds_ratio_mean'] = (test_results['TN']*test_results['TP'])/(test_results['FN']*test_results['FP'])
test_results['diagnostic_odds_ratio_std'] = 0
test_results.to_csv('results-test.csv', index=False)
test_results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio_mean,diagnostic_odds_ratio_std
0,LR,test,79.611650,,72.164948,,82.784506,,70,27,21,82,,10.123457,0
1,NN,test,75.728155,,58.762887,,75.097588,,57,40,25,78,,4.446,0
2,XB,test,80.582524,,78.350515,,82.234011,,76,21,20,83,,15.019048,0
3,SP,test,95.145631,,97.938144,,,,95,2,5,98,,931.0,0
4,LR,test_0,83.695652,,76.851852,,84.541063,,83,25,15,77,,17.042667,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302,XB,test_98,85.714286,,76.470588,,85.614246,,78,24,14,84,,19.5,0
303,XB,test_99,80.188679,,88.297872,,86.310719,,83,11,21,85,,30.541126,0
0,LR,bootstrapped,79.794604,0.785374,71.922143,0.857657,82.627563,,,,,,,,0
1,NN,bootstrapped,75.954297,0.815777,59.592710,0.979476,75.743910,,,,,,,,0


In [28]:
for metric in ['sensitivity_mean', 'specificity_mean']:
    for i in ['XB', 'NN', 'LR']:
        for j in ['XB', 'NN', 'LR']:
            if i!=j:
                a = numpy.array(test_results[(test_results.model==i)& (test_results.dataset.str.contains('test_'))][metric])
                b = numpy.array(test_results[(test_results.model==j)& (test_results.dataset.str.contains('test_'))][metric])
                tstat, pvalue = ztest(a,b)
                if pvalue < 0.05:
                    print(metric, i,j,"Signficant",pvalue)
    print()

sensitivity_mean XB NN Signficant 5.550748294756133e-19
sensitivity_mean XB LR Signficant 0.026963982369483337
sensitivity_mean NN XB Signficant 5.550748294756133e-19
sensitivity_mean NN LR Signficant 2.991063439006395e-11
sensitivity_mean LR XB Signficant 0.026963982369483337
sensitivity_mean LR NN Signficant 2.991063439006395e-11

specificity_mean XB NN Signficant 2.4664877725431692e-175
specificity_mean XB LR Signficant 2.839479653001551e-24
specificity_mean NN XB Signficant 2.4664877725431692e-175
specificity_mean NN LR Signficant 6.543215565878555e-77
specificity_mean LR XB Signficant 2.839479653001551e-24
specificity_mean LR NN Signficant 6.543215565878555e-77



In [13]:
for metric in ['sensitivity', 'specificity', 'roc_auc']:
    colour='#888888'
    fig = plt.figure(figsize=(2.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=range(4)
    x=test_results[(test_results.model!='SP') & (test_results.dataset=='bootstrapped')].model
    y=test_results[(test_results.model!='SP') & (test_results.dataset=='bootstrapped')][metric+'_mean']
    e=test_results[(test_results.model!='SP') & (test_results.dataset=='bootstrapped')][metric+'_std']
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color=colour,linewidth=2)
        for (i,j,k) in zip(x,y,e):
            axes.text(i,j+k+2,'%.1f' % j,ha='center',color=colour)

    else:
        for (i,j) in zip(x,y):
            axes.text(i,j+2,'%.1f' % j,ha='center',color=colour)

    fig.savefig('pdf/figure-3/fig-3-test-'+metric+'.pdf',bbox_inches='tight',transparent=True)
    plt.close()

for metric in ['diagnostic_odds_ratio']:
    colour='#888888'
    fig = plt.figure(figsize=(2.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=range(4)
    x=test_results[(test_results.model!='SP') & (test_results.dataset=='test')].model
    y=test_results[(test_results.model!='SP') & (test_results.dataset=='test')][metric+'_mean']
    e=test_results[(test_results.model!='SP') & (test_results.dataset=='test')][metric+'_std']
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color=colour,linewidth=2)
        for (i,j,k) in zip(x,y,e):
            axes.text(i,j+k+2,'%.1f' % j,ha='center',color=colour)

    else:
        for (i,j) in zip(x,y):
            axes.text(i,j+2,'%.1f' % j,ha='center',color=colour)

    fig.savefig('pdf/figure-3/fig-3-test-'+metric+'.pdf',bbox_inches='tight',transparent=True)
    plt.close()    

In [14]:
for metric in ['sensitivity', 'specificity']:
    colour=['#888888','#888888','#888888','pink']
    fig = plt.figure(figsize=(3.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=test_results[(test_results.dataset=='bootstrapped')].model
    y=test_results[(test_results.dataset=='bootstrapped')][metric+'_mean']
    e=test_results[(test_results.dataset=='bootstrapped')][metric+'_std']
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color='#888888',linewidth=2)
        for (i,j,k,c) in zip(x,y,e,colour):
            if k>0:
                axes.text(i,j+k+2,'%.1f' % j,ha='center',color=c)
            else:
                axes.text(i,j+2,'%.1f' % j,ha='center',color=c)

    else:
        for (i,j,c) in zip(x,y,colour):                
            axes.text(i,j+2,'%.1f' % j,ha='center',color=c)

    fig.savefig('pdf/figure-3/fig-3-test-'+metric+'-suspectpza.pdf',bbox_inches='tight',transparent=True)
    plt.close()

In [15]:
for idx,row in test_results[test_results.dataset=='test'].iterrows():

    fig = plt.figure(figsize=(1.5, 1.5))
    axes = plt.gca()

    axes.add_patch(Rectangle((0,0),1,1,fc='#e41a1c',alpha=0.7))
    axes.add_patch(Rectangle((0,1),1,1,fc='#4daf4a',alpha=0.7))
    axes.add_patch(Rectangle((1,1),1,1,fc='#fc9272',alpha=0.7))
    axes.add_patch(Rectangle((1,0),1,1,fc='#4daf4a',alpha=0.7))

    axes.set_xlim([0,2])
    axes.set_ylim([0,2])

    axes.set_xticks([0.5,1.5],labels=['R','S'])
    axes.set_yticks([0.5,1.5],labels=['S','R'])

    axes.text(0.5,0.5,row['FN'],ha='center',va='center')
    axes.text(1.5,0.5,row['TN'],ha='center',va='center')
    axes.text(0.5,1.5,row['TP'],ha='center',va='center')
    axes.text(1.5,1.5,row['FP'],ha='center',va='center')

    fig.savefig('pdf/figure-3/truthtable-'+row['dataset']+'-'+row['model']+'.pdf',bbox_inches='tight',transparent=True)
    plt.close()