In [4]:
import numpy, pandas, pathlib

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from skops.io import load

from sklearn.utils import resample

from misc import construct_line

pandas.options.display.max_columns=100

from statsmodels.stats.weightstats import ztest as ztest

pathlib.Path('pdf/figure-3').mkdir(exist_ok=True)

In [5]:
best_model = {}
for model in ['LR', 'NN', 'XB']:
    best_model[model] = load('models/'+model.lower()+'.skops', trusted=True)

# load the training dataset
X={}
Y={}
Z={}
X['train']={}
Y['train']={}
Z['train']={}
with open('data/ds-train.npy', 'rb') as f:
    Y['train']['input'] = numpy.load(f)
    X['train']['input'] = numpy.load(f)
    Z['train']['input'] = numpy.load(f, allow_pickle=True)

# load the results for the training dataset
results = pandas.read_csv('results-training.csv')
results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio_mean,diagnostic_odds_ratio_std
0,LR,train,78.383333,6.571001,84.562446,5.01684,82.349734,7.055214,178,40,49,197,"{""C"": 1.0, ""penalty"": ""l1"", ""solver"": ""libline...",17.890816,0
1,NN,train,78.083333,8.534977,83.299026,4.003191,80.827359,6.939865,218,0,0,246,"{""activation"": ""tanh"", ""alpha"": 0.1, ""hidden_l...",inf,0
2,XB,train,78.85,7.291338,85.730087,4.644319,84.322392,7.940583,196,22,32,214,"{""learning_rate"": 0.125, ""max_depth"": 2, ""min_...",59.579545,0
3,SP,train,97.96748,,95.412844,,,,208,10,5,241,,1002.56,0


In [6]:
for metric in ['sensitivity', 'specificity', 'roc_auc', 'diagnostic_odds_ratio']:
    colour='#888888'
    fig = plt.figure(figsize=(2.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=range(4)
    x=results[(results.model!='SP')].model
    y=results[(results.model!='SP')][metric+'_mean']
    # these are standard deviations so convert to standard error at 95% given n=10
    e=results[(results.model!='SP')][metric+'_std']*1.96/10**0.5
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color=colour,linewidth=2)
        for (i,j,k) in zip(x,y,e):
            axes.text(i,j+k+2,'%.1f' % j,ha='center',color=colour)

    else:
        for (i,j) in zip(x,y):
            axes.text(i,j+2,'%.1f' % j,ha='center',color=colour)

    fig.savefig('pdf/figure-3/fig-3-train-'+metric+'.pdf', bbox_inches="tight")
    plt.close()

posx and posy should be finite values
posx and posy should be finite values


In [7]:
for metric in ['sensitivity', 'specificity']:
    colour=['#888888','#888888','#888888','pink']
    fig = plt.figure(figsize=(3.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=results.model
    y=results[metric+'_mean']
    # these are standard deviations so convert to standard error at 95% given n=10
    e=results[metric+'_std']*1.96/10**0.5
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color='#888888',linewidth=2)
        for (i,j,k,c) in zip(x,y,e,colour):
            if k>0:
                axes.text(i,j+k+2,'%.1f' % j,ha='center',color=c)
            else:
                axes.text(i,j+2,'%.1f' % j,ha='center',color=c)

    else:
        for (i,j,c) in zip(x,y,colour):                
            axes.text(i,j+2,'%.1f' % j,ha='center',color=c)

    fig.savefig('pdf/figure-3/fig-3-train-'+metric+'-suspectpza.pdf', bbox_inches="tight")
    plt.close()

In [8]:
for idx,row in results.iterrows():

    fig = plt.figure(figsize=(1.5, 1.5))
    axes = plt.gca()

    axes.add_patch(Rectangle((0,0),1,1,fc='#e41a1c',alpha=0.7))
    axes.add_patch(Rectangle((0,1),1,1,fc='#4daf4a',alpha=0.7))
    axes.add_patch(Rectangle((1,1),1,1,fc='#fc9272',alpha=0.7))
    axes.add_patch(Rectangle((1,0),1,1,fc='#4daf4a',alpha=0.7))

    axes.set_xlim([0,2])
    axes.set_ylim([0,2])

    axes.set_xticks([0.5,1.5],labels=['R','S'])
    axes.set_yticks([0.5,1.5],labels=['S','R'])

    axes.text(0.5,0.5,row['FN'],ha='center',va='center')
    axes.text(1.5,0.5,row['TN'],ha='center',va='center')
    axes.text(0.5,1.5,row['TP'],ha='center',va='center')
    axes.text(1.5,1.5,row['FP'],ha='center',va='center')

    fig.savefig('pdf/figure-3/truthtable-'+row['dataset']+'-'+row['model']+'.pdf', bbox_inches='tight')
    plt.close()

### Model validation on `test` set

We can now evaluate the trained models on the `test` dataset.

First let's load the trained models, the datasets and the results for Suspect-PZA


In [9]:
for model in ['LR', 'NN', 'XB']:
    best_model[model] = load('models/'+model.lower()+'.skops', trusted=True)

# load the test dataset
X['test']={}
Y['test']={}
Z['test']={}
with open('data/ds-test.npy', 'rb') as f:
    Y['test']['input'] = numpy.load(f)
    X['test']['input'] = numpy.load(f)
    Z['test']['input'] = numpy.load(f, allow_pickle=True)

# load in the results for SuspectPZA
suspectpza={}
for i in ['test']:
    suspectpza[i]={}
    with open('data/suspectpza-'+i+'.npy', 'rb') as f:
        suspectpza[i]['input'] = numpy.load(f)
        suspectpza[i]['predicted'] = numpy.load(f)
        suspectpza[i]['muts'] = numpy.load(f, allow_pickle=True)
line = []

def validate_model(line, best_model, model_name, X, Y):
        
    Y['test']['predicted'] = best_model.predict(X['test']['input'])
    Y['test']['scores'] = best_model.predict_proba(X['test']['input'])[:,1]
    row = construct_line(model_name , 'test', None, Y['test'], None)
    line.append(row)
    return(line)

for model in ['LR', 'NN', 'XB']:
    line = validate_model(line, best_model[model], model, X, Y)

line.append(construct_line('SP', 'test', None, suspectpza['test'], None))

# load the results for the training dataset
# results = pandas.read_csv('results-training.csv')
# results

In [10]:
def bootstrap_model(line, best_model, model_name, X, Y):
    x={}
    y={}
    for i in range(10):
        x['input'],y['input'] = resample(X['test']['input'], Y['test']['input'])
        y['predicted'] = best_model.predict(x['input'])
        y['scores'] = best_model.predict_proba(x['input'])[:,1]
        row = construct_line(model_name, 'test_'+str(i), None, y, None)
        line.append(row)
    return(line)

In [11]:
for model in ['LR', 'NN', 'XB']:
    line = bootstrap_model(line, best_model[model], model, X, Y)

In [12]:
test_results = pandas.DataFrame(line, columns=['model', 'dataset', 'sensitivity_mean', 'sensitivity_std', 'specificity_mean', 'specificity_std' ,'roc_auc_mean', 'roc_auc_std','TN','FP','FN','TP', 'model_parameters'])
test_results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters
0,LR,test,77.669903,,71.134021,,82.594335,,69,28,23,80,
1,NN,test,83.495146,,60.824742,,79.551596,,59,38,17,86,
2,XB,test,77.669903,,78.350515,,82.684416,,76,21,23,80,
3,SP,test,95.145631,,97.938144,,,,95,2,5,98,
4,LR,test_0,80.188679,,69.148936,,82.055399,,65,29,21,85,
5,LR,test_1,78.095238,,80.0,,85.974937,,76,19,23,82,
6,LR,test_2,73.958333,,70.192308,,82.642228,,73,31,25,71,
7,LR,test_3,86.0,,66.0,,86.11,,66,34,14,86,
8,LR,test_4,76.415094,,61.702128,,78.141309,,58,36,25,81,
9,LR,test_5,77.884615,,68.75,,79.927885,,66,30,23,81,


In [13]:
line = []
for model in ['LR', 'NN', 'XB']:
    sens_mean = test_results[(test_results.model==model) & (test_results.dataset!='test')].sensitivity_mean.mean()
    sens_std = 1.96*test_results[(test_results.model==model) & (test_results.dataset!='test')].sensitivity_mean.std()/10**0.5
    spec_mean = test_results[(test_results.model==model) & (test_results.dataset!='test')].specificity_mean.mean()
    spec_std = 1.96*test_results[(test_results.model==model) & (test_results.dataset!='test')].specificity_mean.std()/10**0.5
    roc_mean = test_results[(test_results.model==model) & (test_results.dataset!='test')].roc_auc_mean.mean()
    row = [model, 'bootstrapped', sens_mean, sens_std, spec_mean, spec_std, roc_mean, None, None, None, None, None, None ]
    line.append(row)

extra_rows = pandas.DataFrame(line, columns=['model', 'dataset', 'sensitivity_mean', 'sensitivity_std', 'specificity_mean', 'specificity_std' ,'roc_auc_mean', 'roc_auc_std','TN','FP','FN','TP', 'model_parameters'])

test_results  = pandas.concat([test_results, extra_rows])

In [14]:
test_results['diagnostic_odds_ratio_mean'] = (test_results['TN']*test_results['TP'])/(test_results['FN']*test_results['FP'])
test_results['diagnostic_odds_ratio_std'] = 0
test_results.to_csv('results-test.csv', index=False)
test_results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio_mean,diagnostic_odds_ratio_std
0,LR,test,77.669903,,71.134021,,82.594335,,69.0,28.0,23.0,80.0,,8.571429,0
1,NN,test,83.495146,,60.824742,,79.551596,,59.0,38.0,17.0,86.0,,7.854489,0
2,XB,test,77.669903,,78.350515,,82.684416,,76.0,21.0,23.0,80.0,,12.587992,0
3,SP,test,95.145631,,97.938144,,,,95.0,2.0,5.0,98.0,,931.0,0
4,LR,test_0,80.188679,,69.148936,,82.055399,,65.0,29.0,21.0,85.0,,9.07225,0
5,LR,test_1,78.095238,,80.0,,85.974937,,76.0,19.0,23.0,82.0,,14.26087,0
6,LR,test_2,73.958333,,70.192308,,82.642228,,73.0,31.0,25.0,71.0,,6.687742,0
7,LR,test_3,86.0,,66.0,,86.11,,66.0,34.0,14.0,86.0,,11.92437,0
8,LR,test_4,76.415094,,61.702128,,78.141309,,58.0,36.0,25.0,81.0,,5.22,0
9,LR,test_5,77.884615,,68.75,,79.927885,,66.0,30.0,23.0,81.0,,7.747826,0


In [21]:
for metric in ['sensitivity_mean', 'specificity_mean']:
    for i in ['XB', 'NN', 'LR']:
        for j in ['XB', 'NN', 'LR']:
            if i!=j:
                a = numpy.array(test_results[(test_results.model==i)& (test_results.dataset.str.contains('test_'))][metric])
                b = numpy.array(test_results[(test_results.model==j)& (test_results.dataset.str.contains('test_'))][metric])
                tstat, pvalue = ztest(a,b)
                if pvalue < 0.05:
                    print(metric, i,j,"Signficant",pvalue)
    print()

sensitivity_mean XB NN Signficant 7.109146110494462e-06
sensitivity_mean NN XB Signficant 7.109146110494462e-06
sensitivity_mean NN LR Signficant 4.5664359327664415e-08
sensitivity_mean LR NN Signficant 4.5664359327664415e-08

specificity_mean XB NN Signficant 1.845677334396293e-14
specificity_mean XB LR Signficant 5.606855647429165e-05
specificity_mean NN XB Signficant 1.845677334396293e-14
specificity_mean NN LR Signficant 0.0033715779603818077
specificity_mean LR XB Signficant 5.606855647429165e-05
specificity_mean LR NN Signficant 0.0033715779603818077



In [19]:
for metric in ['sensitivity', 'specificity', 'roc_auc']:
    colour='#888888'
    fig = plt.figure(figsize=(2.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=range(4)
    x=test_results[(test_results.model!='SP') & (test_results.dataset=='bootstrapped')].model
    y=test_results[(test_results.model!='SP') & (test_results.dataset=='bootstrapped')][metric+'_mean']
    e=test_results[(test_results.model!='SP') & (test_results.dataset=='bootstrapped')][metric+'_std']
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color=colour,linewidth=2)
        for (i,j,k) in zip(x,y,e):
            axes.text(i,j+k+2,'%.1f' % j,ha='center',color=colour)

    else:
        for (i,j) in zip(x,y):
            axes.text(i,j+2,'%.1f' % j,ha='center',color=colour)

    fig.savefig('pdf/figure-3/fig-3-test-'+metric+'.pdf', bbox_inches="tight")
    plt.close()

for metric in ['diagnostic_odds_ratio']:
    colour='#888888'
    fig = plt.figure(figsize=(2.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=range(4)
    x=test_results[(test_results.model!='SP') & (test_results.dataset=='test')].model
    y=test_results[(test_results.model!='SP') & (test_results.dataset=='test')][metric+'_mean']
    e=test_results[(test_results.model!='SP') & (test_results.dataset=='test')][metric+'_std']
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color=colour,linewidth=2)
        for (i,j,k) in zip(x,y,e):
            axes.text(i,j+k+2,'%.1f' % j,ha='center',color=colour)

    else:
        for (i,j) in zip(x,y):
            axes.text(i,j+2,'%.1f' % j,ha='center',color=colour)

    fig.savefig('pdf/figure-3/fig-3-test-'+metric+'.pdf', bbox_inches="tight")
    plt.close()    

In [22]:
for metric in ['sensitivity', 'specificity']:
    colour=['#888888','#888888','#888888','pink']
    fig = plt.figure(figsize=(3.2, 3.5))
    axes = plt.gca()
    axes.spines['top'].set_visible(False)
    axes.spines['right'].set_visible(False)
    axes.spines['left'].set_visible(False)
    axes.get_yaxis().set_visible(False)
    x=test_results[(test_results.dataset=='bootstrapped')].model
    y=test_results[(test_results.dataset=='bootstrapped')][metric+'_mean']
    e=test_results[(test_results.dataset=='bootstrapped')][metric+'_std']
    axes.set_ylim([0,100])
    axes.bar(x,y, label=y, edgecolor=colour, color='None',linewidth=2)

    if e.sum()>0:
        axes.errorbar(x,y,yerr=e, fmt='.',color='#888888',linewidth=2)
        for (i,j,k,c) in zip(x,y,e,colour):
            if k>0:
                axes.text(i,j+k+2,'%.1f' % j,ha='center',color=c)
            else:
                axes.text(i,j+2,'%.1f' % j,ha='center',color=c)

    else:
        for (i,j,c) in zip(x,y,colour):                
            axes.text(i,j+2,'%.1f' % j,ha='center',color=c)

    fig.savefig('pdf/figure-3/fig-3-test-'+metric+'-suspectpza.pdf', bbox_inches="tight")
    plt.close()

In [23]:
for idx,row in test_results[test_results.dataset=='test'].iterrows():

    fig = plt.figure(figsize=(1.5, 1.5))
    axes = plt.gca()

    axes.add_patch(Rectangle((0,0),1,1,fc='#e41a1c',alpha=0.7))
    axes.add_patch(Rectangle((0,1),1,1,fc='#4daf4a',alpha=0.7))
    axes.add_patch(Rectangle((1,1),1,1,fc='#fc9272',alpha=0.7))
    axes.add_patch(Rectangle((1,0),1,1,fc='#4daf4a',alpha=0.7))

    axes.set_xlim([0,2])
    axes.set_ylim([0,2])

    axes.set_xticks([0.5,1.5],labels=['R','S'])
    axes.set_yticks([0.5,1.5],labels=['S','R'])

    axes.text(0.5,0.5,row['FN'],ha='center',va='center')
    axes.text(1.5,0.5,row['TN'],ha='center',va='center')
    axes.text(0.5,1.5,row['TP'],ha='center',va='center')
    axes.text(1.5,1.5,row['FP'],ha='center',va='center')

    fig.savefig('pdf/figure-3/truthtable-'+row['dataset']+'-'+row['model']+'.pdf', bbox_inches='tight')
    plt.close()