In [72]:
import numpy, pandas, pathlib

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from skops.io import load

# Make sure the output directory for this figure exists. parents=True also
# creates the enclosing 'pdf' directory, so the notebook runs from a clean checkout.
pathlib.Path('pdf/figure-5').mkdir(parents=True, exist_ok=True)

### Setup

First, let's load the results of applying the models to the different validation datasets.

In [73]:
# load the model results on the validation datasets (results-validation.csv)
results = pandas.read_csv('results-validation.csv')
# display the frame as the cell output
results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio_mean,diagnostic_odds_ratio_std
0,NN,validation-samples,90.517241,,51.649236,,73.731156,,642.0,601.0,264.0,2520.0,,0,0
1,NN,validation-samplesnoU,91.479239,,66.666667,,80.821306,,538.0,269.0,197.0,2115.0,,0,0
2,NN,validation-mutations,94.193548,,52.272727,,77.565982,,23.0,21.0,9.0,146.0,,0,0
3,NN,mic,94.000000,,42.857143,,63.142857,,3.0,4.0,3.0,47.0,,0,0
4,XB,validation-samples,97.198276,,46.017699,,80.722592,,572.0,671.0,78.0,2706.0,,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1223,NN,bootstrapped-validation-mutations,94.359947,0.365648,51.587111,1.450498,77.187250,,,,,,,0,0
1224,XB,bootstrapped-validation-mutations,97.443912,0.256103,58.732489,1.508175,88.496617,,,,,,,0,0
1225,LR,bootstrapped-mic,100.000000,0.000000,0.000000,0.000000,68.560470,,,,,,,0,0
1226,NN,bootstrapped-mic,94.142869,0.673937,37.686291,3.895711,60.072341,,,,,,,0,0


Plot bar charts of the different performance metrics for each of the different datasets.

In [74]:
# Peek at the bootstrapped summary rows for the validation-samples dataset
results.loc[results['dataset'] == 'bootstrapped-validation-samples']

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio_mean,diagnostic_odds_ratio_std
1216,LR,bootstrapped-validation-samples,98.084273,0.047566,40.496874,0.297829,79.732964,,,,,,,0,0
1217,NN,bootstrapped-validation-samples,90.574313,0.090719,51.523599,0.298339,73.712663,,,,,,,0,0
1218,XB,bootstrapped-validation-samples,97.172889,0.057947,46.343021,0.282609,80.781792,,,,,,,0,0


In [75]:
# Draw one bar chart per (metric, dataset) pair comparing the models on the
# bootstrapped results, leaving out SuspectPZA (model 'SP'). Each chart is
# written to pdf/figure-5/fig-5-<dataset>-<metric>.pdf.
for metric in ['sensitivity', 'specificity', 'roc_auc', 'diagnostic_odds_ratio']:
    for dataset in ['validation-samples', 'validation-samplesnoU', 'validation-mutations']:
        colour = '#888888'

        # Select the rows once and reuse them for the labels, means and stds
        subset = results[(results.dataset == 'bootstrapped-' + dataset) & (results.model != 'SP')]
        x = subset.model
        y = subset[metric + '_mean']
        e = subset[metric + '_std']

        fig = plt.figure(figsize=(2.2, 3.5))
        axes = fig.gca()
        # Only the baseline is kept; the bar values are written as text instead
        for side in ('top', 'right', 'left'):
            axes.spines[side].set_visible(False)
        axes.get_yaxis().set_visible(False)
        axes.set_ylim([0, 100])
        axes.bar(x, y, label=y, edgecolor=colour, color='None', linewidth=2)

        # Error bars are only drawn when at least one std is available and non-zero
        if e.sum() > 0:
            axes.errorbar(x, y, yerr=e, fmt='.', color=colour, linewidth=2)

        # Annotate every bar with its MEAN. The label is lifted above the error
        # bar when the bar has one; a missing (NaN) or zero std adds no offset.
        for name, mean, std in zip(x, y, e):
            lift = std if std > 0 else 0
            axes.text(name, mean + lift + 2, '%.1f' % mean, ha='center', color=colour)

        fig.savefig('pdf/figure-5/fig-5-'+dataset+'-'+metric+'.pdf',bbox_inches='tight',transparent=True)
        # Close this specific figure so figures do not accumulate across iterations
        plt.close(fig)

Repeat, but include the results of SuspectPZA

In [76]:
# Same bar charts as the previous cell, but without filtering out any model so
# that SuspectPZA (model 'SP') is included and highlighted in pink. Each chart
# is written to pdf/figure-5/fig-5-<dataset>-<metric>-suspectpza.pdf.
for metric in ['sensitivity', 'specificity']:
    for dataset in ['validation-samples', 'validation-samplesnoU', 'validation-mutations']:
        # Select the rows once and reuse them for the labels, means and stds
        subset = results[results.dataset == 'bootstrapped-' + dataset]
        x = subset.model
        y = subset[metric + '_mean']
        e = subset[metric + '_std']

        # Colour by model name rather than by position, so the highlight always
        # lands on SP regardless of how many rows there are or their order
        colour = ['pink' if name == 'SP' else '#888888' for name in x]

        fig = plt.figure(figsize=(3.2, 3.5))
        axes = fig.gca()
        # Only the baseline is kept; the bar values are written as text instead
        for side in ('top', 'right', 'left'):
            axes.spines[side].set_visible(False)
        axes.get_yaxis().set_visible(False)
        axes.set_ylim([0, 100])
        axes.bar(x, y, label=y, edgecolor=colour, color='None', linewidth=2)

        # Error bars are only drawn when at least one std is available and non-zero
        if e.sum() > 0:
            axes.errorbar(x, y, yerr=e, fmt='.', color='#888888', linewidth=2)

        # Annotate every bar with its mean, lifted above the error bar when the
        # bar has one. Distinct loop names avoid shadowing the std Series `e`.
        for name, mean, std, text_colour in zip(x, y, e, colour):
            lift = std if std > 0 else 0
            axes.text(name, mean + lift + 2, '%.1f' % mean, ha='center', color=text_colour)

        fig.savefig('pdf/figure-5/fig-5-'+dataset+'-'+metric+'-suspectpza.pdf',bbox_inches='tight',transparent=True)
        # Close this specific figure so figures do not accumulate across iterations
        plt.close(fig)

In [77]:

# Render a small 2x2 truth table (TP / FP / FN / TN counts) for each model on
# each dataset and save it as pdf/figure-5/truthtable-<dataset>-<model>.pdf.

# Lower-left corner and fill colour of each unit square, in drawing order
quadrant_fills = [
    ((0, 0), '#e41a1c'),
    ((0, 1), '#4daf4a'),
    ((1, 1), '#fc9272'),
    ((1, 0), '#4daf4a'),
]
# Column holding the count and the centre of the square it is written in
count_positions = [
    ('FN', 0.5, 0.5),
    ('TN', 1.5, 0.5),
    ('TP', 0.5, 1.5),
    ('FP', 1.5, 1.5),
]

for dataset_name in ['validation-samples', 'validation-samplesnoU', 'validation-mutations', 'mic']:
    for model in ['LR', 'NN', 'XB']:

        matches = results[(results.model == model) & (results.dataset == dataset_name)]

        # One figure per matching row; rows sharing dataset and model write to the same file
        for _, record in matches.iterrows():

            fig = plt.figure(figsize=(1.5, 1.5))
            axes = plt.gca()

            for corner, fill in quadrant_fills:
                axes.add_patch(Rectangle(corner, 1, 1, fc=fill, alpha=0.7))

            axes.set_xlim([0, 2])
            axes.set_ylim([0, 2])

            # Ticks sit at the centre of each square
            axes.set_xticks([0.5, 1.5], labels=['R', 'S'])
            axes.set_yticks([0.5, 1.5], labels=['S', 'R'])

            for column, cx, cy in count_positions:
                axes.text(cx, cy, int(record[column]), ha='center', va='center')

            fig.savefig('pdf/figure-5/truthtable-'+record['dataset']+'-'+record['model']+'.pdf', bbox_inches='tight')
            plt.close(fig)