In [2]:
import numpy, pandas, pathlib

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib_venn import venn3

from skops.io import load

# Make sure the figure output directory exists. parents=True also creates the
# intermediate 'pdf/' directory, so the notebook runs on a fresh checkout.
pathlib.Path('pdf/figure-5').mkdir(parents=True, exist_ok=True)

### Setup

First, let's load the trained models, the Test dataset, and the performance of the models on the Training dataset, which was calculated and written to disc by the previous notebook.

In [3]:
# Read the table of per-model, per-dataset performance metrics from disc
# and leave it as the last expression so the notebook renders it.
results_path = pathlib.Path('results-validation.csv')
results = pandas.read_csv(results_path)
results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio_mean,diagnostic_odds_ratio_std
0,LR,validation-samples,97.557471,,43.845535,,80.006802,,545,698,68,2716,,31.186162,0
1,LR,validation-samples-noU,98.615917,,58.116481,,85.212409,,469,338,32,2280,,98.864645,0
2,LR,validation-mutations,97.419355,,50.0,,87.243402,,22,22,4,151,,37.75,0
3,LR,mic,100.0,,14.285714,,68.0,,1,6,0,50,,inf,0
4,NN,validation-samples,94.755747,,48.833467,,77.079895,,607,636,146,2638,,17.244615,0
5,NN,validation-samples-noU,96.237024,,63.07311,,82.534983,,509,298,87,2225,,43.682982,0
6,NN,validation-mutations,96.774194,,52.272727,,81.480938,,23,21,5,150,,32.857143,0
7,NN,mic,96.0,,28.571429,,69.714286,,2,5,2,48,,9.6,0
8,XB,validation-samples,97.413793,,44.328238,,80.775215,,551,692,72,2712,,29.991811,0
9,XB,validation-samples-noU,97.83737,,61.214374,,86.706366,,494,313,50,2262,,71.40115,0


Plot some bar charts of the different performance metrics for each of the different datasets.

In [5]:
# Draw one bar chart per (metric, dataset) pair for every model except 'SP'
# and save each chart as its own PDF under pdf/figure-5/.
for metric in ['sensitivity', 'specificity', 'roc_auc', 'diagnostic_odds_ratio']:
    for dataset in ['validation-samples', 'validation-samples-noU', 'validation-mutations']:
        colour = '#888888'

        # Select the relevant rows once rather than re-filtering per column.
        subset = results[(results.dataset == dataset) & (results.model != 'SP')]
        models = subset.model
        means = subset[metric + '_mean']
        stds = subset[metric + '_std']

        fig = plt.figure(figsize=(2.2, 3.5))
        axes = plt.gca()

        # Keep only the baseline; the y axis is hidden because every bar
        # carries its value as a text label.
        axes.spines['top'].set_visible(False)
        axes.spines['right'].set_visible(False)
        axes.spines['left'].set_visible(False)
        axes.get_yaxis().set_visible(False)

        # NOTE(review): a fixed 0-100 range is applied to every metric,
        # including diagnostic_odds_ratio; bars above 100 would be clipped.
        # Confirm this is intended.
        axes.set_ylim([0, 100])
        axes.bar(models, means, label=means, edgecolor=colour, color='None', linewidth=2)

        # Series.sum() skips NaN, so this is False when no std was recorded.
        has_errors = stds.sum() > 0
        if has_errors:
            axes.errorbar(models, means, yerr=stds, fmt='.', color=colour, linewidth=2)

        for model, mean, std in zip(models, means, stds):
            # Lift the label clear of the error bar when there is one, but
            # always print the mean itself (not mean + std). `std > 0` is
            # False for NaN, so bars without a std keep the default offset.
            lift = std if (has_errors and std > 0) else 0
            axes.text(model, mean + lift + 2, '%.1f' % mean, ha='center', color=colour)

        fig.savefig('pdf/figure-5/fig-5-'+dataset+'-'+metric+'.pdf', bbox_inches="tight")
        # Close this specific figure so figures do not accumulate in memory.
        plt.close(fig)

Repeat, but include the results of SuspectPZA

In [6]:
# Same bar charts as before, but for all models in `results` (no 'SP'
# exclusion), with the 'SP' bar highlighted in pink. Each chart is written to
# its own '-suspectpza.pdf' file under pdf/figure-5/.
for metric in ['sensitivity', 'specificity']:
    for dataset in ['validation-samples', 'validation-samples-noU', 'validation-mutations']:
        default_colour = '#888888'

        # Select the relevant rows once rather than re-filtering per column.
        subset = results[results.dataset == dataset]
        models = subset.model
        means = subset[metric + '_mean']
        stds = subset[metric + '_std']

        # Colour by model label rather than by position, so the highlight
        # does not depend on row order or on there being exactly four models.
        # 'SP' is the label excluded from the earlier charts; presumably it
        # is SuspectPZA -- verify against the notebook that writes the CSV.
        colours = ['pink' if model == 'SP' else default_colour for model in models]

        fig = plt.figure(figsize=(3.2, 3.5))
        axes = plt.gca()

        # Keep only the baseline; the y axis is hidden because every bar
        # carries its value as a text label.
        axes.spines['top'].set_visible(False)
        axes.spines['right'].set_visible(False)
        axes.spines['left'].set_visible(False)
        axes.get_yaxis().set_visible(False)

        axes.set_ylim([0, 100])
        axes.bar(models, means, label=means, edgecolor=colours, color='None', linewidth=2)

        # Series.sum() skips NaN, so this is False when no std was recorded.
        has_errors = stds.sum() > 0
        if has_errors:
            axes.errorbar(models, means, yerr=stds, fmt='.', color=default_colour, linewidth=2)

        # Distinct loop names avoid rebinding the std Series mid-iteration.
        for model, mean, std, text_colour in zip(models, means, stds, colours):
            # Lift the label clear of the error bar only for bars that have
            # one; `std > 0` is False for NaN.
            lift = std if (has_errors and std > 0) else 0
            axes.text(model, mean + lift + 2, '%.1f' % mean, ha='center', color=text_colour)

        fig.savefig('pdf/figure-5/fig-5-'+dataset+'-'+metric+'-suspectpza.pdf', bbox_inches="tight")
        # Close this specific figure so figures do not accumulate in memory.
        plt.close(fig)

In [7]:
# For every row of `results`, draw a 2x2 grid of coloured unit squares, write
# the row's FN / TN / TP / FP counts into them, and save the figure as
# pdf/figure-5/truthtable-<dataset>-<model>.pdf.

# Lower-left corner and fill colour of each unit square, in drawing order.
cell_fills = (
    ((0, 0), '#e41a1c'),
    ((0, 1), '#4daf4a'),
    ((1, 1), '#fc9272'),
    ((1, 0), '#4daf4a'),
)

# Centre of each square and the `results` column whose value is printed there.
cell_counts = (
    ((0.5, 0.5), 'FN'),
    ((1.5, 0.5), 'TN'),
    ((0.5, 1.5), 'TP'),
    ((1.5, 1.5), 'FP'),
)

for row_label, record in results.iterrows():

    fig = plt.figure(figsize=(1.5, 1.5))
    axes = fig.gca()

    for corner, fill in cell_fills:
        axes.add_patch(Rectangle(corner, 1, 1, fc=fill, alpha=0.7))

    # The grid spans exactly the four unit squares.
    axes.set_xlim([0, 2])
    axes.set_ylim([0, 2])

    # Tick labels sit at the square centres; note that the y axis order
    # ('S' then 'R') is the reverse of the x axis order ('R' then 'S').
    axes.set_xticks([0.5, 1.5], labels=['R', 'S'])
    axes.set_yticks([0.5, 1.5], labels=['S', 'R'])

    for (centre_x, centre_y), column in cell_counts:
        axes.text(centre_x, centre_y, record[column], ha='center', va='center')

    target = 'pdf/figure-5/truthtable-' + '-'.join([record['dataset'], record['model']]) + '.pdf'
    fig.savefig(target, bbox_inches='tight')
    plt.close()