In [46]:
import pandas, numpy, pathlib

from skops.io import load

from sklearn.utils import resample

import matplotlib.pyplot as plt
import seaborn

from misc import construct_line

# Make sure the figure output directory exists. parents=True also creates the
# intermediate 'pdf' directory, so this cell works from a clean checkout.
pathlib.Path('pdf/figure-5').mkdir(parents=True, exist_ok=True)

### Model validation on `validation` and `mic` sets

As discussed in the manuscript, we have two further datasets (`validation` & `mic`) to evaluate the models on.

First, let's load the trained models, the datasets, and the results for SuspectPZA.


In [47]:
# Fitted estimators, keyed by their short name.
# NOTE(review): trusted=True makes skops load every type in the file without
# checking it -- only use this with model files you produced yourself.
best_model = {
    model: load('models/'+model.lower()+'.skops', trusted=True)
    for model in ['LR', 'NN', 'XB']
}

# X, Y and Z each map dataset name -> {'input': array}. Every .npy file holds
# three arrays saved back to back; they are read in the order Y, X, Z.
# (X is later fed to the models and Z is compared against mutation names;
# Y presumably holds the matching labels -- confirm against the writer.)
X, Y, Z = {}, {}, {}

for i in ['validation-samples', 'validation-mutations', 'mic']:
    with open('data/ds-'+i+'.npy', 'rb') as f:
        first_array = numpy.load(f)
        second_array = numpy.load(f)
        # NOTE(review): allow_pickle=True can execute code from the file;
        # acceptable only for trusted, locally generated data.
        third_array = numpy.load(f, allow_pickle=True)
    Y[i] = {'input': first_array}
    X[i] = {'input': second_array}
    Z[i] = {'input': third_array}

# load in the results for SuspectPZA (same three-arrays-per-file layout)
suspectpza = {}
for i in ['validation-samples', 'validation-mutations','mic']:
    with open('data/suspectpza-'+i+'.npy', 'rb') as f:
        # dict entries are evaluated top to bottom, preserving the read order
        suspectpza[i] = {
            'input': numpy.load(f),
            'predicted': numpy.load(f),
            'muts': numpy.load(f, allow_pickle=True),
        }

# accumulator for the result rows built by the cells below
line = []

For comparison later, let's make a subset of `validation-samples` that excludes those mutations which have an inconsistent phenotype, either because too few are in the dataset or because they do not consistently test as R or S, suggesting they may be near the breakpoint/ECOFF.

We need to do this for both the `validation-samples` features and the SuspectPZA results.

In [48]:
df = pandas.read_csv('data/ds-validation-mutations-full.csv')

# Mutations whose CONSISTENT_PHENOTYPE is flagged 'U'; computed once and used
# for both the model features and the SuspectPZA results.
inconsistent_mutations = df.loc[df.CONSISTENT_PHENOTYPE == 'U', 'MUTATION'].unique()

# Keep only the samples whose mutation is not in that list.
sample_mask = ~numpy.isin(Z['validation-samples']['input'], inconsistent_mutations)
for store in (X, Y, Z):
    store['validation-samples-noU'] = {
        'input': store['validation-samples']['input'][sample_mask]
    }

# Same filter for SuspectPZA, keyed on its own mutation array.
mask = ~numpy.isin(suspectpza['validation-samples']['muts'], inconsistent_mutations)
suspectpza['validation-samples-noU'] = {
    field: suspectpza['validation-samples'][field][mask]
    for field in ('input', 'predicted', 'muts')
}

The function below takes a supplied model, applies it to the features of the different datasets, and measures a range of metrics we can use to evaluate its performance.

Note that this uses the separate `construct_line` function which can be found in `misc.py`.

In [49]:
def validate_model(line, best_model, model_name, X, Y, datasets=None):
    """Apply one fitted model to each dataset and append one result row per dataset.

    Parameters
    ----------
    line : list
        Accumulator of result rows. It is extended in place and also returned.
    best_model : estimator
        Fitted classifier exposing ``predict`` and ``predict_proba``.
    model_name : str
        Label passed through to ``construct_line`` (e.g. ``'LR'``).
    X : dict
        ``X[dataset]['input']`` is the feature array given to the model.
    Y : dict
        ``Y[dataset]`` is the dict handed to ``construct_line``. This function
        writes the model output into it under ``'predicted'`` and ``'scores'``,
        overwriting whatever a previous call stored there.
    datasets : iterable of str, optional
        Dataset names to evaluate. Defaults to ``'validation-samples'``,
        ``'validation-samples-noU'``, ``'validation-mutations'`` and ``'mic'``.

    Returns
    -------
    list
        The same ``line`` object, with the new rows appended.
    """
    if datasets is None:
        datasets = ['validation-samples', 'validation-samples-noU', 'validation-mutations', 'mic']

    for dataset in datasets:
        features = X[dataset]['input']

        Y[dataset]['predicted'] = best_model.predict(features)
        # keep only the second column of the predict_proba output as the score
        Y[dataset]['scores'] = best_model.predict_proba(features)[:, 1]

        line.append(construct_line(model_name, dataset, None, Y[dataset], None))

    return line

In [50]:
# Start from an empty accumulator so that re-running this cell does not append a
# second copy of the point-estimate rows to `line`.
line = []
for model in ['LR', 'NN', 'XB']:
    line = validate_model(line, best_model[model], model, X, Y)

In [51]:
# Add one row per dataset for the SuspectPZA results, labelled 'SP', so they
# sit alongside the rows for the trained models.
sp_datasets = ['validation-samples', 'validation-samples-noU', 'validation-mutations', 'mic']
for i in sp_datasets:
    sp_row = construct_line('SP', i, None, suspectpza[i], None)
    line.append(sp_row)

In [52]:
def bootstrap_model(line, best_model, model_name, X, Y, n_bootstraps=10, random_state=None, datasets=None):
    """Evaluate a fitted model on resampled copies of each dataset.

    For every dataset, the features and ``Y[dataset]['input']`` are resampled
    together ``n_bootstraps`` times with ``sklearn.utils.resample``; the model
    is applied to each replicate and one row per replicate is appended to
    ``line``. Replicate ``k`` of dataset ``d`` is labelled ``'<d>_<k>'``.

    Parameters
    ----------
    line : list
        Accumulator of result rows. It is extended in place and also returned.
    best_model : estimator
        Fitted classifier exposing ``predict`` and ``predict_proba``.
    model_name : str
        Label passed through to ``construct_line`` (e.g. ``'LR'``).
    X, Y : dict
        ``X[dataset]['input']`` and ``Y[dataset]['input']`` are the arrays that
        are resampled. Neither dict is modified.
    n_bootstraps : int, default 10
        Number of replicates drawn per dataset.
    random_state : int, numpy.random.RandomState or None, default None
        Seed or generator for the resampling. ``None`` keeps the original
        unseeded behaviour; pass an int for reproducible replicates.
    datasets : iterable of str, optional
        Dataset names to bootstrap. Defaults to ``'validation-samples'``,
        ``'validation-samples-noU'``, ``'validation-mutations'`` and ``'mic'``.

    Returns
    -------
    list
        The same ``line`` object, with the new rows appended.
    """
    if datasets is None:
        datasets = ['validation-samples', 'validation-samples-noU', 'validation-mutations', 'mic']

    # An int seed is turned into one shared RandomState: it advances on every
    # draw, so the replicates differ from each other yet the whole sequence is
    # reproducible. Passing the bare int to each call would repeat one sample.
    if random_state is None or isinstance(random_state, numpy.random.RandomState):
        rng = random_state
    else:
        rng = numpy.random.RandomState(random_state)

    for dataset in datasets:
        for i in range(n_bootstraps):
            features, targets = resample(X[dataset]['input'], Y[dataset]['input'], random_state=rng)
            # fresh dict per replicate so rows never share mutable state
            y = {
                'input': targets,
                'predicted': best_model.predict(features),
                # keep only the second column of the predict_proba output
                'scores': best_model.predict_proba(features)[:, 1],
            }
            line.append(construct_line(model_name, dataset+'_'+str(i), None, y, None))

    return line

In [53]:
# Append the bootstrap replicate rows for each trained model to `line`.
for model in ('LR', 'NN', 'XB'):
    fitted_model = best_model[model]
    line = bootstrap_model(line, fitted_model, model, X, Y)

In [54]:
# Turn the accumulated rows into a table; the column order must match the
# order of the values in each row of `line`.
result_columns = [
    'model', 'dataset',
    'sensitivity_mean', 'sensitivity_std',
    'specificity_mean', 'specificity_std',
    'roc_auc_mean', 'roc_auc_std',
    'TN', 'FP', 'FN', 'TP',
    'model_parameters',
]
test_results = pandas.DataFrame(line, columns=result_columns)
test_results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters
0,LR,validation-samples,97.557471,,43.845535,,80.006802,,545,698,68,2716,
1,LR,validation-samples-noU,98.615917,,58.116481,,85.212409,,469,338,32,2280,
2,LR,validation-mutations,97.419355,,50.000000,,87.243402,,22,22,4,151,
3,LR,mic,100.000000,,14.285714,,68.000000,,1,6,0,50,
4,NN,validation-samples,94.755747,,48.833467,,77.079895,,607,636,146,2638,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,XB,mic_5,100.000000,,0.000000,,66.071429,,0,1,0,56,
132,XB,mic_6,100.000000,,10.000000,,57.021277,,1,9,0,47,
133,XB,mic_7,100.000000,,16.666667,,50.653595,,1,5,0,51,
134,XB,mic_8,100.000000,,25.000000,,54.716981,,1,3,0,53,


In [55]:
# Summarise the bootstrap replicates: one 'bootstrapped-<dataset>' row per
# (dataset, model) holding the mean of each metric over the replicates and a
# 1.96 * std / sqrt(n) half-width for sensitivity and specificity.
# NOTE(review): dividing the replicate standard deviation by sqrt(n) gives an
# interval for the mean of the replicates; the usual bootstrap interval is
# 1.96 * std without that division. Kept as in the original -- confirm intent.
line = []
for i in ['validation-samples', 'validation-samples-noU', 'validation-mutations', 'mic']:
    for model in ['LR', 'NN', 'XB']:
        # Replicates are named '<dataset>_<k>'. Match that exact prefix: a
        # substring test on 'validation-samples' would also select every
        # 'validation-samples-noU' row and mix two datasets into one summary.
        is_replicate = (test_results.model == model) & test_results.dataset.str.startswith(i + '_')
        replicates = test_results[is_replicate]
        n_replicates = len(replicates)

        sens_mean = replicates.sensitivity_mean.mean()
        spec_mean = replicates.specificity_mean.mean()
        roc_mean = replicates.roc_auc_mean.mean()
        if n_replicates:
            # use the actual replicate count rather than a hard-coded 10
            sens_std = 1.96*replicates.sensitivity_mean.std()/n_replicates**0.5
            spec_std = 1.96*replicates.specificity_mean.std()/n_replicates**0.5
        else:
            sens_std = spec_std = float('nan')

        row = [model, 'bootstrapped-'+i, sens_mean, sens_std, spec_mean, spec_std, roc_mean, None, None, None, None, None, None ]
        line.append(row)

extra_rows = pandas.DataFrame(line, columns=['model', 'dataset', 'sensitivity_mean', 'sensitivity_std', 'specificity_mean', 'specificity_std' ,'roc_auc_mean', 'roc_auc_std','TN','FP','FN','TP', 'model_parameters'])

# Drop summary rows left by an earlier run of this cell so that re-running it
# does not duplicate them, and renumber the index so labels stay unique.
already_summarised = test_results.dataset.str.startswith('bootstrapped-')
test_results = pandas.concat([test_results[~already_summarised], extra_rows], ignore_index=True)
test_results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters
0,LR,validation-samples,97.557471,,43.845535,,80.006802,,545,698,68,2716,
1,LR,validation-samples-noU,98.615917,,58.116481,,85.212409,,469,338,32,2280,
2,LR,validation-mutations,97.419355,,50.000000,,87.243402,,22,22,4,151,
3,LR,mic,100.000000,,14.285714,,68.000000,,1,6,0,50,
4,NN,validation-samples,94.755747,,48.833467,,77.079895,,607,636,146,2638,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,NN,bootstrapped-validation-mutations,96.505404,0.875136,54.032237,4.43408,80.605272,,,,,,
8,XB,bootstrapped-validation-mutations,97.825508,0.766452,55.255627,5.065366,87.374763,,,,,,
9,LR,bootstrapped-mic,100.000000,0.0,17.651515,6.194888,70.270296,,,,,,
10,NN,bootstrapped-mic,94.928629,2.655925,23.358586,11.979755,69.562217,,,,,,


In [59]:
# The diagnostic odds ratio, (TN*TP)/(FN*FP), is not calculated here; both of
# its columns are filled with a placeholder 0 instead.
# NOTE(review): presumably because FN or FP can be zero (the LR 'mic' row has
# FN = 0) and the 'bootstrapped-' rows carry no confusion-matrix counts --
# confirm, and treat these columns as "not computed", not as real values.
for placeholder_column in ('diagnostic_odds_ratio_mean', 'diagnostic_odds_ratio_std'):
    test_results[placeholder_column] = 0

# save to disk as a CSV
test_results.to_csv('results-validation.csv', index=False)

test_results.head(20)

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio_std,diagnostic_odds_ratio_mean
0,LR,validation-samples,97.557471,,43.845535,,80.006802,,545,698,68,2716,,0,0
1,LR,validation-samples-noU,98.615917,,58.116481,,85.212409,,469,338,32,2280,,0,0
2,LR,validation-mutations,97.419355,,50.0,,87.243402,,22,22,4,151,,0,0
3,LR,mic,100.0,,14.285714,,68.0,,1,6,0,50,,0,0
4,NN,validation-samples,94.755747,,48.833467,,77.079895,,607,636,146,2638,,0,0
5,NN,validation-samples-noU,96.237024,,63.07311,,82.534983,,509,298,87,2225,,0,0
6,NN,validation-mutations,96.774194,,52.272727,,81.480938,,23,21,5,150,,0,0
7,NN,mic,96.0,,28.571429,,69.714286,,2,5,2,48,,0,0
8,XB,validation-samples,97.413793,,44.328238,,80.775215,,551,692,72,2712,,0,0
9,XB,validation-samples-noU,97.83737,,61.214374,,86.706366,,494,313,50,2262,,0,0


In [60]:
# Sanity check: number of result rows recorded for each model label.
test_results['model'].value_counts()

LR    48
NN    48
XB    48
SP     4
Name: model, dtype: int64