In [61]:
import pandas, numpy

from skops.io import load

from misc import construct_line

### Model validation

We can now evaluate the trained models on the `test` dataset. As discussed in the manuscript, we also have two further datasets (`validation` and `mic`) on which to test the models.


In [62]:
# Reload the fitted classifiers, keyed by their short upper-case code; each one
# is stored at models/<lower-case code>.skops.
# NOTE(review): trusted=True is understood to bypass skops' check for
# untrusted types -- only load model files produced by this project; confirm
# against the installed skops version.
best_model = {
    code: load(f'models/{code.lower()}.skops', trusted=True)
    for code in ('LR', 'NN', 'XB')
}

# X, Y and Z each map a dataset name to a dict whose 'input' entry is one of
# the three arrays stored back-to-back in data/ds-<name>.npy.
X, Y, Z = {}, {}, {}

for split in ('test', 'validation', 'mic'):
    X[split], Y[split], Z[split] = {}, {}, {}
    with open(f'data/ds-{split}.npy', 'rb') as handle:
        # The arrays must be read back in the order they sit in the file:
        # first Y, then X, then Z.
        Y[split]['input'] = numpy.load(handle)
        X[split]['input'] = numpy.load(handle)
        # NOTE(review): allow_pickle=True unpickles object arrays, which can
        # execute arbitrary code if the file comes from an untrusted source.
        Z[split]['input'] = numpy.load(handle, allow_pickle=True)

# SuspectPZA results: each data/suspectpza-<name>.npy file holds two arrays,
# stored here under 'input' and 'predicted' in the order they are read.
suspectpza = {}
for split in ('test', 'validation', 'mic'):
    with open(f'data/suspectpza-{split}.npy', 'rb') as handle:
        suspectpza[split] = {
            'input': numpy.load(handle),
            'predicted': numpy.load(handle),
        }

# Accumulator for the result rows appended by the cells further down.
line = []

The function below takes a supplied model, applies it to the features of each dataset and computes a range of metrics that we can use to evaluate the model's performance.

Note that this uses the separate `construct_line` function which can be found in `misc.py`.

In [63]:
def validate_model(line, best_model, model_name, X, Y, datasets=('test', 'validation', 'mic')):
    """Apply one fitted model to each dataset and append one result row per dataset.

    Parameters
    ----------
    line : list
        Accumulator of result rows. It is appended to in place and returned.
    best_model : object
        Fitted classifier exposing ``predict`` and ``predict_proba``.
    model_name : str
        Label passed through to ``construct_line`` as its first argument.
    X : dict
        ``X[dataset]['input']`` holds the features given to the model.
    Y : dict
        ``Y[dataset]`` is the dict handed to ``construct_line``. Its
        ``'predicted'`` and ``'scores'`` entries are overwritten on every
        call, so after the call they reflect this model only.
    datasets : iterable of str, optional
        Names of the datasets to evaluate, in order. Defaults to the three
        datasets used in this notebook.

    Returns
    -------
    list
        The same ``line`` object, extended by ``len(datasets)`` rows.
    """
    for dataset in datasets:
        features = X[dataset]['input']

        Y[dataset]['predicted'] = best_model.predict(features)
        # Keep only column 1 of the probability matrix (presumably the
        # probability of the positive class -- confirm the class ordering).
        Y[dataset]['scores'] = best_model.predict_proba(features)[:, 1]

        line.append(construct_line(model_name, dataset, None, Y[dataset], None))

    return line

Now all we have to do is iterate through the models and call the `validate_model` function

In [64]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every model's rows to results left over from an earlier run.
line = []

# validate_model appends one row per dataset and hands the same list back.
for model in ['LR', 'NN', 'XB']:
    line = validate_model(line, best_model[model], model, X, Y)

In [73]:
# Add one 'SP' row per dataset from the SuspectPZA results loaded earlier.
# Those dicts carry only 'input' and 'predicted' entries (no 'scores').
# NOTE: this appends to `line`, so re-running the cell on its own adds
# duplicate SP rows -- re-run the cells above first.
line.extend(
    construct_line('SP', split, None, suspectpza[split], None)
    for split in ('test', 'validation', 'mic')
)

As before, let's convert the results into a Pandas dataframe and save it to disk so we can plot graphs, create tables, etc.

In [74]:
# Turn the accumulated rows into a table; the names below label the row
# entries in order.
test_results = pandas.DataFrame(
    line,
    columns=[
        'model', 'dataset',
        'sensitivity_mean', 'sensitivity_std',
        'specificity_mean', 'specificity_std',
        'roc_auc_mean', 'roc_auc_std',
        'TN', 'FP', 'FN', 'TP',
        'model_parameters',
    ],
)

# Diagnostic odds ratio = (TN * TP) / (FN * FP).
# When FN or FP is zero the denominator is zero and the ratio comes out as inf.
test_results['diagnostic_odds_ratio'] = (
    test_results['TN'].mul(test_results['TP'])
    .div(test_results['FN'].mul(test_results['FP']))
)

# Write the table to a CSV file, leaving out the index column.
test_results.to_csv('results-test.csv', index=False)

test_results

Unnamed: 0,model,dataset,sensitivity_mean,sensitivity_std,specificity_mean,specificity_std,roc_auc_mean,roc_auc_std,TN,FP,FN,TP,model_parameters,diagnostic_odds_ratio
0,LR,test,78.640777,,70.103093,,82.684416,,68,29,22,81,,8.633229
1,LR,validation,97.557471,,43.845535,,80.006802,,545,698,68,2716,,31.186162
2,LR,mic,100.0,,14.285714,,68.0,,1,6,0,50,,inf
3,NN,test,76.699029,,67.010309,,79.861876,,65,32,24,79,,6.686198
4,NN,validation,94.755747,,48.833467,,77.079895,,607,636,146,2638,,17.244615
5,NN,mic,96.0,,28.571429,,69.714286,,2,5,2,48,,9.6
6,XB,test,77.669903,,75.257732,,82.764488,,73,24,23,80,,10.57971
7,XB,validation,97.413793,,44.328238,,80.775215,,551,692,72,2712,,29.991811
8,XB,mic,100.0,,14.285714,,66.0,,1,6,0,50,,inf
9,SP,test,95.145631,,97.938144,,,,95,2,5,98,,931.0
