# Model analysis
Load model and analyse performance

In [None]:
import os
from pathlib import Path
import time
from datetime import datetime
from dateutil.tz import gettz
import itertools

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from tensorflow.keras.models import model_from_json

from sklearn.metrics import confusion_matrix

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

# Seed NumPy's legacy global random number generator with a fixed value.
np.random.seed(999123)

In [None]:
# Root directory of the datasets; each dataset sits in a sub-directory named after it.
fdir = '../../data/private_data/private_events_dev2'
# Dataset name: used as the sub-directory name and as the prefix of the data file names.
fname = 'private_correct_plus'
# Architecture key passed to reshape() to choose the input layout for the network.
model_type = 'MLP'

In [None]:
# Path prefix of the saved model: '<prefix>.json' is read for the architecture
# and '<prefix>.h5' for the weights.
modelfile = '../../logs/2019-05-11T19:09/private_correct_plus/model'
# Read the architecture inside a context manager so the file handle is
# closed even if the read fails.
with open(modelfile+'.json', 'r') as json_file:
    loaded_json_model = json_file.read()
model = model_from_json(loaded_json_model)
# load weights into new model
model.load_weights(modelfile+'.h5')
print('Model loaded from file', modelfile)

In [None]:
def readucr(filename):
    ''' Load a dataset from a file in UCR format:
    whitespace delimited, one sample per row, class label in the first column.

    Parameters
    filename : str or path-like location of the text file

    Returns
    X : 2D array of DNN input data, one row per sample
    Y : 1D array of class labels
    '''
    # ndmin=2 keeps a single-sample file as one row; without it loadtxt
    # returns a 1D array and the column slicing below raises IndexError.
    data = np.loadtxt(Path(filename), ndmin=2)
    Y = data[:,0]
    X = data[:,1:]
    return X, Y


def reshape(x, model_type):
    ''' Reshape data into input format for the selected DNN.

    Parameters
    x : input data
    model_type : one of 'ResNet', 'FCN', 'FCN_HARUS', 'ResNet_tuned', 'MLP'

    Returns
    The data passed through reshape_2d ('ResNet'), through reshape_1d
    ('FCN', 'FCN_HARUS', 'ResNet_tuned'), or unchanged ('MLP').

    Raises
    ValueError if model_type is not one of the recognised keys.

    NOTE(review): reshape_1d and reshape_2d are not defined in the visible part
    of this notebook; they must be in scope before a non-MLP type is used.
    '''
    if model_type == 'ResNet':
        return reshape_2d(x)
    if model_type in ('FCN', 'FCN_HARUS', 'ResNet_tuned'):
        return reshape_1d(x)
    if model_type == 'MLP':
        return x
    raise ValueError('Unrecognised model type')


# Normalisation statistics: mean and std taken over every value in the training inputs
train_file = fdir+'/'+fname+'/'+fname+'_TRAIN.txt'
x_train, y_train = readucr(train_file)
x_train_mean, x_train_std = x_train.mean(), x_train.std()
model_params = dict(x_train_mean=x_train_mean, x_train_std=x_train_std)

# Data set to evaluate on. Other suffixes left by the author:
# '_dog_incorrect', 'private_dog0_correct_plus_END_TEST'
datadir = fdir+'/'+fname
other = fname+'_END_TEST'
other_file = datadir+'/'+other+'.txt'
print('Testing on:', other_file)
x_other, y_other = readucr(other_file)


def predictions(model, model_params, model_type, 
                x_input, y_input, name, threshold=0.5, do_print=True):
    ''' Use the model to make predictions on x_input data. Return the predictions and the calculated accuracy.

    Parameters
    model : object providing predict_on_batch(x)
    model_params : dict with keys 'x_train_mean' and 'x_train_std', used to standardise x_input
    model_type : architecture key passed to reshape()
    x_input : input samples
    y_input : true class labels; rescaled here so that the smallest label becomes 0
    name : not used by this function; kept so that existing calls keep working
    threshold : outputs strictly greater than this are predicted as class 1
    do_print : if True (the default) print sample predictions, the confusion
        matrix, the accuracy and the class balance

    Returns
    y_probs : model outputs as a NumPy array
    y_pred : flattened array of 0/1 predictions
    acc_calc : sum of the confusion-matrix diagonal divided by the number of samples
    '''
    x_scaled = (x_input - model_params['x_train_mean'])/(model_params['x_train_std'])
    x_scaled = reshape(x_scaled, model_type)

    # Map the labels onto 0..nb_classes-1.
    y_true = np.asarray(y_input, dtype=float)
    y_true = y_true - y_true.min()
    nb_classes = len(np.unique(y_true))
    label_span = y_true.max()
    # With only one class present the span is 0; rescaling would compute 0/0
    # and turn every label into NaN, so the shifted labels (all 0) are kept.
    if label_span > 0:
        y_true = y_true/label_span*(nb_classes-1)
    # Class balance
    n0 = (y_true == 0).sum()
    n1 = (y_true == 1).sum()

    # Calculate model prediction.
    # np.asarray guards against predict_on_batch returning a tensor-like object
    # instead of an ndarray.
    y_probs = np.asarray(model.predict_on_batch(x_scaled))
    # One rule for every threshold. For outputs in [0, 1] and threshold 0.5 this
    # matches np.round, which rounds exactly 0.5 down to 0.
    # flatten() copies, so y_probs itself is left untouched.
    y_pred = (y_probs.flatten() > threshold).astype(y_probs.dtype)

    # labels=[1,0] puts class 1 first: cm[0][0] counts true 1 / predicted 1.
    cm = confusion_matrix(y_true, y_pred, labels=[1,0])
    acc_calc = (cm[0][0]+cm[1][1])/(cm.sum())
    if do_print:
        print('Predicted class probabilities:\n', y_probs[:5,:])
        print('Pred', y_pred[:20])
        print('True', y_true[:20].astype(int))
        print(cm)
        print('Calculated accuracy:',acc_calc)
        print('Class balance in test set:', n0, 'to', n1, 'i.e.', n0/(n0+n1))

    return y_probs, y_pred, acc_calc

#y_probs, y_pred, acc = predictions(model, model_params, model_type, x_test, y_test, fname)

In [None]:
# Score the network on the END_TEST data set
other = fname+'_END_TEST' 
datadir = fdir+'/'+fname
other_path = datadir+'/'+other
print('Testing on:', other_path+'.txt')
x_other, y_other = readucr(other_path+'.txt')
y_other_probs, y_other_pred, other_acc = predictions(
    model, model_params, model_type, x_other, y_other, other)

# Get dog result: the 'dog_pred' column of the metadata file is scored against
# the same labels.
# NOTE(review): y_other is used as loaded here, whereas predictions() rescales
# its labels internally - confirm the labels in the file are already 0/1.
meta = pd.read_csv(other_path+'_meta.txt', sep=',', parse_dates=['date'])
cm = confusion_matrix(y_other, meta['dog_pred'], labels=[1,0])
print('Dog cm \n', cm)
dog_acc = np.trace(cm)/cm.sum()
# Row-normalised confusion matrix (each true-class row sums to 1)
cm_norm = cm/cm.sum(axis=1, keepdims=True)
print('True', y_other[:20])
print('Dog ', meta['dog_pred'].values[:20])
print('Dog accuracy', dog_acc)

# Classification probability
On tuned MLP, trained on all dogs correct.

In [None]:
# Two-colour map: colour 0 (orange) for label 0, colour 1 (blue) for label 1
class_colors = ['darkorange', 'steelblue']
class_cmap = matplotlib.colors.ListedColormap(class_colors)
fig, ax = plt.subplots()
sample_numbers = np.arange(y_other_probs.shape[0])
ax.scatter(sample_numbers, y_other_probs, linestyle='None', marker='x',
           c=y_other, cmap=class_cmap)
ax.set_title('Orange: true class 0\nBlue: true class 1')
ax.set_xlabel('Test sample number')
ax.set_ylabel('Model: probability of belonging to class 1')
ax.set_ylim(bottom=0, top=1)

In [None]:
# One row per test sample: true label, DNN prediction and DNN output
probs_df = pd.DataFrame({
    'true': y_other.astype(int),
    'dnn_pred': y_other_pred.astype(int),
    'dnn_prob': y_other_probs[:,0],
})
# Column-wise join with the metadata; rows are matched on the index
df = pd.concat([meta, probs_df], axis=1)
print(df.head())

# DNN outputs for the samples it classified incorrectly
wrong_probs = probs_df.loc[probs_df['dnn_pred'] != probs_df['true'], 'dnn_prob']

# Most confident incorrect answer for true class 1
min_prob = wrong_probs.min()
print(probs_df[probs_df['dnn_prob'] == min_prob])
print(wrong_probs.nsmallest(10))

# Most confident incorrect answer for true class 0
max_prob = wrong_probs.max()
print(probs_df[probs_df['dnn_prob'] == max_prob])
print(wrong_probs.nlargest(5))

In [None]:
def get_result_key(true, pred):
    ''' Return TP, TN, FP, FN as appropriate.

    Parameters
    true : true class; the value 1 is treated as positive, anything else as negative
    pred : predicted class; the value 1 is treated as positive, anything else as negative

    Returns
    'TP', 'FN', 'FP' or 'TN'
    '''
    # No printing here: the function is called twice per subplot, so debug
    # output would swamp the cell output.
    if true == 1:
        return 'TP' if pred == 1 else 'FN'
    return 'FP' if pred == 1 else 'TN'


def add_to_plot(plot_i, data_i, color):
    ''' Add a subplot using the given data sample.

    Reads the notebook globals df, x_other and ax.

    plot_i : index into ax of the subplot to draw on
    data_i : positional row of df / x_other to show
    color : background colour of the subplot
    '''
    sample = df.iloc[data_i]
    print(sample[['filename', 'sensor_number', 'dog_result', 'Concentration']])

    panel = ax[plot_i]
    panel.set_ylim(bottom=0, top=2.2)
    panel.plot(x_other[data_i], color='red')
    panel.set_facecolor(color)

    true_class = sample['class']
    dnn_class = sample['dnn_pred']
    dnn_prob = '{0:.2f}'.format(sample['dnn_prob'])
    dog_pred = int(sample['dog_pred'])
    dnn_result = get_result_key(true_class, dnn_class)
    dog_result = get_result_key(true_class, dog_pred)

    # Title: the three class values, then the TP/TN/FP/FN key for DNN and dog
    title = ''.join([
        'True ', str(true_class),
        ' : DNN ', str(dnn_class),
        ' (p = ', str(dnn_prob), ')',
        ' : dog ', str(dog_pred),
        '   (DNN ', dnn_result, ' : dog ', dog_result, ')',
    ])
    panel.set_title(title)

In [None]:
# Top 4 most confident incorrect answer for true class 1
fig, ax = plt.subplots(4, 1, sharex='col', sharey='row', figsize=(10, 8))
add_to_plot(0, 138, 'lightcyan')
add_to_plot(1, 126, 'lightcyan')
add_to_plot(2, 44, 'lightcyan')
add_to_plot(3, 108, 'lightcyan')
plt.savefig('model_analysis_class_1_'+fname+'.png', bbox_inches='tight')

In [None]:
# Next 4 most confident incorrect answer for true class 1
fig, ax = plt.subplots(4, 1, sharex='col', sharey='row', figsize=(10, 8))
add_to_plot(0, 105, 'lightcyan')
add_to_plot(1, 32, 'lightcyan')
add_to_plot(2, 74, 'lightcyan')
add_to_plot(3, 27, 'lightcyan')

plt.savefig('model_analysis_class_1_plot2_'+fname+'.png', bbox_inches='tight')

In the 8 plots above, the concentration ranges from
+ 2e-8, which is 1 in 50m, to
+ 2e-7, which is 1 in 5m

In the lab tests the lowest concentration was 1 in 200m. Only 22 passes were at 1 in 50m or weaker, but around 30% of positive passes were at 1 in 25m or weaker. So the passes where the DNN gave FN were not unusually weak.

In [None]:
# Top 4 most confident incorrect answer for true class 0
fig, ax = plt.subplots(4, 1, sharex='col', sharey='row', figsize=(10, 8))

add_to_plot(0, 135, 'lightyellow')
add_to_plot(1, 141, 'lightyellow')
add_to_plot(2, 144, 'lightyellow')
add_to_plot(3, 9, 'lightyellow')

plt.savefig('model_analysis_class' + str(0) + '_'+fname+'.png', bbox_inches='tight')

Given the plots above - plots 0 and 3 show a long delay between first touch and second search. Both are dog0. Maybe dog0 data needs to have a longer event window.

# Probabilities by concentration, etc

In [None]:
# Left panel: DNN output against concentration for true class 1 only,
# split by the dog's prediction
this_df = df[df['true']==1]
print(this_df.head())
print('Concentration range', this_df['Concentration'].min(), this_df['Concentration'].max())
df_a = this_df[this_df['dog_pred']==1]
df_b = this_df[this_df['dog_pred']==0]
fig, ax = plt.subplots(1, 2, sharex='col', sharey='row', figsize=(10, 4))
left_ax, right_ax = ax[0], ax[1]

for subset, colour, legend_text in [(df_a, 'green', 'dog 1 (TP)'),
                                    (df_b, 'red', 'dog 0 (FN)')]:
    left_ax.scatter(subset['Concentration'], subset['dnn_prob'], linestyle='None', marker='x',
                    color=colour, label=legend_text)
left_ax.legend(loc="upper right")
left_ax.set_ylabel('DNN: probability of belonging to class 1')
# The limits are set before switching to the log scale, as in the original ordering
left_ax.set_xlim(left = 1e-8, right = 1e-3)
left_ax.set_xscale('log')
left_ax.set_ylim(bottom=0, top=1)
left_ax.set_xlabel('Concentration')
left_ax.set_title('True class : 1')
left_ax.set_facecolor('lightcyan')

# Right panel: DNN output for every test sample, coloured by true class
right_ax.scatter(np.arange(df['dnn_prob'].shape[0]), df['dnn_prob'], linestyle='None', marker='x',
                 c=df['true'], cmap=class_cmap)
right_ax.set_title('Orange: true class 0\nBlue: true class 1')
right_ax.set_xlabel('Test sample number')

From the plots above -
+ The dog gives FN at the weaker concentrations
+ Most dog FNs also then generate DNN FNs. I.e. all but 2 dog FNs result in DNN FNs.
+ No clear trend from DNN or dog that "correctness" is a function of concentration.

In [None]:
# Column of df plotted on the x axis
grouper = 'time'

fig, ax = plt.subplots(1, 2, sharex='col', sharey='row', figsize=(10, 4))

# Left panel: true class 1 (cyan background); right panel: true class 0 (yellow).
# Green marks samples where the dog was correct (TP/TN), red where it was not (FN/FP).
for panel, (true_class, facecolor) in zip(ax, [(1, 'lightcyan'), (0, 'lightyellow')]):
    this_df = df[df['true']==true_class]
    # Build the masks from this_df itself so that they share its index.
    # Mixing in a mask made from the full df only works through pandas
    # reindexing the boolean key.
    df_a = this_df[this_df['dog_result'].isin(['TN', 'TP'])]
    df_b = this_df[this_df['dog_result'].isin(['FN', 'FP'])]

    panel.scatter(df_a[grouper], df_a['dnn_prob'], linestyle='None', marker='x',
                  color='green', label='dog correct (TP/TN)')
    panel.scatter(df_b[grouper], df_b['dnn_prob'], linestyle='None', marker='x',
                  color='red', label='dog incorrect (FN/FP)')
    #panel.legend(loc="upper right")
    panel.set_xlabel(grouper)
    panel.set_ylabel('DNN: probability of belonging to class 1')
    panel.set_ylim(bottom=0, top=1)
    panel.set_facecolor(facecolor)

The dogs give more incorrect results later in the day. Is that true for the entire dataset? It could be directly related to concentration - more weak-concentration tests were run late in the day.