# Model analysis
Load model and analyse performance

In [None]:
import os
from pathlib import Path
import time
from datetime import datetime
from dateutil.tz import gettz
import itertools

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from tensorflow.keras.models import model_from_json

from sklearn.metrics import confusion_matrix

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(999123)

In [None]:
# Root data directory; the dataset files are read from the subdirectory
# fdir/fname.
fdir = '../../data/private_data/private_events_dev2'
# Dataset name: used both as the subdirectory name and as the prefix of the
# data files (e.g. <fname>_TRAIN.txt, <fname>_END_TEST.txt).
fname = 'private_correct_plus'
# Network architecture name; reshape() uses it to pick the input layout.
model_type = 'MLP'

In [None]:
# Rebuild the network from its saved JSON architecture description, then
# restore the trained weights from the matching .h5 file.
modelfile = '../../logs/2019-05-11T19:09/private_correct_plus/model'
# The context manager closes the file even if reading it raises.
with open(modelfile+'.json', 'r') as json_file:
    loaded_json_model = json_file.read()
model = model_from_json(loaded_json_model)
# load weights into new model
model.load_weights(modelfile+'.h5')
print('Model loaded from file', modelfile)

In [None]:
def readucr(filename):
    ''' Load a dataset from a file in UCR format.

    The file is whitespace delimited with one sample per row: the class
    label is in the first column and the remaining columns hold the data.

    Parameters
    ----------
    filename : str or path-like
        Location of the text file to read.

    Returns
    -------
    X : 2-D array (n_samples, n_values), DNN input data
    Y : 1-D array (n_samples,), class labels
    '''
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # squeezes the result to 1-D and the column slicing below fails.
    data = np.loadtxt(Path(filename), ndmin=2)
    Y = data[:, 0]
    X = data[:, 1:]
    return X, Y


def reshape(x, model_type):
    ''' Reshape data into input format for the selected DNN.

    Parameters
    ----------
    x : input data
    model_type : str
        'ResNet'                             -> reshape_2d(x)
        'FCN', 'FCN_HARUS', 'ResNet_tuned'   -> reshape_1d(x)
        'MLP'                                -> x is returned unchanged

    Returns
    -------
    The data in the layout expected by the selected model type.

    Raises
    ------
    ValueError if model_type is not one of the names listed above.
    '''
    if model_type == 'ResNet':
        return reshape_2d(x)
    if model_type in ('FCN', 'FCN_HARUS', 'ResNet_tuned'):
        return reshape_1d(x)
    if model_type == 'MLP':
        return x
    raise ValueError('Unrecognised model type')


# Normalisation statistics: one scalar mean and one scalar standard deviation
# taken over the whole training array.
x_train, y_train = readucr('{0}/{1}/{1}_TRAIN.txt'.format(fdir, fname))
x_train_mean, x_train_std = x_train.mean(), x_train.std()
model_params = dict(x_train_mean=x_train_mean, x_train_std=x_train_std)

# Evaluation set. Alternatives used previously: the '_dog_incorrect' suffix,
# or 'private_dog0_correct_plus_END_TEST'.
other = '{}_END_TEST'.format(fname)
datadir = '/'.join([fdir, fname])
print('Testing on:', '{}/{}.txt'.format(datadir, other))
x_other, y_other = readucr('{}/{}.txt'.format(datadir, other))


def predictions(model, model_params, model_type, 
                x_input, y_input, name, threshold=0.5, do_print=True):
    ''' Use the model to make predictions on x_input data.

    Parameters
    ----------
    model : trained model exposing predict_on_batch()
    model_params : dict
        Must hold 'x_train_mean' and 'x_train_std', used to standardise
        x_input before it is given to the model.
    model_type : str
        Passed to reshape() to select the input layout.
    x_input : array of input samples
    y_input : array of true class labels
        Rescaled onto 0 .. nb_classes-1 relative to the labels actually
        present, so an input holding a single class is treated as all
        class 0.
    name : unused; kept so that existing callers keep working
    threshold : float
        A sample is predicted as class 1 when the model output is strictly
        greater than this value, otherwise class 0.
    do_print : bool
        Print sample predictions, the confusion matrix, the accuracy and
        the class balance.

    Returns
    -------
    y_probs : raw model output
    y_pred : flattened array of 0/1 predictions
    acc_calc : accuracy computed from the confusion matrix
    '''
    x_input = (x_input - model_params['x_train_mean'])/(model_params['x_train_std'])
    x_input = reshape(x_input, model_type)

    y_input = np.asarray(y_input)
    nb_classes = len(np.unique(y_input))
    # Shift the smallest label to 0, then stretch the largest to nb_classes-1.
    y_input = y_input - y_input.min()
    label_span = y_input.max()
    if label_span > 0:
        # Skipped when only one class is present: dividing by a zero span
        # would turn every label into NaN.
        y_input = y_input/label_span*(nb_classes-1)
    # Class balance
    n0 = (y_input == 0).sum()
    n1 = (y_input == 1).sum()

    # Calculate model prediction
    # NOTE(review): assumes the model emits one probability per sample -- confirm.
    y_probs = model.predict_on_batch(x_input)
    # flatten() copies, so y_probs itself is returned untouched.
    flat_probs = np.asarray(y_probs).flatten()
    # One comparison serves every threshold. For outputs in [0, 1] it matches
    # np.round at the default 0.5 (np.round sends exactly 0.5 to 0).
    y_pred = (flat_probs > threshold).astype(flat_probs.dtype)

    # labels=[1,0]: row/column 0 is class 1, row/column 1 is class 0.
    cm = confusion_matrix(y_input, y_pred, labels=[1,0])
    acc_calc = (cm[0][0]+cm[1][1])/(cm.sum())
    if do_print:
        print('Predicted class probabilities:\n', y_probs[:5,:])
        print('Pred', y_pred[:20])
        print('True', y_input[:20].astype(int))
        print(cm)
        print('Calculated accuracy:',acc_calc)
        print('Class balance in test set:', n0, 'to', n1, 'i.e.', n0/(n0+n1))

    return y_probs, y_pred, acc_calc

#y_probs, y_pred, acc = predictions(model, model_params, model_type, x_test, y_test, fname)

In [None]:
# Score the loaded model on the END_TEST split of the selected dataset.
other = '{}_END_TEST'.format(fname)
datadir = '/'.join([fdir, fname])
print('Testing on:', '{}/{}.txt'.format(datadir, other))
x_other, y_other = readucr('{}/{}.txt'.format(datadir, other))
y_other_probs, y_other_pred, other_acc = predictions(
    model, model_params, model_type, x_other, y_other, other)

# Compare against the dog's own decisions, read from the 'dog_pred' column of
# the metadata file that accompanies the test set.
meta = pd.read_csv('{}/{}_meta.txt'.format(datadir, other),
                   sep=',', parse_dates=['date'])
cm = confusion_matrix(y_other, meta['dog_pred'], labels=[1,0])
print('Dog cm \n', cm)
# Accuracy is the diagonal of the 2x2 confusion matrix over the total count.
dog_acc = np.trace(cm)/cm.sum()
# Each row divided by its own total.
cm_norm = cm.astype('float')/cm.sum(axis=1, keepdims=True)
print('True', y_other[:20])
print('Dog ', meta['dog_pred'].values[:20])
print('Dog accuracy', dog_acc)

# Classification probability
Results for the tuned MLP trained on the "all dogs correct" data.

In [None]:
# Scatter the model output for every test sample, coloured by the true class
# (orange for class 0, blue for class 1, as stated in the plot title).
class_colors = ['darkorange', 'steelblue']
print(y_other_probs.shape[0])
x = np.arange(y_other_probs.shape[0])
class_cmap = matplotlib.colors.ListedColormap(class_colors)

fig, ax = plt.subplots()
plt.scatter(x, y_other_probs,
            c=y_other, cmap=class_cmap,
            marker='x', linestyle='None')
ax.set_title('Orange: true class 0\nBlue: true class 1')
ax.set_xlabel('Test sample number')
ax.set_ylabel('Model: probability of belonging to class 1')
# Kept as the last expression so the cell still displays the y-limits.
ax.set_ylim(bottom=0, top=1)
