In [1]:
#confidence_levels.ipynb 
# 
#by Joe Hahn
#joe.hahn@oracle.com
#18 July 2018
#
#plot the CNET model's confidence levels

In [2]:
#system parameters; every value except rn_seed is handed to make_xo_df
#when the xo sample is generated, rn_seed seeds the numpy RandomState
#NOTE(review): the meaning of each geometric value is defined in helper_fns -- confirm there
rn_seed = 25
initial_id = 0
jitter = 1.0
radius = 3.5
x_half_width = 0.5
box_half_width = 7.1

In [3]:
#generate sample of xo data
N_dots = 20000
import numpy as np
rn_state = np.random.RandomState(seed=rn_seed)
from helper_fns import *
#rn_state is never handed to make_xo_df, so on its own rn_seed has no visible
#effect on the sample; also seed numpy's global generator so reruns can match
#NOTE(review): assumes make_xo_df draws from numpy's global random state -- confirm in helper_fns
np.random.seed(rn_seed)
xo = make_xo_df(N_dots, initial_id, x_half_width, radius, box_half_width, jitter)
#xo.tail()

In [4]:
#extract x-features and y = target variables
x_cols = ['x', 'y']
y_cols = ['O_score', 'X_score', 'B_score']
#.values converts the selected dataframe columns to plain numpy arrays
x = xo[x_cols].values
y = xo[y_cols].values
#formatted print emits the same "(rows, cols) (rows, cols)" text under python 2 and 3
print('{} {}'.format(x.shape, y.shape))

(20000, 2) (20000, 3)


In [7]:
#read the previously saved cnet model back from its hdf5 file
from keras.models import load_model
model_file = 'cnet_model.h5'
model = load_model(model_file)

In [13]:
#generate predictions and stash results in xo dataframe
import numpy as np
import pandas as pd
y_pred = model.predict(x)
pred_cols = [col + '_pred' for col in y_cols]
#y_pred_df keeps the raw (un-normalized) model outputs
y_pred_df = pd.DataFrame(data=y_pred, columns=pred_cols)
#normalize class scores so that each record's scores sum to one; work on a float64
#numpy array and assign by position, so the result does not depend on xo's index labels
raw_scores = np.asarray(y_pred, dtype=float)
scores = raw_scores/raw_scores.sum(axis=1, keepdims=True)
for j, col in enumerate(pred_cols):
    xo[col] = scores[:, j]
#sum of the normalized scores, kept as a sanity check (should be 1 for every record)
xo['sum'] = scores.sum(axis=1)
#class_score = largest normalized score, class_pred = first letter of the winning column name;
#vectorized max/argmax replace the row-by-row xo.apply(..., axis=1) calls, which loop over
#every record in python and are a likely cause of this cell running slowly
xo['class_score'] = scores.max(axis=1)
class_letters = np.array([col[0] for col in pred_cols])
xo['class_pred'] = class_letters[scores.argmax(axis=1)]
cols = ['id', 'x', 'y', 'class', 'class_score', 'class_pred']
xo[cols].head(10)

Unnamed: 0_level_0,id,x,y,class,class_score,class_pred
record,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0,7.52172,1.063846,B,0.983288,B
1,1,-2.405319,-5.566259,B,0.575314,B
2,2,-3.277625,-5.264864,X,0.638419,X
3,3,2.412758,5.741974,B,0.595586,B
4,4,7.018886,-1.07304,B,0.911727,B
5,5,0.173553,-2.531221,X,0.498658,O
6,6,2.211263,-6.226027,B,0.72425,B
7,7,4.730813,-1.111841,O,0.495554,B
8,8,3.642523,-0.106196,O,0.446783,O
9,9,3.573174,0.583678,B,0.412619,O


In [None]:
#prep plots
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
sns.set(font_scale=1.5, font='DejaVu Sans')

In [None]:
#histogram model confidence
import numpy as np
N_bins = 101
fig, ax = plt.subplots(1,1, figsize=(15, 6))
#both histograms share one set of bin edges spanning all class_scores; passing the
#integer N_bins to each call would bin each sample over its own range, giving bars
#of different widths whose counts cannot be compared
all_scores = xo.class_score.dropna()
bin_edges = np.linspace(all_scores.min(), all_scores.max(), N_bins + 1)
#histogram all predictions
xp = all_scores
p = ax.hist(xp, bins=bin_edges, color='blue', alpha=0.4, label='all predictions')
#histogram incorrect predictions, ie records whose predicted class differs from the true class
idx = (xo['class'] != xo['class_pred'])
xp = xo[idx].class_score.dropna()
p = ax.hist(xp, bins=bin_edges, color='red', alpha=0.4, label='incorrect predictions')
p = ax.set_title('histogram of model confidence scores')
p = ax.set_xlabel('model confidence score')
p = ax.set_ylabel('number of records')
p = ax.legend()
#NOTE(review): the figs/ directory must already exist or savefig will fail
ax.figure.savefig('figs/cnet_confidence_scores.png')

In [None]:
#scatterplot showing model's predicted decision boundaries
#each record is drawn at its (x, y) position, colored by predicted class, and made
#more opaque the higher its class_score
df = xo
xy_rng = (-6.5, 6.5)
sz = 30
f, ax = plt.subplots(figsize=(10,10))
palette = plt.rcParams["axes.prop_cycle"].by_key()["color"]
#class_score thresholds that separate the high / medium / low confidence bands
hi_score = 0.8
lo_score = 0.65
#predicted class and the alpha used for its (high, medium, low) confidence bands;
#classes take consecutive colors from the default color cycle in this order
class_alphas = [('B', (0.9, 0.35, 0.1)), ('X', (0.9, 0.4, 0.1)), ('O', (0.9, 0.4, 0.1))]
score = df['class_score']
for clr_idx, (lbl, alphas) in enumerate(class_alphas):
    clr = palette[clr_idx]
    is_lbl = (df['class_pred'] == lbl)
    #half-open bands, so a record scoring exactly hi_score or lo_score is still plotted
    bands = [
        is_lbl & (score >= hi_score),
        is_lbl & (score >= lo_score) & (score < hi_score),
        is_lbl & (score < lo_score),
    ]
    for idx, alpha in zip(bands, alphas):
        if not idx.any():
            #nothing to draw for this class/confidence band
            continue
        p = sns.regplot(x=df[idx].x, y=df[idx].y, ax=ax, fit_reg=False, color=clr,
            scatter_kws={'alpha': alpha, 's': sz}, marker='s')
#NOTE(review): title and filename say MLP although this notebook loads cnet_model.h5 -- confirm intended
p = ax.set_title('MLP confidence intervals')
p = ax.set_xlabel('x')
p = ax.set_ylabel('y')
p = ax.set_xlim(xy_rng)
p = ax.set_ylim(xy_rng)
plt.savefig('figs/mlp_confidence_boundary.png')