This notebook explores the confidence the model has in errors compared to right answers.

In [4]:
BARS_PATH = "../bars/test/"

# Import & Load the Bar Categorization Model

In [1]:
import time
import tensorflow as tf

In [2]:
print('Loading categorization model...', end='')
start_time = time.time()

cat_model = tf.keras.models.load_model('./exported-models/bar_cat_EfficientNetB2_9723')

end_time = time.time()
elapsed_time = end_time - start_time
print('Done! Took {} seconds'.format(elapsed_time))

Done! Took 51.68789505958557 seconds


## Model evaluation

In [5]:
from os import listdir
from os.path import isfile, join

paths = [BARS_PATH + f for f in listdir(BARS_PATH) if isfile(join(BARS_PATH, f))]
paths

['../bars/test/1000767_h_1018431.png',
 '../bars/test/1001347_v_794159.png',
 '../bars/test/1002872_1.png',
 '../bars/test/1004288_h_1024768.png',
 '../bars/test/1007758_h_1019981.png',
 '../bars/test/1008617_108.png',
 '../bars/test/1011307_80.png',
 '../bars/test/1018431_123.png',
 '../bars/test/1018691_hv_794159.png',
 '../bars/test/1019572_h_1002872.png',
 '../bars/test/1019981_4.png',
 '../bars/test/1020886_h_1008617.png',
 '../bars/test/1023383_h_1011307.png',
 '../bars/test/1024768_40.png',
 '../bars/test/102978_hv_530532.png',
 '../bars/test/1033503_v_1018431.png',
 '../bars/test/1034799_hv_1018431.png',
 '../bars/test/1037827_v_788159.png',
 '../bars/test/1042691_hv_788159.png',
 '../bars/test/125743_v_1002872.png',
 '../bars/test/126748_v_233336.png',
 '../bars/test/137676_hv_210504.png',
 '../bars/test/147052_16.png',
 '../bars/test/148410_v_384036.png',
 '../bars/test/156033_v_530532.png',
 '../bars/test/165507_hv_796968.png',
 '../bars/test/16775_v_923680.png',
 '../bars/t

In [17]:
import pandas as pd

test_df = pd.DataFrame({'path': paths})
test_df["dec"] = test_df.apply(lambda x: int(x['path'].split("/")[-1].split("_")[0]), axis=1)
test_df["bin"] = test_df.apply(lambda x: bin(x['dec']), axis=1)
test_df["class"] = test_df.apply(lambda x: [i for i in reversed(range(20)) if (x['dec'] & 1 << i) != 0], axis=1)
    
print(test_df.shape)
test_df.head()

(200, 4)


Unnamed: 0,path,dec,bin,class
0,../bars/test/1000767_h_1018431.png,1000767,0b11110100010100111111,"[19, 18, 17, 16, 14, 10, 8, 5, 4, 3, 2, 1, 0]"
1,../bars/test/1001347_v_794159.png,1001347,0b11110100011110000011,"[19, 18, 17, 16, 14, 10, 9, 8, 7, 1, 0]"
2,../bars/test/1002872_1.png,1002872,0b11110100110101111000,"[19, 18, 17, 16, 14, 11, 10, 8, 6, 5, 4, 3]"
3,../bars/test/1004288_h_1024768.png,1004288,0b11110101001100000000,"[19, 18, 17, 16, 14, 12, 9, 8]"
4,../bars/test/1007758_h_1019981.png,1007758,0b11110110000010001110,"[19, 18, 17, 16, 14, 13, 7, 3, 2, 1]"


In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

target_size = (450, 100)
test_datagen = ImageDataGenerator()
test_generator = test_datagen.flow_from_dataframe(
    dataframe = test_df,
    directory = ".",
    target_size = target_size,
    shuffle = False,
    x_col = 'path',
    y_col = 'class',
    class_mode = 'categorical')

Found 200 validated image filenames belonging to 20 classes.


In [12]:
cat_model.evaluate(test_generator)



[0.09390565007925034, 0.9722500443458557]

# Explore Errors and Confidence Classifying Bars

In [14]:
prediction = cat_model.predict(test_generator)
prediction

array([[9.99994636e-01, 9.99394536e-01, 9.99995828e-01, ...,
        9.99999881e-01, 9.99998450e-01, 9.99039948e-01],
       [1.00000000e+00, 1.00000000e+00, 6.61531718e-10, ...,
        1.00000000e+00, 9.99998331e-01, 1.00000000e+00],
       [6.63127764e-07, 2.96091059e-07, 2.24232317e-11, ...,
        1.00000000e+00, 9.99999881e-01, 1.00000000e+00],
       ...,
       [1.24604940e-08, 1.00000000e+00, 8.64513822e-07, ...,
        9.99999762e-01, 1.00000000e+00, 9.99995947e-01],
       [1.00000000e+00, 1.10850875e-08, 3.47416215e-12, ...,
        9.99703109e-01, 1.00000000e+00, 9.99999762e-01],
       [9.99996662e-01, 4.57223587e-06, 9.99988675e-01, ...,
        1.27966981e-02, 3.15489690e-03, 5.36163920e-04]], dtype=float32)

In [19]:
# prediction in binary
test_df["pred_bin"] = ["{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}{}"
                   .format(round(x[19]),round(x[18]),round(x[17]),round(x[16]),round(x[15]), 
                           round(x[14]),round(x[13]),round(x[12]),round(x[11]),round(x[10]),
                           round(x[9]), round(x[8]), round(x[7]), round(x[6]), round(x[5]),
                           round(x[4]), round(x[3]), round(x[2]), round(x[1]), round(x[0])) for x in prediction]

#prediction in decimal
vals = []
for p in prediction:
    val = 0
    for i in range(20):
        val += round(p[i]) << i
    vals.append(val)
test_df["pred_dec"] = vals

# number of errors
test_df["errors"] = test_df.apply(lambda x: bin(x["dec"] ^ x["pred_dec"]).count("1"), axis=1)

test_df

Unnamed: 0,path,dec,bin,class,pred_bin,pred_dec,errors
0,../bars/test/1000767_h_1018431.png,1000767,0b11110100010100111111,"[19, 18, 17, 16, 14, 10, 8, 5, 4, 3, 2, 1, 0]",11110100010100111111,1000767,0
1,../bars/test/1001347_v_794159.png,1001347,0b11110100011110000011,"[19, 18, 17, 16, 14, 10, 9, 8, 7, 1, 0]",11110100011110000011,1001347,0
2,../bars/test/1002872_1.png,1002872,0b11110100110101111000,"[19, 18, 17, 16, 14, 11, 10, 8, 6, 5, 4, 3]",11110100110101111000,1002872,0
3,../bars/test/1004288_h_1024768.png,1004288,0b11110101001100000000,"[19, 18, 17, 16, 14, 12, 9, 8]",11110101001100000000,1004288,0
4,../bars/test/1007758_h_1019981.png,1007758,0b11110110000010001110,"[19, 18, 17, 16, 14, 13, 7, 3, 2, 1]",11110110000010001110,1007758,0
...,...,...,...,...,...,...,...
195,../bars/test/960254_v_521815.png,960254,0b11101010011011111110,"[19, 18, 17, 15, 13, 10, 9, 7, 6, 5, 4, 3, 2, 1]",11101010011011111110,960254,0
196,../bars/test/963002_v_383191.png,963002,0b11101011000110111010,"[19, 18, 17, 15, 13, 12, 8, 7, 5, 4, 3, 1]",11101011000110111110,963006,1
197,../bars/test/965538_v_286167.png,965538,0b11101011101110100010,"[19, 18, 17, 15, 13, 12, 11, 9, 8, 7, 5, 1]",11101011101110100010,965538,0
198,../bars/test/974681_136.png,974681,0b11101101111101011001,"[19, 18, 17, 15, 14, 12, 11, 10, 9, 8, 6, 4, 3...",11101101111101011001,974681,0


In [26]:
test_df["pred_class"] =  [[cl for cl, x in enumerate(prediction[i]) if x>0.5] for i in range(len(prediction))]

In [50]:
test_df["error_classes"] = [list(set(test_df["pred_class"][i]).symmetric_difference(set(test_df["class"][i]))) for i in range(len(test_df))]

In [51]:
test_df[["dec", "errors", "class", "pred_class", "error_classes"]]

Unnamed: 0,dec,errors,class,pred_class,error_classes
0,1000767,0,"[19, 18, 17, 16, 14, 10, 8, 5, 4, 3, 2, 1, 0]","[0, 1, 2, 3, 4, 5, 8, 10, 14, 16, 17, 18, 19]",[]
1,1001347,0,"[19, 18, 17, 16, 14, 10, 9, 8, 7, 1, 0]","[0, 1, 7, 8, 9, 10, 14, 16, 17, 18, 19]",[]
2,1002872,0,"[19, 18, 17, 16, 14, 11, 10, 8, 6, 5, 4, 3]","[3, 4, 5, 6, 8, 10, 11, 14, 16, 17, 18, 19]",[]
3,1004288,0,"[19, 18, 17, 16, 14, 12, 9, 8]","[8, 9, 12, 14, 16, 17, 18, 19]",[]
4,1007758,0,"[19, 18, 17, 16, 14, 13, 7, 3, 2, 1]","[1, 2, 3, 7, 13, 14, 16, 17, 18, 19]",[]
...,...,...,...,...,...
195,960254,0,"[19, 18, 17, 15, 13, 10, 9, 7, 6, 5, 4, 3, 2, 1]","[1, 2, 3, 4, 5, 6, 7, 9, 10, 13, 15, 17, 18, 19]",[]
196,963002,1,"[19, 18, 17, 15, 13, 12, 8, 7, 5, 4, 3, 1]","[1, 2, 3, 4, 5, 7, 8, 12, 13, 15, 17, 18, 19]",[2]
197,965538,0,"[19, 18, 17, 15, 13, 12, 11, 9, 8, 7, 5, 1]","[1, 5, 7, 8, 9, 11, 12, 13, 15, 17, 18, 19]",[]
198,974681,0,"[19, 18, 17, 15, 14, 12, 11, 10, 9, 8, 6, 4, 3...","[0, 3, 4, 6, 8, 9, 10, 11, 12, 14, 15, 17, 18,...",[]


In [131]:
from numpy import nan

# Average confidence of correct predictions
avg_conf_corr = []
avg_conf_err = []
lowest_err_conf = []
sec_lowest_err_conf = []
lowest_corr_conf = []
for idx, pred in enumerate(prediction):
    errors = test_df["errors"][idx]
    conf_corr = 0
    conf_err = 0
    l_err_conf = 2
    l2_err_conf = 2
    l_corr_conf = 2
    for i in range(len(pred)):
        conf = 1 - min(abs(pred[i]-1), pred[i])
        if i in test_df["error_classes"][idx]:            
            conf_err += conf/errors
            if conf < l_err_conf:
                l_err_conf = conf
            elif conf < l2_err_conf:
                l2_err_conf = conf
        else:
            conf_corr += conf/(len(pred) - errors)
            if conf < l_corr_conf:
                l_corr_conf = conf
            
    avg_conf_corr.append(conf_corr)
    avg_conf_err.append(conf_err)
    lowest_err_conf.append(l_err_conf)
    sec_lowest_err_conf.append(l2_err_conf)
    lowest_corr_conf.append(l_corr_conf)    
    
test_df["avg_conf_corr"] = avg_conf_corr
test_df["avg_conf_err"] = avg_conf_err
test_df["lowest_err_conf"] = lowest_err_conf
test_df["sec_lowest_err_conf"] = sec_lowest_err_conf
test_df["lowest_corr_conf"] = lowest_corr_conf
test_df.loc[test_df["sec_lowest_err_conf"] == 2,"sec_lowest_err_conf"] = nan
test_df.loc[test_df["lowest_err_conf"] == 2,"lowest_err_conf"] = nan

In [132]:
test_df[["errors", "class", "pred_class", "error_classes", "avg_conf_corr", "avg_conf_err", "lowest_corr_conf", "lowest_err_conf", "sec_lowest_err_conf"]]

Unnamed: 0,errors,class,pred_class,error_classes,avg_conf_corr,avg_conf_err,lowest_corr_conf,lowest_err_conf,sec_lowest_err_conf
0,0,"[19, 18, 17, 16, 14, 10, 8, 5, 4, 3, 2, 1, 0]","[0, 1, 2, 3, 4, 5, 8, 10, 14, 16, 17, 18, 19]",[],0.961258,0.000000,0.691036,,
1,0,"[19, 18, 17, 16, 14, 10, 9, 8, 7, 1, 0]","[0, 1, 7, 8, 9, 10, 14, 16, 17, 18, 19]",[],0.999896,0.000000,0.998245,,
2,0,"[19, 18, 17, 16, 14, 11, 10, 8, 6, 5, 4, 3]","[3, 4, 5, 6, 8, 10, 11, 14, 16, 17, 18, 19]",[],0.996682,0.000000,0.959753,,
3,0,"[19, 18, 17, 16, 14, 12, 9, 8]","[8, 9, 12, 14, 16, 17, 18, 19]",[],0.990435,0.000000,0.917470,,
4,0,"[19, 18, 17, 16, 14, 13, 7, 3, 2, 1]","[1, 2, 3, 7, 13, 14, 16, 17, 18, 19]",[],0.991611,0.000000,0.850669,,
...,...,...,...,...,...,...,...,...,...
195,0,"[19, 18, 17, 15, 13, 10, 9, 7, 6, 5, 4, 3, 2, 1]","[1, 2, 3, 4, 5, 6, 7, 9, 10, 13, 15, 17, 18, 19]",[],0.999335,0.000000,0.995115,,
196,1,"[19, 18, 17, 15, 13, 12, 8, 7, 5, 4, 3, 1]","[1, 2, 3, 4, 5, 7, 8, 12, 13, 15, 17, 18, 19]",[2],0.975757,0.995276,0.702527,0.995276,
197,0,"[19, 18, 17, 15, 13, 12, 11, 9, 8, 7, 5, 1]","[1, 5, 7, 8, 9, 11, 12, 13, 15, 17, 18, 19]",[],0.999820,0.000000,0.997457,,
198,0,"[19, 18, 17, 15, 14, 12, 11, 10, 9, 8, 6, 4, 3...","[0, 3, 4, 6, 8, 9, 10, 11, 12, 14, 15, 17, 18,...",[],0.999731,0.000000,0.994947,,


In [144]:
test_df["succ_corr"] = test_df["lowest_err_conf"]<test_df["lowest_corr_conf"]
test_df["succ_corr"][test_df["errors"]>0][test_df["errors"]<6].value_counts(normalize=True)

True     0.611111
False    0.388889
Name: succ_corr, dtype: float64

In [141]:
test_df["succ_2corr"] = test_df["sec_lowest_err_conf"]<test_df["lowest_corr_conf"]
test_df["succ_2corr"][test_df["errors"]>1][test_df["errors"]<6].value_counts(normalize=True)

False    0.944444
True     0.055556
Name: succ_2corr, dtype: float64