In [74]:
%matplotlib notebook

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D 
from skimage import io

# plt.rcParams['figure.figsize'] = (15,15)
# plt.rcParams.update({'font.size': 12})

In [75]:
# Hard-coded sizes of the training/test sets; they are typed in by hand here
# rather than derived from the data loaded in this notebook.
# NOTE(review): in the recorded outputs of this notebook TP + FN = 3457 and
# TN + FP = 6000, which suggests the positive/negative test counts may be
# swapped here -- confirm against the script that generated the results CSV.
NPOS_TEST = 6000
NNEG_TEST = 3457
NTESTING = NPOS_TEST + NNEG_TEST
NTRAINING = 6000

# Input files: the parameter-sweep results, the per-image error counts, and the
# per-configuration timing / pickle-size statistics.
CSV_GENRESULT_FILE = 'hogsvm_result_all_1Apr2020.csv'
CSV_POST_FALSE_IMAGE_FILE = 'hogsvm_result_all_false_images.csv'
CSV_POST_PERFORMANCE_FILE = 'hogsvm_result_all_performance.csv'

In [76]:
# Load the parameter-sweep results ordered by block size, then cell size, then
# orientation count. plot_bar3d assumes this row order when it lays out the bars.
df = pd.read_csv(CSV_GENRESULT_FILE).sort_values(
    by=['Cells per Block', 'Pixels per Cell', 'Orientations'],
    ascending=True,
)
print('Number of entries:', len(df))

Number of entries: 352


### Minimise Miss Rate

In [98]:
# Parameters that minimise people not being identified (FN)
# * Precision = TP/(TP+FP)  <--- of everything classified P
# * Recall    = TP/(TP+FN)  <--- of everything actually P
# * Miss Rate = FN/(TP+FN)  =  1 - Recall (positive-class recall)
# NOTE(review): the CSV's own 'Recall' column equals 'Accuracy' in the recorded
# outputs, so it looks like an averaged recall rather than the positive-class
# recall used here -- confirm.

# Adds a column to `df`; later cells read df['Miss Rate'].
df['Miss Rate'] = df['FN'] / (df['TP'] + df['FN'])
# Select on the miss rate itself rather than the raw FN count, so the choice
# stays correct even if rows were evaluated on different numbers of positives.
optimal_row = df.loc[df['Miss Rate'].idxmin()]

# Direct indexing (instead of .get) so a missing column raises immediately
# rather than silently yielding None.
# Orientation count held fixed in the 3D (cell size x block size) plots
best_orientations = optimal_row['Orientations']
# Block / cell size held fixed in the orientations trend plot
best_blocksize = optimal_row['Cells per Block']
best_cellsize = optimal_row['Pixels per Cell']
optimal_row

Test                      73.000000
Cells per Block            4.000000
Pixels per Cell            4.000000
Orientations              14.000000
Feature Size           49504.000000
AUC                        0.999438
Accuracy                   0.989743
Precision                  0.989803
Recall                     0.989743
Training Accuracy          1.000000
Prediction Time (s)       36.525964
TP                      3428.000000
TN                      5932.000000
FP                        68.000000
FN                        29.000000
Miss Rate                  0.008389
Name: 328, dtype: float64

In [78]:
# Parameter set with the highest test accuracy.
# In the recorded run this is NOT the minimum-FN row, but it is very similar:
# same cell size and orientation count, one block size smaller.
best_accuracy_label = df['Accuracy'].idxmax()
df.loc[best_accuracy_label]

Test                      49.000000
Cells per Block            3.000000
Pixels per Cell            4.000000
Orientations              14.000000
Feature Size           31752.000000
AUC                        0.999466
Accuracy                   0.990483
Precision                  0.990514
Recall                     0.990483
Training Accuracy          1.000000
Prediction Time (s)       23.456352
TP                      3425.000000
TN                      5942.000000
FP                        58.000000
FN                        32.000000
Miss Rate                  0.009257
Name: 304, dtype: float64

In [79]:
# Parameter set with the highest AUC.
# In the recorded run this is a different row from the max-accuracy one
# (16 orientations instead of 14).
#
# INTERESTING RESULT: discuss why the best-AUC row != the best-accuracy row, yet
# the best-AUC row has the lower accuracy!
# (maybe it's because AUC here is just a probabilistic estimate; not a true
#  empirical evaluation for this dataset)
best_auc_label = df['AUC'].idxmax()
df.loc[best_auc_label]

Test                      51.000000
Cells per Block            3.000000
Pixels per Cell            4.000000
Orientations              16.000000
Feature Size           36288.000000
AUC                        0.999467
Accuracy                   0.989003
Precision                  0.989053
Recall                     0.989003
Training Accuracy          1.000000
Prediction Time (s)       27.070338
TP                      3422.000000
TN                      5931.000000
FP                        69.000000
FN                        35.000000
Miss Rate                  0.010124
Name: 306, dtype: float64

In [80]:
# https://matplotlib.org/3.1.1/gallery/mplot3d/3d_bars.html
# https://stackoverflow.com/questions/9433240/python-matplotlib-3d-bar-plot-adjusting-tick-label-position-transparent-b/9464391
# https://stackoverflow.com/questions/43869751/change-bar-color-in-a-3d-bar-plot-in-matplotlib-based-on-value
# https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html

def str_dim(l):
    """Format every value ``v`` of ``l`` as the square-dimension label ``'vxv'``.

    Returns a tuple of strings in the same order, e.g. ``[4, 9]`` gives
    ``('4x4', '9x9')``.
    """
    return tuple('{0}x{0}'.format(v) for v in l)

def plot_bar3d(x_labels, y_labels, dz_np, z_lim_min, z_lim_max,
               x_title, y_title, z_title, fig_title, ztick_increment=0.5):
    """Draw a 3D bar chart with one bar per (x, y) grid position on a new figure.

    Parameters
    ----------
    x_labels, y_labels : sequence of str
        Tick labels along x and y; bars sit at integer positions 0..len-1.
    dz_np : array-like of numbers
        Bar heights, already shifted so that height 0 corresponds to
        ``z_lim_min``. Must hold ``len(x_labels) * len(y_labels)`` values,
        ordered with the x index cycling fastest.
    z_lim_min, z_lim_max : number
        Values printed at the lowest and highest z tick.
    x_title, y_title, z_title, fig_title : str
        Axis labels and figure title.
    ztick_increment : number, default 0.5
        Spacing between z ticks, in the same units as ``z_lim_min``.

    Returns
    -------
    (fig, ax1) : the new figure and its 3D axes.

    Raises
    ------
    ValueError
        If ``dz_np`` does not fill the label grid, or ``ztick_increment`` is
        not positive.
    """
    heights = np.asarray(dz_np, dtype=float).ravel()
    n_x, n_y = len(x_labels), len(y_labels)
    if heights.size != n_x * n_y:
        # The old integer-division layout silently produced misplaced or
        # mismatched bars in this case.
        raise ValueError(
            'dz_np has {} values but the label grid is {} x {}'.format(heights.size, n_x, n_y))
    if ztick_increment <= 0:
        raise ValueError('ztick_increment must be positive')

    # (x,y,z) coordinates of each bar: x cycles fastest, y advances every n_x bars
    x = np.tile(np.arange(n_x), n_y)
    y = np.repeat(np.arange(n_y), n_x)
    z = np.zeros(heights.size)
    # (width,depth,height) of each bar
    thickness = 0.4
    dx = np.full(heights.size, thickness)
    dy = np.full(heights.size, thickness)
    dz = heights

    # Colour by height relative to the tallest bar; fall back to the bottom of
    # the colormap when there is no positive height to normalise by.
    finite = heights[np.isfinite(heights)]
    peak = finite.max() if finite.size else 0.0
    scaled = heights / peak if peak > 0 else np.zeros(heights.size)
    colors = plt.cm.jet(scaled)

    fig = plt.figure() # OOP style
    ax1 = fig.add_subplot(111, projection='3d')
    ax1.bar3d(x,y,z,dx,dy,dz,color=colors)

    # Put each x/y tick under the middle of its bar
    tick_offset = thickness/2
    ticksx = np.arange(tick_offset, len(x_labels), 1)
    ax1.set_xticks(ticksx)
    ax1.set_xticklabels(x_labels)
    ticksy = np.arange(tick_offset, len(y_labels), 1)
    ax1.set_yticks(ticksy)
    ax1.set_yticklabels(y_labels)

    # Count whole increments first, then derive both tick positions and labels
    # from that count. A float-step arange can emit one tick too many (e.g.
    # 98.6..100 in steps of 0.2), which used to shift every label because the
    # labels were spread independently with linspace.
    z_span = z_lim_max - z_lim_min
    n_steps = max(int(np.floor(z_span / ztick_increment + 1e-9)), 0)
    ticksz = np.arange(n_steps + 1) * ztick_increment
    ticksz_labels = ['{:.1f}'.format(z_lim_min + t) for t in ticksz]
    ax1.set_zticks(ticksz)
    ax1.set_zticklabels(ticksz_labels)

    ax1.set_xlabel(x_title)
    ax1.set_ylabel(y_title)
    ax1.set_zlabel(z_title)
    ax1.set_title(fig_title)

    fig.tight_layout()
    return fig, ax1

In [99]:
# 3D bar chart of miss rate over (cell size, block size), with the orientation
# count held at best_orientations
df_missrate3d = df.loc[df['Orientations'] == best_orientations,
                       ['Pixels per Cell', 'Cells per Block', 'Miss Rate']]

# z axis is labelled from 0 % to 4.5 %
z_lim_min, z_lim_max = 0, 4.5
cellsize_labels = str_dim(df_missrate3d['Pixels per Cell'].unique())
blocksize_labels = str_dim(df_missrate3d['Cells per Block'].unique())
# fraction -> percent, shifted so bars start at the bottom of the z range
dz_np = df_missrate3d['Miss Rate'] * 100 - z_lim_min

fig, ax1 = plot_bar3d(
    x_labels=cellsize_labels, y_labels=blocksize_labels, dz_np=dz_np,
    z_lim_min=z_lim_min, z_lim_max=z_lim_max,
    x_title='Cell size (pixels)', y_title='Block size (cells)', z_title='Miss Rate (%)',
    fig_title='Miss rate comparison (#orientations: {})'.format(int(best_orientations)),
)
ax1.view_init(elev=25, azim=155)
plt.show()

<IPython.core.display.Javascript object>

In [82]:
# Worst and best miss rate among the rows shown in the miss-rate 3D plot
print('Max miss rate:', df_missrate3d.loc[df_missrate3d['Miss Rate'].idxmax()], sep='\n')
print('\nMin miss rate:', df_missrate3d.loc[df_missrate3d['Miss Rate'].idxmin()], sep='\n')

Max miss rate:
Pixels per Cell    9.000000
Cells per Block    1.000000
Miss Rate          0.040787
Name: 271, dtype: float64

Min miss rate:
Pixels per Cell    4.000000
Cells per Block    4.000000
Miss Rate          0.008389
Name: 328, dtype: float64


### Maximise Accuracy

In [83]:
# 3D bar chart of accuracy over (cell size, block size), with the orientation
# count held at best_orientations
df_accuracy3d = df.loc[df['Orientations'] == best_orientations,
                       ['Pixels per Cell', 'Cells per Block', 'Accuracy']]

# z axis is labelled from 94 % to 100 %
z_lim_min, z_lim_max = 94, 100
cellsize_labels = str_dim(df_accuracy3d['Pixels per Cell'].unique())
blocksize_labels = str_dim(df_accuracy3d['Cells per Block'].unique())
# fraction -> percent, shifted so bars start at the bottom of the z range
dz_np = df_accuracy3d['Accuracy'] * 100 - z_lim_min

fig, ax1 = plot_bar3d(
    x_labels=cellsize_labels, y_labels=blocksize_labels, dz_np=dz_np,
    z_lim_min=z_lim_min, z_lim_max=z_lim_max,
    x_title='Cell size (pixels)', y_title='Block size (cells)', z_title='Accuracy (%)',
    fig_title='Accuracy comparison (#orientations: {})'.format(int(best_orientations)),
)

ax1.view_init(elev=26, azim=-21)
plt.show()

# # (x,y,z) coordinates of each bar
# z_lim_min = 94
# x = list(range(len(cellsize_labels))) * int(len(df_accuracy3d)/len(cellsize_labels))
# y = [v for v in list(range(len(blocksize_labels))) for i in range(int(len(df_accuracy3d)/len(blocksize_labels)))]
# z = [0] * len(df_accuracy3d)
# # (width,depth,height) of each bar
# thickness = 0.4
# dx = [thickness] * len(df_accuracy3d)
# dy = [thickness] * len(df_accuracy3d)
# dz_np = df_accuracy3d['Accuracy'] * 100 - z_lim_min
# dz = list(dz_np)

# colors = plt.cm.jet(dz_np/dz_np.max())

# fig = plt.figure() # OOP style
# ax1 = fig.add_subplot(111, projection='3d')
# ax1.bar3d(x,y,z,dx,dy,dz,color=colors)

# tick_offset = thickness/2
# ztick_increment = 0.5
# ticksx = np.arange(tick_offset, len(cellsize_labels), 1)
# plt.xticks(ticksx, cellsize_labels)
# ticksy = np.arange(tick_offset, len(blocksize_labels), 1)
# plt.yticks(ticksy, blocksize_labels)
# ticksz = np.arange(0,100-z_lim_min+ztick_increment,ztick_increment)
# ticksz_labels = list(map(lambda v: '{:.1f}'.format(v), np.linspace(z_lim_min, 100, num=len(ticksz))))
# ax1.set_zticks(ticksz)
# ax1.set_zticklabels(ticksz_labels)

# ax1.set_xlabel('Cell size (pixels)')
# ax1.set_ylabel('Block size (cells)')
# ax1.set_zlabel('Accuracy (%)')
# ax1.set_title('Accuracy comparison (#orientations: {})'.format(int(best_orientations)))

# fig.tight_layout()
# plt.show()

<IPython.core.display.Javascript object>

In [84]:
# Best and worst accuracy among the rows shown in the accuracy 3D plot
print('Max accuracy:', df_accuracy3d.loc[df_accuracy3d['Accuracy'].idxmax()], sep='\n')
print('\nMin accuracy:', df_accuracy3d.loc[df_accuracy3d['Accuracy'].idxmin()], sep='\n')

Max accuracy:
Pixels per Cell    4.000000
Cells per Block    3.000000
Accuracy           0.990483
Name: 304, dtype: float64

Min accuracy:
Pixels per Cell    9.00000
Cells per Block    1.00000
Accuracy           0.95654
Name: 271, dtype: float64


### Maximise AUC

In [85]:
# 3D bar chart of AUC over (cell size, block size), with the orientation count
# held at best_orientations
df_auc3d = df.loc[df['Orientations'] == best_orientations,
                  ['Pixels per Cell', 'Cells per Block', 'AUC']]

# z axis is labelled from 98.6 % to 100 %
z_lim_min, z_lim_max = 98.6, 100
cellsize_labels = str_dim(df_auc3d['Pixels per Cell'].unique())
blocksize_labels = str_dim(df_auc3d['Cells per Block'].unique())
# fraction -> percent, shifted so bars start at the bottom of the z range
dz_np = df_auc3d['AUC'] * 100 - z_lim_min

fig, ax1 = plot_bar3d(
    x_labels=cellsize_labels, y_labels=blocksize_labels, dz_np=dz_np,
    z_lim_min=z_lim_min, z_lim_max=z_lim_max,
    x_title='Cell size (pixels)', y_title='Block size (cells)', z_title='AUC (%)',
    fig_title='AUC comparison (#orientations: {})'.format(int(best_orientations)),
    ztick_increment=0.2,
)

ax1.view_init(elev=24, azim=-22)
plt.show()

<IPython.core.display.Javascript object>

In [86]:
# Best and worst AUC among the rows shown in the AUC 3D plot
print('Max AUC:', df_auc3d.loc[df_auc3d['AUC'].idxmax()], sep='\n')
print('\nMin AUC:', df_auc3d.loc[df_auc3d['AUC'].idxmin()], sep='\n')

Max AUC:
Pixels per Cell    4.000000
Cells per Block    3.000000
AUC                0.999466
Name: 304, dtype: float64

Min AUC:
Pixels per Cell    14.000000
Cells per Block     1.000000
AUC                 0.990506
Name: 277, dtype: float64


### Feature Vector Size 

In [87]:
# 3D bar chart of HOG feature-vector length (log10) over (cell size, block size),
# with the orientation count held at best_orientations
df_hog3d = df.loc[df['Orientations'] == best_orientations,
                  ['Pixels per Cell', 'Cells per Block', 'Feature Size']]

# z axis is in log10 units, labelled from 2 to 5
z_lim_min, z_lim_max = 2, 5
ztick_increment = 0.5

# (non-log10 scale parameters: z_lim_max = 50000, ztick_increment = 10000)

cellsize_labels = str_dim(df_hog3d['Pixels per Cell'].unique())
blocksize_labels = str_dim(df_hog3d['Cells per Block'].unique())
# log10 of the feature count, shifted so bars start at the bottom of the z range
dz_np = np.log10(df_hog3d['Feature Size']) - z_lim_min

fig, ax1 = plot_bar3d(
    x_labels=cellsize_labels, y_labels=blocksize_labels, dz_np=dz_np,
    z_lim_min=z_lim_min, z_lim_max=z_lim_max,
    x_title='Cell size (pixels)', y_title='Block size (cells)',
    z_title='Number of features (log10)',
    fig_title='Feature vector length log comparison (#orientations: {})'.format(int(best_orientations)),
    ztick_increment=ztick_increment,
)

ax1.view_init(elev=18, azim=-20)
plt.show()

<IPython.core.display.Javascript object>

In [88]:
# Longest and shortest feature vector among the rows shown in the feature-size 3D plot
print('Max FV length:', df_hog3d.loc[df_hog3d['Feature Size'].idxmax()], sep='\n')
print('\nMin FV length:', df_hog3d.loc[df_hog3d['Feature Size'].idxmin()], sep='\n')

Max FV length:
Pixels per Cell        4
Cells per Block        4
Feature Size       49504
Name: 328, dtype: int64

Min FV length:
Pixels per Cell     14
Cells per Block      1
Feature Size       280
Name: 277, dtype: int64


### Orientations Trend (for fixed block/cell size)

In [89]:
# Miss rate and accuracy as a function of the orientation count, with block and
# cell size held at the values taken from optimal_row.
df_orientations = df.loc[(df['Cells per Block'] == best_blocksize) & (df['Pixels per Cell'] == best_cellsize)]

# best_cellsize / best_blocksize come out of a float64 row, so they are cast to
# int here; otherwise the titles read "4.0 px" / "4.0 cells".
size_note = '(Cell-Size: {} px, Block-Size: {} cells)'.format(int(best_cellsize), int(best_blocksize))

fig = plt.figure()
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

# top panel: miss rate in percent
ax1.plot(df_orientations['Orientations'], df_orientations['Miss Rate'] * 100, marker='.')
ax1.set_xlabel('Number of orientations')
ax1.set_ylabel('Miss Rate (%)')
ax1.set_title('Orientations vs. Miss Rate ' + size_note)

# bottom panel: accuracy in percent
ax2.plot(df_orientations['Orientations'], df_orientations['Accuracy'] * 100, marker='.')
ax2.set_xlabel('Number of orientations')
ax2.set_ylabel('Accuracy (%)')
ax2.set_title('Orientations vs. Accuracy ' + size_note)

fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

### Performance Statistics

Joins each HOG configuration's results (including its feature-vector size) with its measured runtime statistics.

Analysis of this has been scrapped, but the logic is there to do it if required.

In [90]:
# Attach the timing / pickle-size statistics to every sweep result, matching
# rows on the three HOG parameters.
dfp = pd.read_csv(CSV_POST_PERFORMANCE_FILE)
dfp = pd.merge(df, dfp, how='left', on=['Cells per Block','Pixels per Cell','Orientations'])
# Per-column NaN counts. With the left join, a non-zero count in a performance
# column means some configuration had no matching performance row.
# DataFrame.sum() is used instead of np.sum(DataFrame): the latter goes through
# DataFrame.sum(axis=None), whose per-column result recent pandas deprecates.
print('Null values:\n' + str(dfp.isna().sum()))
dfp

Null values:
Test                            0
Cells per Block                 0
Pixels per Cell                 0
Orientations                    0
Feature Size                    0
AUC                             0
Accuracy                        0
Precision                       0
Recall                          0
Training Accuracy               0
Prediction Time (s)             0
TP                              0
TN                              0
FP                              0
FN                              0
Miss Rate                       0
HOG Processing Time (s)         0
Training Time (s)               0
Training Pickle Size (bytes)    0
dtype: int64


Unnamed: 0,Test,Cells per Block,Pixels per Cell,Orientations,Feature Size,AUC,Accuracy,Precision,Recall,Training Accuracy,Prediction Time (s),TP,TN,FP,FN,Miss Rate,HOG Processing Time (s),Training Time (s),Training Pickle Size (bytes)
0,1,1,4,6,1920,0.996012,0.974410,0.974472,0.974410,1.000000,0.388716,3350,5865,135,107,0.030952,42.006,6.046,79843
1,2,1,4,7,2240,0.996633,0.976525,0.976559,0.976525,1.000000,1.428725,3355,5880,120,102,0.029505,45.841,4.789,92695
2,3,1,4,8,2560,0.997329,0.978640,0.978680,0.978640,1.000000,0.474133,3367,5888,112,90,0.026034,43.547,4.787,105443
3,4,1,4,9,2880,0.996937,0.978323,0.978361,0.978323,1.000000,0.518857,3365,5887,113,92,0.026613,45.060,5.266,118243
4,5,1,4,10,3200,0.997586,0.980332,0.980374,0.980332,1.000000,0.640093,3376,5895,105,81,0.023431,43.065,4.234,131043
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
347,224,4,14,12,384,0.993615,0.959712,0.960965,0.959712,0.970500,0.071837,3362,5714,286,95,0.027480,9.772,0.864,18455
348,32,4,14,13,416,0.993347,0.960135,0.961342,0.960135,0.970000,0.261522,3362,5718,282,95,0.027480,8.586,0.733,19683
349,94,4,14,14,448,0.993936,0.960241,0.961500,0.960241,0.973000,0.270507,3365,5716,284,92,0.026613,8.765,0.836,20963
350,95,4,14,15,480,0.993444,0.959712,0.960882,0.959712,0.970000,0.306676,3358,5718,282,99,0.028638,9.132,0.713,22243


In [91]:
# get (HOG processing time + prediction time) per image; compare to feature vector length
# (not accurate, as the computer may go to sleep)

# z = dfp.loc[df['Orientations']==best_orientations]
# z['HOG Processing Time (s)']

# df_proc_time = dfp.loc[df['Orientations']==best_orientations]
# col_proc_time = df_proc_time['HOG Processing Time (s)'] + df_proc_time['Prediction Time (s)']

# z_lim_min = 0
# z_lim_max = 80
# ztick_increment = 10

# cellsize_labels = str_dim(df_proc_time['Pixels per Cell'].unique())
# blocksize_labels = str_dim(df_proc_time['Cells per Block'].unique())
# dz_np = col_proc_time - z_lim_min

# fig, ax1 = plot_bar3d(cellsize_labels, blocksize_labels, dz_np, z_lim_min, z_lim_max, 
#                       'Cell size (pixels)', 'Block size (cells)', 'Processing time (s/image)', 
#                       'Testing runtime comparison (#orientations: {})'.format(int(best_orientations)),
#                       ztick_increment=ztick_increment)

# ax1.view_init(elev=18, azim=-20)
# plt.show()

### Rank Falsely Labelled Images

In [92]:
def plot_false_images(fig,nr,nc,df_imgs):
    """Show every image listed in ``df_imgs`` on an ``nr`` x ``nc`` subplot grid of ``fig``.

    Each row of ``df_imgs`` is unpacked positionally as
    (error type, image path, error count). The image is read from the path,
    drawn without axes, and titled ``'<error type>: <error count> times'``.
    Subplots are filled in row order of ``df_imgs``, starting at position 1.
    """
    rows = df_imgs.itertuples(index=False, name=None)
    for slot, row in enumerate(rows, start=1):
        ax = fig.add_subplot(nr, nc, slot)
        error_type, img_fp, error_occurrences = row
        ax.imshow(io.imread(img_fp))
        ax.axis('off')
        ax.set_title('{}: {} times'.format(error_type, error_occurrences))

In [93]:
# Per-image error counts, most frequently misclassified image first
df_falseimg = pd.read_csv(CSV_POST_FALSE_IMAGE_FILE).sort_values(by=['Error Occurrences'], ascending=False)
# Rows whose image path already appeared earlier, i.e. total rows minus distinct paths
n_repeated_images = df_falseimg['Image'].duplicated().sum()
print('Images with both FP and FN entries:', n_repeated_images)
df_falseimg

Images with both FP and FN entries: 0


Unnamed: 0,Error Type,Image,Error Occurrences
226,FP,Individual_Component\test\test_negative\000000...,352
2,FN,Individual_Component\test\test_positive\000000...,348
92,FP,Individual_Component\test\test_negative\000000...,347
191,FP,Individual_Component\test\test_negative\000000...,346
399,FP,Individual_Component\test\test_negative\000000...,344
...,...,...,...
2447,FP,Individual_Component\test\test_negative\000000...,1
2448,FN,Individual_Component\test\test_positive\000000...,1
2449,FN,Individual_Component\test\test_positive\000000...,1
2450,FP,Individual_Component\test\test_negative\000000...,1


In [100]:
# Grid of the FP images that were misclassified most often
is_fp = df_falseimg['Error Type'] == 'FP'
df_falseimg_fp = df_falseimg.loc[is_fp].sort_values(by=['Error Occurrences'], ascending=False)

# 9 images on a 3-row grid; nhead, nr and nc are reused by the later grid cells
nhead, nr = 9, 3
nc = int(np.ceil(nhead / nr))
fig = plt.figure(figsize=(8, 8))
plot_false_images(fig, nr, nc, df_falseimg_fp.iloc[:nhead])

fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [101]:
# plot mid sample of FPs
# fig = plt.figure(figsize=(8,8))
# midpt = int((len(df_falseimg_fp)-nhead)/2)
# plot_false_images(fig,nr,nc,df_falseimg_fp[midpt:midpt+nhead])  # borrows FP params

# fig.tight_layout()
# plt.show()

In [102]:
# Grid of the FP images that were misclassified least often
# (borrows nhead / nr / nc from the most-common-FP cell)
fig = plt.figure(figsize=(8,8))
# .tail() instead of [len - nhead:]: with fewer than nhead rows the start index
# goes negative and the slice would drop rows from the front instead of
# returning all of them.
plot_false_images(fig,nr,nc,df_falseimg_fp.tail(nhead))

fig.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [103]:
# Grid of the FN images that were misclassified most often
# (borrows nhead / nr / nc from the most-common-FP cell)
is_fn = df_falseimg['Error Type'] == 'FN'
df_falseimg_fn = df_falseimg.loc[is_fn].sort_values(by=['Error Occurrences'], ascending=False)
fig = plt.figure(figsize=(8, 8))
plot_false_images(fig, nr, nc, df_falseimg_fn.iloc[:nhead])

fig.tight_layout()
plt.show()

# (observation: shows occlusions; person is not centred?)

<IPython.core.display.Javascript object>