In [12]:
import numpy as np
import matplotlib.pyplot as plt
import os, time
from skimage import io
from skimage.feature import hog
from sklearn import svm
from sklearn import linear_model
from sklearn import metrics
from sklearn import calibration
import pickle
import copy
import csv

In [13]:
# Notebook-wide plotting defaults: figure size and font size.
plt.rcParams.update({'figure.figsize': (15, 15), 'font.size': 12})

In [14]:
RESULTS_FH = None # global: handle of the per-run results log (opened/closed in run_hogsvm(), written by log_result())

In [15]:
# Root directory of the dataset; get_dataset_fp() reads its 'train' and 'test' subfolders.
DIR = 'Individual_Component'
# Seed handed to np.random.seed() before the data is shuffled (see images_to_hog()).
SEED = 42

# [HOG parameters]
# img (w,h): (64,80)
# NFEATURES = nblocks * norientations/cell * ncells/block
# Forwarded to skimage's hog() as transform_sqrt by run_hog().
POWER_LAW_COMPRESSION = True

# Values swept by the parameter-sweep cell; one run per combination.
# Block edge length in cells (blocks are square: CELLS_PER_BLOCK = (b, b)).
BLOCK_SIZE_LIST = [1, 2, 3, 4]
BLOCK_NORM_LIST = ['L2-Hys'] # available: ['L1', 'L1-sqrt', 'L2', 'L2-Hys']
# Cell edge length in pixels (cells are square: CELL_PIXELSHAPE = (cp, cp)).
CELL_PIXEL_LIST = [4, 5, 6, 7, 8, 9, 10, 14]
# Number of orientation bins; the trailing list looks like an earlier sweep range kept for reference.
ORIENTATION_LIST = [14,15,16]#[6, 7, 8, 9, 10, 11, 12,13]
# Caps on positive / negative training images passed to load_images() (test data is loaded uncapped).
NPOS_TRAINING_IMGS = 3000
NNEG_TRAINING_IMGS = 3000

## (Test platform test runs)

# BLOCK_SIZE_LIST = [1]
# BLOCK_NORM_LIST = ['L1']
# CELL_PIXEL_LIST = [4]
# ORIENTATION_LIST = [8, 6]
# NPOS_TRAINING_IMGS = 10
# NNEG_TRAINING_IMGS = 10

# BLOCK_SIZE_LIST = [1,2,4]
# BLOCK_NORM_LIST = ['L2-Hys'] # available: ['L1', 'L1-sqrt', 'L2', 'L2-Hys']
# CELL_PIXEL_LIST = [5, 6, 7, 8, 9, 10, 14]
# ORIENTATION_LIST = [7,11]
# NPOS_TRAINING_IMGS = 3000
# NNEG_TRAINING_IMGS = 3000

In [16]:
# Housekeeping cell: release the results log if an earlier (possibly
# interrupted) run left it open.
if RESULTS_FH is not None:
    RESULTS_FH.close()
    # Forget the stale handle so nothing later tries to write to a closed file.
    RESULTS_FH = None
print('Results file closed.')

Results file closed.


In [17]:
def list_mul(l, num):
    """Multiply every element of ``l`` by ``num`` and truncate each product to int.

    Returns the scaled values as a tuple, in the same order as ``l``.
    """
    scaled = []
    for value in l:
        scaled.append(int(num * value))
    return tuple(scaled)

# Histogram of Oriented Gradients
def run_hog(img, visualize=False):
    """Compute the HOG descriptor of ``img`` using the current sweep settings.

    The descriptor parameters are read from the module-level variables
    NORIENTATIONS, CELL_PIXELSHAPE, CELLS_PER_BLOCK, BLOCK_NORM and
    POWER_LAW_COMPRESSION, which the parameter sweep sets before each run.

    Returns whatever skimage's ``hog`` returns for the given ``visualize`` flag.
    """
    # NOTE(review): newer scikit-image releases replace ``multichannel`` with
    # ``channel_axis`` -- confirm against the installed version before upgrading.
    hog_settings = {
        'orientations': NORIENTATIONS,
        'pixels_per_cell': CELL_PIXELSHAPE,
        'cells_per_block': CELLS_PER_BLOCK,
        'block_norm': BLOCK_NORM,
        'visualize': visualize,
        'transform_sqrt': POWER_LAW_COMPRESSION,
        'multichannel': True,
    }
    return hog(img, **hog_settings)

In [18]:
# Reference: https://www.kaggle.com/manikg/training-svm-classifier-with-hog-features

def log_result(s, on_console=True): # debug log...
    """Write ``s`` to the console and/or the currently open results file.

    Args:
        s: Message to log (anything ``print`` accepts).
        on_console: When True, also echo the message to stdout.

    The file copy is written only while RESULTS_FH holds an open handle.
    ``print(s, file=None)`` falls back to stdout, so without this guard a
    message logged while no results file is open would be echoed twice (or
    echoed even though ``on_console`` is False).
    """
    if on_console:
        print(s)
    if RESULTS_FH is not None:
        print(s, file=RESULTS_FH)

# Code to load the dataset
def get_dataset_fp(is_train):
    """List the image directories of the training or the testing split.

    Looks under ``DIR/train`` (``is_train`` truthy) or ``DIR/test`` for
    first-level folders whose name contains 'positive' or 'negative', and
    collects every sub-directory found inside them.

    Args:
        is_train: Selects the 'train' split when truthy, otherwise 'test'.

    Returns:
        (pos_dirs, neg_dirs): two lists of directory paths, each in sorted
        (name) order.
    """
    subroot = 'train' if is_train else 'test'
    base_fp = os.path.join(DIR, subroot)
    pos_dirs = []
    neg_dirs = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # directory order -- and therefore which images a capped load picks --
    # identical from run to run and machine to machine.
    for dir_name in sorted(os.listdir(base_fp)): # all files & dirs
        if 'positive' in dir_name:
            bucket = pos_dirs
        elif 'negative' in dir_name:
            bucket = neg_dirs
        else:
            continue # neither class: nothing below it would be collected
        subfp = os.path.join(base_fp, dir_name)
        if not os.path.isdir(subfp):
            continue
        for subdir_name in sorted(os.listdir(subfp)):
            subsubfp = os.path.join(subfp, subdir_name)
            if os.path.isdir(subsubfp):
                bucket.append(subsubfp)
    return pos_dirs, neg_dirs

def get_subset_images(fp, nimgs=None, preproc=False):
    """Load the '.pnm' images stored directly inside directory ``fp``.

    Args:
        fp: Directory to scan (sub-directories and other file types are skipped;
            the extension check is case-insensitive).
        nimgs: Maximum number of images to load; None loads all of them.
            Zero or a negative value loads nothing.
        preproc: When True, store the HOG descriptor of each image (via
            run_hog) instead of the image itself.

    Returns:
        (results, results_fp): the loaded images (or descriptors) and their
        file paths, index-aligned and in sorted file-name order.
    """
    results = []
    results_fp = []
    if nimgs is not None and nimgs <= 0:
        # The cap is checked after each load below, so without this guard a
        # cap of 0 would still return one image.
        return results, results_fp
    # Sorted so that a capped load always selects the same files;
    # os.listdir() itself makes no ordering promise.
    for img_fn in sorted(os.listdir(fp)):
        img_fp = os.path.join(fp, img_fn)
        if os.path.isdir(img_fp) or not img_fn.lower().endswith('.pnm'):
            continue
        img = io.imread(img_fp)
        results.append(run_hog(img) if preproc else img)
        results_fp.append(img_fp)
        if nimgs is not None and len(results) >= nimgs: # restrict num imgs loaded
            break
    return results, results_fp

# Class labels used for the negative / positive image sets.
NO_PERSON, IS_PERSON = 0, 1

#def load_images(is_train, shuffle, npos_imgs=None, nneg_imgs=None, save_fn=None):
def _load_from_dirs(dirs, limit):
    """Load raw images from ``dirs`` in order, stopping once ``limit`` images are in (no cap if None)."""
    images = []
    paths = []
    remaining = limit
    for fp in dirs:
        tic = time.time()
        cur_subset, cur_subset_fp = get_subset_images(fp, remaining, False)
        images += cur_subset
        paths += cur_subset_fp
        log_result('  * {:.3f}s runtime (images loaded): {}'.format(time.time() - tic,fp))
        if remaining is None:
            continue
        remaining -= len(cur_subset)
        if remaining <= 0:
            break
    return images, paths


def load_images(is_train, npos_imgs=None, nneg_imgs=None):
    """Load the unprocessed images of one dataset split together with their labels.

    Args:
        is_train: Passed to get_dataset_fp() to choose the split.
        npos_imgs: Cap on the number of positive images (None = no cap).
        nneg_imgs: Cap on the number of negative images (None = no cap).

    Returns:
        (images, labels, paths): index-aligned lists. All positive images
        (label IS_PERSON) come first, followed by the negatives (NO_PERSON).
    """
    pos_dirs, neg_dirs = get_dataset_fp(is_train=is_train)
    pos_images, pos_paths = _load_from_dirs(pos_dirs, npos_imgs)
    neg_images, neg_paths = _load_from_dirs(neg_dirs, nneg_imgs)

    x_train = pos_images + neg_images
    y_train = [IS_PERSON] * len(pos_images) + [NO_PERSON] * len(neg_images)
    fp_train = pos_paths + neg_paths
    return x_train, y_train, fp_train

def images_to_hog(x_train, y_train, fp_train, shuffle, save_fn=None):
    """Convert raw images to HOG feature rows and optionally shuffle them.

    Args:
        x_train: Sequence of raw images; each is passed to run_hog(). The
            sequence itself is left untouched.
        y_train: Labels, index-aligned with ``x_train``.
        fp_train: File paths, index-aligned with ``x_train``.
        shuffle: When True, rows are permuted after seeding NumPy's global
            generator with SEED.
        save_fn: Optional path; when given, the result tuple is pickled there.

    Returns:
        (features, labels, fp_index, fp_train): ``features`` is a 2-D array
        with one descriptor per row, ``labels`` the matching label array and
        ``fp_index[i]`` the position in ``fp_train`` of the image behind row
        ``i``. ``fp_train`` is returned unchanged (never shuffled).

    Raises:
        ValueError: if there are no images, or the three inputs differ in length.
    """
    n_samples = len(x_train)
    if n_samples == 0:
        raise ValueError('images_to_hog() needs at least one image')
    if len(y_train) != n_samples or len(fp_train) != n_samples:
        raise ValueError('images, labels and file paths must have the same length')

    # convert each img to hog (built as a new list so the caller's images survive)
    start_time = time.time()
    features = [run_hog(img) for img in x_train]
    hog_shape = features[0].shape
    log_result('  * {:.3f}s runtime (conversion to HOG)'.format(time.time() - start_time))

    x_train = np.array(features)
    y_train = np.array(y_train)
    fp_train_index = np.arange(n_samples) # create unique IDs (from 0)

    if shuffle:
        start_time = time.time()
        # The *global* generator is seeded on purpose, as before.
        # NOTE(review): estimators created later without an explicit
        # random_state may draw from this same stream -- confirm before
        # switching to a local generator.
        np.random.seed(SEED)
        # Applying one index permutation to all three arrays keeps them aligned
        # without stacking labels and ids into the float feature matrix, so
        # both keep their integer dtype.
        order = np.random.permutation(n_samples)
        x_train = x_train[order]
        y_train = y_train[order]
        fp_train_index = fp_train_index[order]
        log_result('  * {:.3f}s runtime (shuffling)'.format(time.time() - start_time))

    log_result('HOG descriptor size: ' + str(hog_shape))
    result = (x_train, y_train, fp_train_index, fp_train)
    # save loaded images
    if save_fn is not None:
        with open(save_fn, 'wb') as fh:
            pickle.dump(result, fh)
    return result
    
# Code to generate the SVM model
def gen_model(model_fn, x_train, y_train):
    """Fit a probability-calibrated linear SVM and pickle it to ``model_fn``.

    Args:
        model_fn: Path of the pickle file the fitted classifier is written to.
        x_train: Feature rows to fit on.
        y_train: Labels, index-aligned with ``x_train``.

    Returns:
        The fitted CalibratedClassifierCV (5-fold) wrapping a hinge-loss
        LinearSVC.
    """
    start_time = time.time()
    # Alternatives tried earlier: svm.SVC(probability=True), linear_model.SGDClassifier()
    # The wrapped estimator is passed positionally: its keyword is spelled
    # ``base_estimator`` in older scikit-learn releases and ``estimator`` in
    # newer ones, while the first positional slot is the same in both.
    clf = calibration.CalibratedClassifierCV(svm.LinearSVC(loss='hinge'), cv=5)
    clf.fit(x_train, y_train)
    # NOTE: the "(SGD training)" label is kept as-is for comparability with
    # earlier result files; the model fitted here is the calibrated LinearSVC.
    log_result('{:.3f}s runtime (SGD training)'.format(time.time() - start_time))

    # save SVM model
    with open(model_fn, 'wb') as fh:
        pickle.dump(clf, fh)
    return clf

In [19]:
# Computes statistics for the classifier's performance
def run_test(clf, x_test, y_test):
    """Predict on ``x_test`` and log / return the classifier's statistics.

    Args:
        clf: Fitted classifier exposing ``predict`` and ``predict_proba``.
        x_test: Feature rows to classify.
        y_test: True labels, index-aligned with ``x_test``.

    Returns:
        (y_pred, y_prob, stats): the predicted labels, the predicted
        probabilities and a dict with the keys 'Prediction Time', 'AUC',
        'Accuracy', 'Precision', 'Recall' and 'Confusion Matrix'.
    """
    tic = time.time()
    y_pred = clf.predict(x_test)
    y_prob = clf.predict_proba(x_test)
    # The timing covers both calls above (predict and predict_proba).
    elapsed_time = time.time() - tic

    # y_prob: https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
    #  - shape: (nsamples, nclasses), by order of `clf.classes_`
    # Column IS_PERSON is used as the positive-class score.
    # NOTE(review): assumes that column lines up with the IS_PERSON class -- confirm via clf.classes_.
    stats = {
        'Prediction Time': elapsed_time,
        'AUC': metrics.roc_auc_score(y_test, y_prob[:,IS_PERSON], average='weighted'),
        'Accuracy': metrics.accuracy_score(y_test, y_pred),
        'Precision': metrics.precision_score(y_test, y_pred, average='weighted'),
        'Recall': metrics.recall_score(y_test, y_pred, average='weighted'),
        'Confusion Matrix': metrics.confusion_matrix(y_test, y_pred),
    }

    log_result('  * {:.6f}s prediction time ({:.6f} s/image)'.format(elapsed_time, elapsed_time/len(y_test)))
    report_lines = (
        ('  * AUC (weighted): {:.9f}', 'AUC'),
        ('  * Accuracy: {:.9f}', 'Accuracy'),
        ('  * Precision (weighted): {:.9f}', 'Precision'),
        ('  * Recall (weighted): {:.9f}', 'Recall'),
    )
    for template, key in report_lines:
        log_result(template.format(stats[key]))
    log_result('  * Confusion Matrix:')
    log_result(str(stats['Confusion Matrix']))
    return y_pred, y_prob, stats

# Returns all image file paths detected falsely
def filter_failed_fp(y_pred, y_test, fp_test_index, fp_test):
    """Collect the file paths of every misclassified sample.

    Args:
        y_pred: Predicted labels.
        y_test: True labels, index-aligned with ``y_pred``.
        fp_test_index: For each sample, the position of its path in ``fp_test``
            (cast to int before use).
        fp_test: File paths.

    Returns:
        A list of ``(label, path)`` tuples in sample order, where ``label`` is
        'FN' when the prediction was NO_PERSON and 'FP' otherwise.
    """
    failed_fp = []
    for pos, predicted in enumerate(y_pred):
        if predicted == y_test[pos]:
            continue
        label = 'FN' if predicted == NO_PERSON else 'FP'
        failed_fp.append((label, fp_test[int(fp_test_index[pos])]))
    return failed_fp

In [20]:
# 1. Generate SVM model (and training images) or load cached result
def get_svm_model(cur_dir_contents, SVM_MODEL, PRE_TRAINING_IMGS):
    """Return the training features and a fitted classifier, reusing caches.

    Args:
        cur_dir_contents: Names of the files available in the working directory.
        SVM_MODEL: File name of the pickled classifier.
        PRE_TRAINING_IMGS: File name of the pickled training features.

    Returns:
        (x_train, y_train, fp_train_index, fp_train, clf)

    The on-disk cache is used only when BOTH pickles are listed in
    ``cur_dir_contents``; otherwise the model is (re)trained and both files are
    rewritten. Checking the model file alone would fail with a missing-file
    error whenever the features pickle had been deleted or never written.
    Raw training images are loaded from disk once and then kept in
    CACHED_UNPROC_TRAINING for later sweep iterations.
    """
    global NPOS_TRAINING_IMGS,NNEG_TRAINING_IMGS
    global CACHED_UNPROC_TRAINING # training
    cache_on_disk = SVM_MODEL in cur_dir_contents and PRE_TRAINING_IMGS in cur_dir_contents
    if not cache_on_disk:
        log_result('\n[Training SVM model]:')
        # get unprocessed images
        if CACHED_UNPROC_TRAINING is None:
            x_train, y_train, fp_train = load_images(True, NPOS_TRAINING_IMGS,NNEG_TRAINING_IMGS)
            # Deep copies keep the cached raw images independent of anything
            # the HOG conversion does to the lists passed to it.
            CACHED_UNPROC_TRAINING = (copy.deepcopy(x_train), copy.deepcopy(y_train), copy.deepcopy(fp_train))
        else:
            log_result('  * Loading cached unprocessed images from RAM')
            start_time = time.time()
            x_train, y_train, fp_train = copy.deepcopy(CACHED_UNPROC_TRAINING)
            log_result('    {:.3f}s loading time'.format(time.time() - start_time))
        # process images, generate model
        x_train, y_train, fp_train_index, fp_train = images_to_hog(x_train, y_train, fp_train, True, save_fn=PRE_TRAINING_IMGS)
        clf = gen_model(SVM_MODEL,x_train,y_train)
    else:
        log_result('\n[Loading cached SVM model & training images]:')
        # NOTE: pickle.load executes code embedded in the file -- only load
        # pickles that this notebook produced itself.
        with open(SVM_MODEL, 'rb') as fh:
            clf = pickle.load(fh)
        with open(PRE_TRAINING_IMGS, 'rb') as fh:
            x_train, y_train, fp_train_index, fp_train = pickle.load(fh)

    log_result('Number of training images loaded: {}'.format(len(x_train)))
    return x_train, y_train, fp_train_index, fp_train, clf

# 2. Generate test images or load cached result
def get_test_imgs(cur_dir_contents, PRE_TESTING_IMGS):
    """Return the HOG features of the test set, reusing caches where possible.

    Args:
        cur_dir_contents: Names of the files available in the working directory.
        PRE_TESTING_IMGS: File name of the pickled test features.

    Returns:
        (x_test, y_test, fp_test_index, fp_test)

    If the pickle is listed in ``cur_dir_contents`` it is loaded. Otherwise the
    raw test images are taken from CACHED_UNPROC_TESTING (filled on first use
    from load_images) and converted with images_to_hog, which also writes the
    pickle.
    """
    global CACHED_UNPROC_TESTING  # testing
    if PRE_TESTING_IMGS in cur_dir_contents:
        log_result('\n[Loading cached test images]:')
        with open(PRE_TESTING_IMGS, 'rb') as fh:
            x_test, y_test, fp_test_index, fp_test = pickle.load(fh)
    else:
        log_result('\n[Generating test images]:')
        if CACHED_UNPROC_TESTING is None:
            # first request: read the raw images from disk and keep a private copy
            x_test, y_test, fp_test = load_images(False)
            CACHED_UNPROC_TESTING = (copy.deepcopy(x_test), copy.deepcopy(y_test), copy.deepcopy(fp_test))
        else:
            log_result('  * Loading cached unprocessed test images from RAM')
            tic = time.time()
            x_test, y_test, fp_test = copy.deepcopy(CACHED_UNPROC_TESTING)
            log_result('    {:.3f}s loading time'.format(time.time() - tic))
        # convert the raw images to (shuffled) HOG features and cache them on disk
        hog_bundle = images_to_hog(x_test, y_test, fp_test, True, save_fn=PRE_TESTING_IMGS)
        x_test, y_test, fp_test_index, fp_test = hog_bundle

    log_result('Number of testing images loaded: {}'.format(len(x_test)))
    return x_test, y_test, fp_test_index, fp_test
            
# 3. Evaluate performance (for all permutations)
#    call: run_test()
#    manual post-analysis: filter_failed_fp()

# Wrapper for single iteration. Note: all CSV write logic to be contained here. (hack)
def run_hogsvm():
    """Run one HOG + SVM experiment for the currently selected parameters.

    Reads the sweep parameters from the module-level NORIENTATIONS,
    CELL_PIXELSHAPE, CELLS_PER_BLOCK, BLOCK_NORM and POWER_LAW_COMPRESSION,
    derives the cache / result file names from them, obtains the model and the
    test features, evaluates the classifier on the test and the training data,
    and logs everything to a per-run text file.

    Side effects:
        * Opens the results file in RESULTS_FH for the duration of the run; it
          is closed and RESULTS_FH reset to None even when a step raises, so a
          failed run cannot leave a dangling handle behind.
        * Appends to the global CSV_LINE, in order: feature size, test AUC,
          accuracy, precision, recall, training accuracy, test prediction time,
          then TP, TN, FP, FN from the test confusion matrix.
    """
    global NORIENTATIONS, CELL_PIXELSHAPE, CELLS_PER_BLOCK, BLOCK_NORM, POWER_LAW_COMPRESSION
    global RESULTS_FH, CSV_LINE
    base_fn = '_ori({})_cellpix({})_blksze({})_blknrm({})'.format(
        NORIENTATIONS, CELL_PIXELSHAPE[0], CELLS_PER_BLOCK[0], BLOCK_NORM)
    pickle_type = '.pickle'
    text_type = '.txt'
    SVM_MODEL = 'hogsvm_model' + base_fn + pickle_type
    PRE_TRAINING_IMGS = 'hogsvm_train' + base_fn + pickle_type
    PRE_TESTING_IMGS = 'hogsvm_test' + base_fn + pickle_type
    results_fn = 'hogsvm_result' + base_fn + text_type

    # Snapshot taken before the results file is created; used for cache look-ups.
    cur_dir_contents = os.listdir('.')
    RESULTS_FH = open(results_fn, 'w')
    try:
        log_result('[Current parameter sweep]:')
        log_result('  * Number of orientations: {}'.format(NORIENTATIONS))
        log_result('  * Cell pixel shape: {}'.format(CELL_PIXELSHAPE))
        log_result('  * Number of cells per block: {}'.format(CELLS_PER_BLOCK))
        log_result('  * Block normalisation method: {}'.format(BLOCK_NORM))
        log_result('  * Power law compression (preprocessing) on: {}'.format(POWER_LAW_COMPRESSION))
        # (NOTE: actually, power law is square root --> slightly different method)

        # load svm model and test dataset
        x_train, y_train, fp_train_index, fp_train, clf = get_svm_model(cur_dir_contents, SVM_MODEL, PRE_TRAINING_IMGS)
        x_test, y_test, fp_test_index, fp_test          = get_test_imgs(cur_dir_contents, PRE_TESTING_IMGS)
        # * (CSV): get hog shape (duplicated logic)
        CSV_LINE.append(x_train.shape[1]) # 'Feature Size'

        # evaluate on test and training datasets (as a crude check for overfitting)
        log_result('\n[Classifier statistics (on test data)]:')
        y_pred, y_prob, stats_test = run_test(clf, x_test, y_test)
        log_result('\n[Classifier statistics (on training data)]:')
        y_pred_training, y_prob_training, stats_train = run_test(clf, x_train, y_train)
        # * (CSV): add stats
        CSV_LINE += [stats_test['AUC'], stats_test['Accuracy'], stats_test['Precision'],
                     stats_test['Recall'], stats_train['Accuracy'], stats_test['Prediction Time']]
        # The flattened matrix is unpacked as TN, FP, FN, TP, i.e. a 2x2 matrix
        # is expected here.
        tn_teststat, fp_teststat, fn_teststat, tp_teststat = stats_test['Confusion Matrix'].ravel()
        CSV_LINE += [tp_teststat, tn_teststat, fp_teststat, fn_teststat]

        # identify all false results (written to the file only, not the console)
        failed_fp = filter_failed_fp(y_pred, y_test, fp_test_index, fp_test)
        log_result('\n[Falsely detected images]:')
        for label, fp in failed_fp:
            log_result('  * {}: {}'.format(label,fp), on_console=False)
    finally:
        RESULTS_FH.close()
        RESULTS_FH = None

In [21]:
# [PARAMETER SWEEP]
# Total number of parameter combinations, used only for the progress banner.
ntests = len(BLOCK_SIZE_LIST) * len(BLOCK_NORM_LIST) * len(CELL_PIXEL_LIST) * len(ORIENTATION_LIST)
count = 1
test_start_time = time.time()

# Raw images are loaded once and shared by every sweep iteration
# (filled in by get_svm_model() / get_test_imgs()).
CACHED_UNPROC_TRAINING = None
CACHED_UNPROC_TESTING = None
# Row under construction for the summary CSV; run_hogsvm() appends to it.
CSV_LINE = None

csv_headers = ['Test', 'Cells per Block', 'Pixels per Cell', 'Orientations',
               'Feature Size', 'AUC', 'Accuracy', 'Precision', 'Recall',
               'Training Accuracy', 'Prediction Time (s)',
               'TP', 'TN', 'FP', 'FN'] # must write to in same order

# `with` guarantees the summary file is closed even if a run raises part-way
# through the sweep.
with open('hogsvm_result_all.csv', 'w', newline='') as csv_fh:
    csv_writer = csv.writer(csv_fh)
    csv_writer.writerow(csv_headers)

    # block parameters
    for block_size in BLOCK_SIZE_LIST:
        CELLS_PER_BLOCK = (block_size, block_size)
        for BLOCK_NORM in BLOCK_NORM_LIST:
            # cell/orientation parameters
            for cp in CELL_PIXEL_LIST:
                CELL_PIXELSHAPE = (cp,cp)
                for NORIENTATIONS in ORIENTATION_LIST:
                    print('\n####[TEST {}/{}]##################################################################'.format(count,ntests))
                    CSV_LINE = [count, block_size, cp, NORIENTATIONS]
                    run_hogsvm()
                    csv_writer.writerow(CSV_LINE)
                    # Flush after every run so rows already computed are on
                    # disk if the sweep is interrupted.
                    csv_fh.flush()
                    count += 1


####[TEST 1/96]##################################################################
[Current parameter sweep]:
  * Number of orientations: 14
  * Cell pixel shape: (4, 4)
  * Number of cells per block: (1, 1)
  * Block normalisation method: L2-Hys
  * Power law compression (preprocessing) on: True

[Training SVM model]:
  * 0.963s runtime (images loaded): Individual_Component\train\train_positive_A\00000000
  * 1.002s runtime (images loaded): Individual_Component\train\train_positive_A\00000001
  * 1.148s runtime (images loaded): Individual_Component\train\train_positive_A\00000002
  * 1.020s runtime (images loaded): Individual_Component\train\train_negative_A\00000000
  * 0.946s runtime (images loaded): Individual_Component\train\train_negative_A\00000001
  * 0.993s runtime (images loaded): Individual_Component\train\train_negative_A\00000002
  * 47.093s runtime (conversion to HOG)
  * 0.077s runtime (shuffling)
HOG descriptor size: (4480,)
3.985s runtime (SGD training)
Number of train

  * 30.654s runtime (conversion to HOG)
  * 0.059s runtime (shuffling)
HOG descriptor size: (3072,)
3.055s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.127s loading time
  * 47.851s runtime (conversion to HOG)
  * 0.098s runtime (shuffling)
HOG descriptor size: (3072,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 2.037192s prediction time (0.000215 s/image)
  * AUC (weighted): 0.996730450
  * Accuracy: 0.976631067
  * Precision (weighted): 0.976763086
  * Recall (weighted): 0.976631067
  * Confusion Matrix:
[[5864  136]
 [  85 3372]]

[Classifier statistics (on training data)]:
  * 1.264608s prediction time (0.000211 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected images]:

####[TEST 7/96]##

  * 20.424s runtime (conversion to HOG)
  * 0.033s runtime (shuffling)
HOG descriptor size: (1584,)
3.526s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.133s loading time
  * 31.830s runtime (conversion to HOG)
  * 0.052s runtime (shuffling)
HOG descriptor size: (1584,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 1.212614s prediction time (0.000128 s/image)
  * AUC (weighted): 0.996292450
  * Accuracy: 0.973670297
  * Precision (weighted): 0.973843792
  * Recall (weighted): 0.973670297
  * Confusion Matrix:
[[5846  154]
 [  95 3362]]

[Classifier statistics (on training data)]:
  * 0.620342s prediction time (0.000103 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected images]:

####[TEST 13/96]#



4.976s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.133s loading time
  * 25.679s runtime (conversion to HOG)
  * 0.032s runtime (shuffling)
HOG descriptor size: (784,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.537285s prediction time (0.000057 s/image)
  * AUC (weighted): 0.991816459
  * Accuracy: 0.956540129
  * Precision (weighted): 0.957239575
  * Recall (weighted): 0.956540129
  * Confusion Matrix:
[[5730  270]
 [ 141 3316]]

[Classifier statistics (on training data)]:
  * 0.361037s prediction time (0.000060 s/image)
  * AUC (weighted): 0.999632778
  * Accuracy: 0.997833333
  * Precision (weighted): 0.997836044
  * Recall (weighted): 0.997833333
  * Confusion Matrix:
[[2990   10]
 [   3 2997]]

[Falsely detected images]:

####[TEST 17/96]##################################################################
[Current parameter sweep]:
  * Numb



5.159s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.083s loading time
  * 24.762s runtime (conversion to HOG)
  * 0.031s runtime (shuffling)
HOG descriptor size: (840,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.558989s prediction time (0.000059 s/image)
  * AUC (weighted): 0.991553611
  * Accuracy: 0.953579359
  * Precision (weighted): 0.954419553
  * Recall (weighted): 0.953579359
  * Confusion Matrix:
[[5709  291]
 [ 148 3309]]

[Classifier statistics (on training data)]:
  * 0.345855s prediction time (0.000058 s/image)
  * AUC (weighted): 0.999749556
  * Accuracy: 0.998166667
  * Precision (weighted): 0.998168050
  * Recall (weighted): 0.998166667
  * Confusion Matrix:
[[2992    8]
 [   3 2997]]

[Falsely detected images]:

####[TEST 18/96]##################################################################
[Current parameter sweep]:
  * Numb



5.528s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.152s loading time
  * 24.665s runtime (conversion to HOG)
  * 0.031s runtime (shuffling)
HOG descriptor size: (896,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.570605s prediction time (0.000060 s/image)
  * AUC (weighted): 0.992493829
  * Accuracy: 0.958972190
  * Precision (weighted): 0.959538909
  * Recall (weighted): 0.958972190
  * Confusion Matrix:
[[5749  251]
 [ 137 3320]]

[Classifier statistics (on training data)]:
  * 0.356937s prediction time (0.000059 s/image)
  * AUC (weighted): 0.999877111
  * Accuracy: 0.999166667
  * Precision (weighted): 0.999167166
  * Recall (weighted): 0.999166667
  * Confusion Matrix:
[[2996    4]
 [   1 2999]]

[Falsely detected images]:

####[TEST 19/96]##################################################################
[Current parameter sweep]:
  * Numb



3.593s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.102s loading time
  * 24.637s runtime (conversion to HOG)
  * 0.030s runtime (shuffling)
HOG descriptor size: (672,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.459052s prediction time (0.000049 s/image)
  * AUC (weighted): 0.994924742
  * Accuracy: 0.967008565
  * Precision (weighted): 0.967470170
  * Recall (weighted): 0.967008565
  * Confusion Matrix:
[[5791  209]
 [ 103 3354]]

[Classifier statistics (on training data)]:
  * 0.287166s prediction time (0.000048 s/image)
  * AUC (weighted): 0.999882333
  * Accuracy: 0.995166667
  * Precision (weighted): 0.995166722
  * Recall (weighted): 0.995166667
  * Confusion Matrix:
[[2986   14]
 [  15 2985]]

[Falsely detected images]:

####[TEST 20/96]##################################################################
[Current parameter sweep]:
  * Numb



3.881s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.141s loading time
  * 24.488s runtime (conversion to HOG)
  * 0.023s runtime (shuffling)
HOG descriptor size: (720,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.470434s prediction time (0.000050 s/image)
  * AUC (weighted): 0.994765645
  * Accuracy: 0.967220049
  * Precision (weighted): 0.967883453
  * Recall (weighted): 0.967220049
  * Confusion Matrix:
[[5778  222]
 [  88 3369]]

[Classifier statistics (on training data)]:
  * 0.287816s prediction time (0.000048 s/image)
  * AUC (weighted): 0.999913556
  * Accuracy: 0.997833333
  * Precision (weighted): 0.997833831
  * Recall (weighted): 0.997833333
  * Confusion Matrix:
[[2992    8]
 [   5 2995]]

[Falsely detected images]:

####[TEST 21/96]##################################################################
[Current parameter sweep]:
  * Numb



4.051s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.081s loading time
  * 24.880s runtime (conversion to HOG)
  * 0.030s runtime (shuffling)
HOG descriptor size: (768,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.498709s prediction time (0.000053 s/image)
  * AUC (weighted): 0.994939350
  * Accuracy: 0.967008565
  * Precision (weighted): 0.967611978
  * Recall (weighted): 0.967008565
  * Confusion Matrix:
[[5781  219]
 [  93 3364]]

[Classifier statistics (on training data)]:
  * 0.356345s prediction time (0.000059 s/image)
  * AUC (weighted): 0.999915778
  * Accuracy: 0.997666667
  * Precision (weighted): 0.997666888
  * Recall (weighted): 0.997666667
  * Confusion Matrix:
[[2992    8]
 [   6 2994]]

[Falsely detected images]:

####[TEST 22/96]##################################################################
[Current parameter sweep]:
  * Numb



1.214s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.155s loading time
  * 18.991s runtime (conversion to HOG)
  * 0.020s runtime (shuffling)
HOG descriptor size: (280,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.182950s prediction time (0.000019 s/image)
  * AUC (weighted): 0.990506026
  * Accuracy: 0.957809030
  * Precision (weighted): 0.958955786
  * Recall (weighted): 0.957809030
  * Confusion Matrix:
[[5711  289]
 [ 110 3347]]

[Classifier statistics (on training data)]:
  * 0.124775s prediction time (0.000021 s/image)
  * AUC (weighted): 0.994912667
  * Accuracy: 0.971500000
  * Precision (weighted): 0.971557058
  * Recall (weighted): 0.971500000
  * Confusion Matrix:
[[2898  102]
 [  69 2931]]

[Falsely detected images]:

####[TEST 23/96]##################################################################
[Current parameter sweep]:
  * Numb



1.366s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.151s loading time
  * 19.219s runtime (conversion to HOG)
  * 0.020s runtime (shuffling)
HOG descriptor size: (300,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.173674s prediction time (0.000018 s/image)
  * AUC (weighted): 0.989356234
  * Accuracy: 0.952521941
  * Precision (weighted): 0.953911031
  * Recall (weighted): 0.952521941
  * Confusion Matrix:
[[5677  323]
 [ 126 3331]]

[Classifier statistics (on training data)]:
  * 0.100610s prediction time (0.000017 s/image)
  * AUC (weighted): 0.995168111
  * Accuracy: 0.972666667
  * Precision (weighted): 0.972768364
  * Recall (weighted): 0.972666667
  * Confusion Matrix:
[[2896  104]
 [  60 2940]]

[Falsely detected images]:

####[TEST 24/96]##################################################################
[Current parameter sweep]:
  * Numb



1.317s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.161s loading time
  * 18.515s runtime (conversion to HOG)
  * 0.020s runtime (shuffling)
HOG descriptor size: (320,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.201434s prediction time (0.000021 s/image)
  * AUC (weighted): 0.991061469
  * Accuracy: 0.955588453
  * Precision (weighted): 0.956745493
  * Recall (weighted): 0.955588453
  * Confusion Matrix:
[[5701  299]
 [ 121 3336]]

[Classifier statistics (on training data)]:
  * 0.122923s prediction time (0.000020 s/image)
  * AUC (weighted): 0.996389333
  * Accuracy: 0.975500000
  * Precision (weighted): 0.975580373
  * Recall (weighted): 0.975500000
  * Confusion Matrix:
[[2907   93]
 [  54 2946]]

[Falsely detected images]:

####[TEST 25/96]##################################################################
[Current parameter sweep]:
  * Numb

  * 0.204s runtime (shuffling)
HOG descriptor size: (10560,)
11.542s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.208s loading time
  * 64.104s runtime (conversion to HOG)
  * 0.324s runtime (shuffling)
HOG descriptor size: (10560,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 7.774299s prediction time (0.000822 s/image)
  * AUC (weighted): 0.998890078
  * Accuracy: 0.985513376
  * Precision (weighted): 0.985588761
  * Recall (weighted): 0.985513376
  * Confusion Matrix:
[[5911   89]
 [  48 3409]]

[Classifier statistics (on training data)]:
  * 4.899466s prediction time (0.000817 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected images]:

####[TEST 31/96]######################################

  * 25.261s runtime (conversion to HOG)
  * 0.103s runtime (shuffling)
HOG descriptor size: (5120,)
8.685s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.145s loading time
  * 40.179s runtime (conversion to HOG)
  * 0.188s runtime (shuffling)
HOG descriptor size: (5120,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 4.199912s prediction time (0.000444 s/image)
  * AUC (weighted): 0.998889210
  * Accuracy: 0.986253569
  * Precision (weighted): 0.986319141
  * Recall (weighted): 0.986253569
  * Confusion Matrix:
[[5916   84]
 [  46 3411]]

[Classifier statistics (on training data)]:
  * 2.434644s prediction time (0.000406 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected images]:

####[TEST 37/96]#

  * 16.804s runtime (conversion to HOG)
  * 0.054s runtime (shuffling)
HOG descriptor size: (2688,)
7.733s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.086s loading time
  * 24.404s runtime (conversion to HOG)
  * 0.088s runtime (shuffling)
HOG descriptor size: (2688,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 1.903521s prediction time (0.000201 s/image)
  * AUC (weighted): 0.997161556
  * Accuracy: 0.975467907
  * Precision (weighted): 0.975656602
  * Recall (weighted): 0.975467907
  * Confusion Matrix:
[[5852  148]
 [  84 3373]]

[Classifier statistics (on training data)]:
  * 1.165349s prediction time (0.000194 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected images]:

####[TEST 43/96]#



2.443s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.140s loading time
  * 19.482s runtime (conversion to HOG)
  * 0.033s runtime (shuffling)
HOG descriptor size: (672,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.516969s prediction time (0.000055 s/image)
  * AUC (weighted): 0.995492865
  * Accuracy: 0.969123401
  * Precision (weighted): 0.969727233
  * Recall (weighted): 0.969123401
  * Confusion Matrix:
[[5790  210]
 [  82 3375]]

[Classifier statistics (on training data)]:
  * 0.328006s prediction time (0.000055 s/image)
  * AUC (weighted): 0.998126222
  * Accuracy: 0.980166667
  * Precision (weighted): 0.980224774
  * Recall (weighted): 0.980166667
  * Confusion Matrix:
[[2924   76]
 [  43 2957]]

[Falsely detected images]:

####[TEST 47/96]##################################################################
[Current parameter sweep]:
  * Numb



2.602s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.142s loading time
  * 20.001s runtime (conversion to HOG)
  * 0.034s runtime (shuffling)
HOG descriptor size: (720,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.564405s prediction time (0.000060 s/image)
  * AUC (weighted): 0.995211069
  * Accuracy: 0.965210955
  * Precision (weighted): 0.966100789
  * Recall (weighted): 0.965210955
  * Confusion Matrix:
[[5756  244]
 [  85 3372]]

[Classifier statistics (on training data)]:
  * 0.362867s prediction time (0.000060 s/image)
  * AUC (weighted): 0.998314222
  * Accuracy: 0.980166667
  * Precision (weighted): 0.980200014
  * Recall (weighted): 0.980166667
  * Confusion Matrix:
[[2928   72]
 [  47 2953]]

[Falsely detected images]:

####[TEST 48/96]##################################################################
[Current parameter sweep]:
  * Numb



3.113s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.129s loading time
  * 20.674s runtime (conversion to HOG)
  * 0.040s runtime (shuffling)
HOG descriptor size: (768,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.584277s prediction time (0.000062 s/image)
  * AUC (weighted): 0.995478305
  * Accuracy: 0.967748758
  * Precision (weighted): 0.968416184
  * Recall (weighted): 0.967748758
  * Confusion Matrix:
[[5780  220]
 [  85 3372]]

[Classifier statistics (on training data)]:
  * 0.400252s prediction time (0.000067 s/image)
  * AUC (weighted): 0.998455667
  * Accuracy: 0.982666667
  * Precision (weighted): 0.982688119
  * Recall (weighted): 0.982666667
  * Confusion Matrix:
[[2938   62]
 [  42 2958]]

[Falsely detected images]:

####[TEST 49/96]##################################################################
[Current parameter sweep]:
  * Numb

  * 0.354s runtime (shuffling)
HOG descriptor size: (20160,)
17.352s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.107s loading time
  * 45.620s runtime (conversion to HOG)
  * 0.632s runtime (shuffling)
HOG descriptor size: (20160,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 13.056534s prediction time (0.001381 s/image)
  * AUC (weighted): 0.999077186
  * Accuracy: 0.984878926
  * Precision (weighted): 0.984999824
  * Recall (weighted): 0.984878926
  * Confusion Matrix:
[[5901   99]
 [  44 3413]]

[Classifier statistics (on training data)]:
  * 8.238863s prediction time (0.001373 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected images]:

####[TEST 55/96]#####################################

    0.074s loading time
  * 21.293s runtime (conversion to HOG)
  * 0.180s runtime (shuffling)
HOG descriptor size: (9072,)
14.903s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.128s loading time
  * 39.562s runtime (conversion to HOG)
  * 0.341s runtime (shuffling)
HOG descriptor size: (9072,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 6.903444s prediction time (0.000730 s/image)
  * AUC (weighted): 0.998865056
  * Accuracy: 0.984561700
  * Precision (weighted): 0.984666791
  * Recall (weighted): 0.984561700
  * Confusion Matrix:
[[5902   98]
 [  48 3409]]

[Classifier statistics (on training data)]:
  * 4.481843s prediction time (0.000747 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected im

  * 13.869s runtime (conversion to HOG)
  * 0.079s runtime (shuffling)
HOG descriptor size: (4320,)
12.199s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.078s loading time
  * 21.855s runtime (conversion to HOG)
  * 0.135s runtime (shuffling)
HOG descriptor size: (4320,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 3.047130s prediction time (0.000322 s/image)
  * AUC (weighted): 0.996722013
  * Accuracy: 0.971978429
  * Precision (weighted): 0.972391753
  * Recall (weighted): 0.971978429
  * Confusion Matrix:
[[5816  184]
 [  81 3376]]

[Classifier statistics (on training data)]:
  * 1.823162s prediction time (0.000304 s/image)
  * AUC (weighted): 0.999999889
  * Accuracy: 0.999666667
  * Precision (weighted): 0.999666889
  * Recall (weighted): 0.999666667
  * Confusion Matrix:
[[2998    2]
 [   0 3000]]

[Falsely detected images]:

####[TEST 67/96]



1.932s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.123s loading time
  * 15.149s runtime (conversion to HOG)
  * 0.031s runtime (shuffling)
HOG descriptor size: (756,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.522532s prediction time (0.000055 s/image)
  * AUC (weighted): 0.994959069
  * Accuracy: 0.963730570
  * Precision (weighted): 0.964614746
  * Recall (weighted): 0.963730570
  * Confusion Matrix:
[[5750  250]
 [  93 3364]]

[Classifier statistics (on training data)]:
  * 0.320654s prediction time (0.000053 s/image)
  * AUC (weighted): 0.997548444
  * Accuracy: 0.977333333
  * Precision (weighted): 0.977350518
  * Recall (weighted): 0.977333333
  * Confusion Matrix:
[[2923   77]
 [  59 2941]]

[Falsely detected images]:

####[TEST 71/96]##################################################################
[Current parameter sweep]:
  * Numb



1.923s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.230s loading time
  * 16.157s runtime (conversion to HOG)
  * 0.032s runtime (shuffling)
HOG descriptor size: (810,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.549932s prediction time (0.000058 s/image)
  * AUC (weighted): 0.994563735
  * Accuracy: 0.962990377
  * Precision (weighted): 0.963963560
  * Recall (weighted): 0.962990377
  * Confusion Matrix:
[[5742  258]
 [  92 3365]]

[Classifier statistics (on training data)]:
  * 0.336101s prediction time (0.000056 s/image)
  * AUC (weighted): 0.997659222
  * Accuracy: 0.973833333
  * Precision (weighted): 0.973890674
  * Recall (weighted): 0.973833333
  * Confusion Matrix:
[[2905   95]
 [  62 2938]]

[Falsely detected images]:

####[TEST 72/96]##################################################################
[Current parameter sweep]:
  * Numb

  * 0.503s runtime (shuffling)
HOG descriptor size: (28080,)
25.810s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.512s loading time
  * 42.551s runtime (conversion to HOG)
  * 1.111s runtime (shuffling)
HOG descriptor size: (28080,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 19.540143s prediction time (0.002066 s/image)
  * AUC (weighted): 0.998995757
  * Accuracy: 0.984032991
  * Precision (weighted): 0.984129995
  * Recall (weighted): 0.984032991
  * Confusion Matrix:
[[5901   99]
 [  52 3405]]

[Classifier statistics (on training data)]:
  * 12.094852s prediction time (0.002016 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected images]:

####[TEST 78/96]####################################

    0.063s loading time
  * 16.009s runtime (conversion to HOG)
  * 0.195s runtime (shuffling)
HOG descriptor size: (11520,)
19.398s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.121s loading time
  * 25.274s runtime (conversion to HOG)
  * 0.312s runtime (shuffling)
HOG descriptor size: (11520,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 7.275641s prediction time (0.000769 s/image)
  * AUC (weighted): 0.998408784
  * Accuracy: 0.980226287
  * Precision (weighted): 0.980340598
  * Recall (weighted): 0.980226287
  * Confusion Matrix:
[[5882  118]
 [  69 3388]]

[Classifier statistics (on training data)]:
  * 4.817363s prediction time (0.000803 s/image)
  * AUC (weighted): 1.000000000
  * Accuracy: 1.000000000
  * Precision (weighted): 1.000000000
  * Recall (weighted): 1.000000000
  * Confusion Matrix:
[[3000    0]
 [   0 3000]]

[Falsely detected 

  * 13.893s runtime (conversion to HOG)
  * 0.084s runtime (shuffling)
HOG descriptor size: (4800,)
17.400s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.079s loading time
  * 20.792s runtime (conversion to HOG)
  * 0.135s runtime (shuffling)
HOG descriptor size: (4800,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 2.932421s prediction time (0.000310 s/image)
  * AUC (weighted): 0.995918523
  * Accuracy: 0.968594692
  * Precision (weighted): 0.969179805
  * Recall (weighted): 0.968594692
  * Confusion Matrix:
[[5789  211]
 [  86 3371]]

[Classifier statistics (on training data)]:
  * 1.793665s prediction time (0.000299 s/image)
  * AUC (weighted): 0.999893333
  * Accuracy: 0.997833333
  * Precision (weighted): 0.997834716
  * Recall (weighted): 0.997833333
  * Confusion Matrix:
[[2991    9]
 [   4 2996]]

[Falsely detected images]:

####[TEST 90/96]

  * 9.132s runtime (conversion to HOG)
  * 0.010s runtime (shuffling)
HOG descriptor size: (480,)
0.713s runtime (SGD training)
Number of training images loaded: 6000

[Generating test images]:
  * Loading cached unprocessed test images from RAM
    0.097s loading time
  * 14.481s runtime (conversion to HOG)
  * 0.021s runtime (shuffling)
HOG descriptor size: (480,)
Number of testing images loaded: 9457

[Classifier statistics (on test data)]:
  * 0.306676s prediction time (0.000032 s/image)
  * AUC (weighted): 0.993444171
  * Accuracy: 0.959712382
  * Precision (weighted): 0.960882202
  * Recall (weighted): 0.959712382
  * Confusion Matrix:
[[5718  282]
 [  99 3358]]

[Classifier statistics (on training data)]:
  * 0.187013s prediction time (0.000031 s/image)
  * AUC (weighted): 0.996541556
  * Accuracy: 0.970000000
  * Precision (weighted): 0.970060377
  * Recall (weighted): 0.970000000
  * Confusion Matrix:
[[2893  107]
 [  73 2927]]

[Falsely detected images]:

####[TEST 96/96]####

In [22]:
# Finalize the sweep: release the results file (if one is open) and report the
# total wall-clock time of the run.
if RESULTS_FH is not None:  # identity check; `!= None` can be fooled by a custom __ne__
    RESULTS_FH.close()
    # Drop the reference to the closed handle so a later cell that checks
    # `RESULTS_FH` does not try to write to a closed file.
    RESULTS_FH = None
# `test_start_time` is defined outside this cell; it is expected to hold the
# time.time() value captured when the sweep started.
print('{:.3f}s elapsed (entire test)'.format(time.time() - test_start_time))
print('DONE')

9731.245s elapsed (entire test)
DONE
