In [1]:
%matplotlib inline

import os
import tarfile
import shutil
import hashlib
import glob
import random
from datetime import datetime
from typing import *


import requests
from joblib import Parallel, delayed
from pathlib import Path
from PIL import Image, ImageOps
import numpy as np
from sklearn.metrics import *

import matplotlib.pyplot as plt



In [2]:
from pyunpack import Archive

In [3]:
Archive("dataset.zip").extractall(".")

In [4]:
data_dir="/home/angshul1994/dataset"
train_data_dir=os.path.join(data_dir,"trainset")
train_faces=os.path.join(train_data_dir,"faces")
train_non_faces=os.path.join(train_data_dir,"non-faces")

In [None]:
data_dir="C:\\Users\\ANGSHUL\\Downloads\\dataset"
train_data_dir=os.path.join(data_dir,"trainset")
train_faces=os.path.join(train_data_dir,"faces")
train_non_faces=os.path.join(train_data_dir,"non-faces")

In [5]:
face_image_files = glob.glob(os.path.join(train_faces, '**', '*.png'), recursive=True)
len(face_image_files)

499

In [6]:
background_image_files=glob.glob(os.path.join(train_non_faces, '**', '*.png'), recursive=True)
len(background_image_files)

2000

In [7]:
def image_to_array(img: Image.Image):
    return np.array(img).astype(np.float32) / 255.

def integral_image(img: np.ndarray):
    integral = np.cumsum(np.cumsum(img, axis=0), axis=1)
    return np.pad(integral, (1, 1), 'constant', constant_values=(0, 0))[:-1, :-1]

In [9]:
WINDOW_SIZE=19

In [12]:
class Feature:
    def __init__(self, x: int, y: int, width: int, height: int):
        self.x = x
        self.y = y
        self.width = width
        self.height = height
        
    def __call__(self, integral_image: np.ndarray) -> float:
        try:
            return np.sum(np.multiply(integral_image[self.y_pos, self.x_pos], self.coeffs))
        except IndexError as e:
            raise IndexError(str(e) + ' in ' + str(self))
    
    def __repr__(self):
        return f'{self.__class__.__name__}(x={self.x}, y={self.y}, width={self.width}, height={self.height})'    
              
   

In [13]:
class Feature2h(Feature):
    def __init__(self, x: int, y: int, width: int, height: int):
        super().__init__(x, y, width, height)
        hw = width // 2
        self.x_pos = [x,      x + hw,     x,          x + hw,
                         x + hw, x + width,  x + hw,     x + width]
        self.y_pos = [y,      y,          y + height, y + height,
                         y,      y,          y + height, y + height]
        self.coeffs   = [1,     -1,         -1,          1,
                         -1,     1,          1,         -1]

In [14]:
class Feature2v(Feature):
    def __init__(self, x: int, y: int, width: int, height: int):
        super().__init__(x, y, width, height)
        hh = height // 2        
        self.x_pos = [x,      x + width,  x,          x + width,
                         x,      x + width,  x,          x + width]
        self.y_pos = [y,      y,          y + hh,     y + hh,
                         y + hh, y + hh,     y + height, y + height]
        self.coeffs   = [-1,     1,          1,         -1,
                         1,     -1,         -1,          1]

In [15]:
class Feature3h(Feature):
    def __init__(self, x: int, y: int, width: int, height: int):
        super().__init__(x, y, width, height)
        tw = width // 3
        self.x_pos = [x,        x + tw,    x,          x + tw,
                         x + tw,   x + 2*tw,  x + tw,     x + 2*tw,
                         x + 2*tw, x + width, x + 2*tw,   x + width]
        self.y_pos = [y,        y,         y + height, y + height,
                         y,        y,         y + height, y + height,
                         y,        y,         y + height, y + height]
        self.coeffs   = [-1,       1,         1,         -1,
                          1,      -1,        -1,          1,
                         -1,       1,         1,         -1]

In [16]:
class Feature3v(Feature):
    def __init__(self, x: int, y: int, width: int, height: int):
        super().__init__(x, y, width, height)
        th = height // 3
        self.x_pos = [x,        x + width,  x,          x + width,
                         x,        x + width,  x,          x + width,
                         x,        x + width,  x,          x + width]
        self.y_pos = [y,        y,          y + th,     y + th,
                         y + th,   y + th,     y + 2*th,   y + 2*th,
                         y + 2*th, y + 2*th,   y + height, y + height]
        self.coeffs   = [-1,        1,         1,         -1,
                          1,       -1,        -1,          1,
                         -1,        1,         1,         -1]

In [17]:
class Feature4(Feature):
    def __init__(self, x: int, y: int, width: int, height: int):
        super().__init__(x, y, width, height)
        hw = width // 2
        hh = height // 2
        self.x_pos = [x,      x + hw,     x,          x + hw,     # upper row
                         x + hw, x + width,  x + hw,     x + width,
                         x,      x + hw,     x,          x + hw,     # lower row
                         x + hw, x + width,  x + hw,     x + width]
        self.y_pos = [y,      y,          y + hh,     y + hh,     # upper row
                         y,      y,          y + hh,     y + hh,
                         y + hh, y + hh,     y + height, y + height, # lower row
                         y + hh, y + hh,     y + height, y + height]
        self.coeffs   = [1,     -1,         -1,          1,          # upper row
                         -1,     1,          1,         -1,
                         -1,     1,          1,         -1,          # lower row
                          1,    -1,         -1,          1]

In [18]:
Size = NamedTuple('Size', [('height', int), ('width', int)])
Location = NamedTuple('Location', [('top', int), ('left', int)])


def get_positions(base_shape: Size, window_size: int = WINDOW_SIZE):
    return (Location(left=x, top=y)
            for x in range(0, window_size-base_shape.width+1) 
            for y in range(0, window_size-base_shape.height+1))

def get_shapes(base_shape: Size, window_size: int = WINDOW_SIZE):
    base_height = base_shape.height
    base_width = base_shape.width
    return (Size(height=height, width=width)
            for width in range(base_width, window_size + 1, base_width)
            for height in range(base_height, window_size + 1, base_height))

In [19]:
feature2h = list(Feature2h(location.left, location.top, shape.width, shape.height)
                 for shape in get_shapes(Size(height=1, width=2), WINDOW_SIZE)
                 for location in get_positions(shape, WINDOW_SIZE))

feature2v = list(Feature2v(location.left, location.top, shape.width, shape.height)
                 for shape in get_shapes(Size(height=2, width=1), WINDOW_SIZE)
                 for location in get_positions(shape, WINDOW_SIZE))

feature3h = list(Feature3h(location.left, location.top, shape.width, shape.height)
                 for shape in get_shapes(Size(height=1, width=3), WINDOW_SIZE)
                 for location in get_positions(shape, WINDOW_SIZE))

feature3v = list(Feature3v(location.left, location.top, shape.width, shape.height)
                 for shape in get_shapes(Size(height=3, width=1), WINDOW_SIZE)
                 for location in get_positions(shape, WINDOW_SIZE))

feature4  = list(Feature4(location.left, location.top, shape.width, shape.height)
                 for shape in get_shapes(Size(height=2, width=2), WINDOW_SIZE)
                 for location in get_positions(shape, WINDOW_SIZE))

features = feature2h + feature2v + feature3h + feature3v + feature4

print(f'Number of feature2h features: {len(feature2h)}')
print(f'Number of feature2v features: {len(feature2v)}')
print(f'Number of feature3h features: {len(feature3h)}')
print(f'Number of feature3v features: {len(feature3v)}')
print(f'Number of feature4 features:  {len(feature4)}')
print(f'Total number of features:     {len(features)}')

Number of feature2h features: 17100
Number of feature2v features: 17100
Number of feature3h features: 10830
Number of feature3v features: 10830
Number of feature4 features:  8100
Total number of features:     63960


In [20]:

def build_data(p: int, n: int, face_files, background_files):
    xs = []
    xs.extend([image_to_array(Image.open(f).convert('L')) for f in random.sample(face_image_files, p)])
    xs.extend([image_to_array(Image.open(f).convert('L')) for f in np.random.choice(background_image_files, n, replace=True)])
    ys = np.hstack([np.ones((p,)), np.zeros((n,))])
    return np.array(xs), ys

In [21]:

image_samples, _ = build_data(499, 2000, face_image_files, background_image_files)

sample_mean = image_samples.mean()
sample_std = image_samples.std()
del image_samples

print(f'Sample mean: {sample_mean}, standard deviation: {sample_std}')

Sample mean: 0.4319107234477997, standard deviation: 0.21371206641197205


In [22]:

def normalize(p: int, n: int, face_files, background_files, mean: float = sample_mean, std: float = sample_std):
    xs, ys = build_data(p, n, face_files, background_files)
    xs=(xs-mean)/std
    return xs, ys

In [23]:
xs, ys = normalize(499, 2000, face_image_files, background_image_files)

In [24]:
xis = np.array([integral_image(x) for x in xs])
xs.shape, xis.shape

((2499, 19, 19), (2499, 20, 20))

In [25]:
Threshold = NamedTuple('Threshold', [('threshold', float), ('polarity', float)])

ClassifierResult = NamedTuple('ClassifierResult', [('threshold', float), ('polarity', int), 
                                                   ('classification_error', float),
                                                   ('classifier', Callable[[np.ndarray], float])])

WeakClassifier = NamedTuple('WeakClassifier', [('threshold', float), ('polarity', int), 
                                               ('alpha', float), 
                                               ('classifier', Callable[[np.ndarray], float])])

In [26]:
def get_weak_classifier(x: np.ndarray, classifier: WeakClassifier) -> float:
    polarity=classifier.polarity
    theta=classifier.threshold
    feature=classifier.classifier
    return (np.sign((polarity * theta) - (polarity * feature(x))) + 1) // 2

In [27]:

def create_strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
    sum_hypotheses = 0.
    sum_alphas = 0.
    for c in weak_classifiers:
        sum_hypotheses += c.alpha * get_weak_classifier(x, c)
        sum_alphas += c.alpha
    return 1 if (sum_hypotheses >= .5*sum_alphas) else 0

In [29]:

def get_threshold(ys: np.ndarray, ws: np.ndarray, zs: np.ndarray):  
    # Sort according to score
    p = np.argsort(zs)
    zs, ys, ws = zs[p], ys[p], ws[p]
    
    # Determine the best threshold: build running sums
    s_minus, s_plus = 0., 0.
    t_minus, t_plus = 0., 0.
    s_minuses, s_pluses = [], []
    
    for y, w in zip(ys, ws):
        if y < .5:
            s_minus += w
            t_minus += w
        else:
            s_plus += w
            t_plus += w
        s_minuses.append(s_minus)
        s_pluses.append(s_plus)
    
    # Determine the best threshold: select optimal threshold.
    min_e = float('inf')
    min_z=0 
    polarity=0
    for z, s_m, s_p in zip(zs, s_minuses, s_pluses):
        error_1 = s_p + (t_minus - s_m)
        error_2 = s_m + (t_plus - s_p)
        if error_1 < min_e:
            min_e = error_1
            min_z = z
            polarity = -1
        elif error_2 < min_e:
            min_e = error_2
            min_z = z
            polarity = 1
    return Threshold(threshold=min_z, polarity=polarity

In [30]:
def create_features(f: Feature, xis: np.ndarray, ys: np.ndarray, ws: np.ndarray, parallel: Optional[Parallel] = None) -> ClassifierResult:   
    if parallel is None:
        parallel = Parallel(n_jobs=-1, backend='threading')
    
    # Determine all feature values
    zs = np.array(parallel(delayed(f)(x) for x in xis))
    
    # Determine the best threshold
    result = get_threshold(ys, ws, zs)
            
    # Determine the classification error
    classification_error = 0.
    for x, y, w in zip(xis, ys, ws):
        h = (np.sign((result.polarity *result.threshold) - (result.polarity * f(x))) + 1) // 2
        classification_error += w * np.abs(h - y)
            
    return ClassifierResult(threshold=result.threshold, polarity=result.polarity, 
                            classification_error=classification_error, classifier=f)

In [None]:
def normalize_weights(w: np.ndarray) -> np.ndarray:
    return w / w.sum()

In [None]:
status_check= 2000
random_prob = 0.25

def build_weak_classifiers(prefix: str, num_features: int, xis: np.ndarray, ys: np.ndarray, features: List[Feature], ws: Optional[np.ndarray] = None) -> Tuple[List[WeakClassifier], List[float]]:
    if ws is None:
        m = len(ys[ys < .5])  # number of negative example
        l = len(ys[ys > .5])  # number of positive examples

        # Initialize the weights
        ws = np.zeros_like(ys)
        ws[ys < .5] = 1./(2.*m)
        ws[ys > .5] = 1./(2.*l)
    
    # Keep track of the history of the example weights.
    w_history = [ws]

    total_start_time = datetime.now()
    with Parallel(n_jobs=-1, backend='threading') as parallel:
        weak_classifiers = []  # type: List[WeakClassifier]
        for t in range(num_features):
            print(f'Building weak classifier {t+1}/{num_features} ...')
            start_time = datetime.now()
            
            # Normalize the weights
            ws = normalize_weights(ws)
            
            status_counter = status_check

            # Select best weak classifier for this round
            best = ClassifierResult(polarity=0, threshold=0, classification_error=float('inf'), classifier=None)
            for i, f in enumerate(features):
                status_counter -= 1
                improved = False

                # Python runs singlethreaded. To speed things up,
                # we're only anticipating every other feature, give or take.
                if random_prob < 1.:
                    skip_probability = np.random.random()
                    if skip_probability > random_prob:
                        continue

                result = create_features(f, xis, ys, ws, parallel)
                if result.classification_error < best.classification_error:
                    improved = True
                    best = result

                # Print status every couple of iterations.
                if improved or status_counter == 0:
                    current_time = datetime.now()
                    duration = current_time - start_time
                    total_duration = current_time - total_start_time
                    status_counter = status_check
                    if improved:
                        print(f't={t+1}/{num_features} {total_duration.total_seconds():.2f}s ({duration.total_seconds():.2f}s in this stage) {i+1}/{len(features)} {100*i/len(features):.2f}% evaluated. Classification error improved to {best.classification_error:.5f} using {str(best.classifier)} ...')
                    else:
                        print(f't={t+1}/{num_features} {total_duration.total_seconds():.2f}s ({duration.total_seconds():.2f}s in this stage) {i+1}/{len(features)} {100*i/len(features):.2f}% evaluated.')

            # After the best classifier was found, determine alpha
            beta = best.classification_error / (1 - best.classification_error)
            alpha = np.log(1. / beta)
            
            # Build the weak classifier
            classifier = WeakClassifier(threshold=best.threshold, polarity=best.polarity, classifier=best.classifier, alpha=alpha)
            
            # Update the weights for misclassified examples
            for i, (x, y) in enumerate(zip(xis, ys)):
                h = get_weak_classifier(x, classifier)
                e = np.abs(h - y)
                ws[i] = ws[i] * np.power(beta, 1-e)
                
            # Register this weak classifier           
            weak_classifiers.append(classifier)
            w_history.append(ws)
        
    
    print(f'Done building {num_features} weak classifiers.')
    return weak_classifiers, w_history

Create Test set


In [None]:
test_data_dir=os.path.join(data_dir,"testset")
test_faces=os.path.join(test_data_dir,"faces")
test_non_faces=os.path.join(test_data_dir,"non-faces")

In [None]:
face_image_files_test = glob.glob(os.path.join(test_faces, '**', '*.png'), recursive=True)
background_image_files_test=glob.glob(os.path.join(test_non_faces, '**', '*.png'), recursive=True)

In [None]:
image_samples_test, _ = build_data(471, 2000, face_image_files_test, background_image_files_test)

sample_mean = image_samples_test.mean()
sample_std = image_samples_test.std()

In [None]:
test_xs, test_ys = normalize(471, 2000,face_image_files_test, background_image_files_test)
test_xis = np.array([integral_image(x) for x in test_xs])

In [None]:
predicted_scores = NamedTuple('PredictionStats', [('tn', int), ('fp', int), ('fn', int), ('tp', int)])

def predict(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[np.ndarray, predicted_scores]:
    c = confusion_matrix(y_true, y_pred)
    tn, fp, fn, tp = c.ravel()
    return c, predicted_scores(tn=tn, fp=fp, fn=fn, tp=tp)

In [33]:
weak_classifiers_1, w_history = build_weak_classifiers('1 round', 1, xis, ys, features)

Building weak classifier 1/1 ...


Compilation is falling back to object mode WITH looplifting enabled because Function "find_best_threshold" failed type inference due to: Invalid use of Function(<class 'float'>) with argument(s) of type(s): (Literal[str](inf))
 * parameterized
In definition 0:
    TypeError: float() only support for numbers
    raised from /home/angshul1994/.local/lib/python3.6/site-packages/numba/typing/builtins.py:889
In definition 1:
    TypeError: float() only support for numbers
    raised from /home/angshul1994/.local/lib/python3.6/site-packages/numba/typing/builtins.py:889
This error is usually caused by passing an argument of a type that is unsupported by the named function.
[1] During: resolving callee type: Function(<class 'float'>)
[2] During: typing of call at <ipython-input-29-51fd85f81679> (21)


File "<ipython-input-29-51fd85f81679>", line 21:
def find_best_threshold(zs: np.ndarray, t_minus: float, t_plus: float, s_minuses: List[float], s_pluses: List[float]) -> ThresholdPolarity:
    mi

t=1/1 1.63s (1.63s in this stage) 1/63960 0.00% evaluated. Classification error improved to 0.33722 using Feature2h(x=0, y=0, width=2, height=1) ...
t=1/1 2.19s (2.19s in this stage) 5/63960 0.01% evaluated. Classification error improved to 0.26680 using Feature2h(x=0, y=4, width=2, height=1) ...
t=1/1 5.46s (5.45s in this stage) 29/63960 0.04% evaluated. Classification error improved to 0.25766 using Feature2h(x=1, y=9, width=2, height=1) ...
t=1/1 14.92s (14.91s in this stage) 106/63960 0.16% evaluated. Classification error improved to 0.24651 using Feature2h(x=5, y=10, width=2, height=1) ...
t=1/1 22.11s (22.11s in this stage) 160/63960 0.25% evaluated. Classification error improved to 0.17865 using Feature2h(x=8, y=7, width=2, height=1) ...
t=1/1 59.20s (59.20s in this stage) 491/63960 0.77% evaluated. Classification error improved to 0.15235 using Feature2h(x=8, y=4, width=2, height=2) ...
t=1/1 101.68s (101.68s in this stage) 805/63960 1.26% evaluated. Classification error improv

Compilation is falling back to object mode WITH looplifting enabled because Function "run_weak_classifier" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at <ipython-input-26-e47a16af2a40> (8)

File "<ipython-input-26-e47a16af2a40>", line 8:
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -> float:
    return weak_classifier(x=x, f=c.classifier, polarity=c.polarity, theta=c.threshold)
    ^

  @jit

File "<ipython-input-26-e47a16af2a40>", line 7:
@jit
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -> float:
^

  state.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "<ipython-input-26-e47a16af2a40>", line 7:
@jit
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -

Done building 1 weak classifiers.


In [34]:
weak_classifiers_1

[WeakClassifier(threshold=-2.403821110725403, polarity=1, alpha=1.9315405703473476, classifier=Feature2h(x=8, y=1, width=2, height=10))]

In [43]:
ys_strong = np.array([strong_classifier(x, weak_classifiers_1) for x in test_xis])
c, s = predict(test_ys, ys_strong)
print(f'Accuracy: {(s.tp+s.tn)/(s.tp+s.fp+s.tn+s.fn):.2f} , False Positives: {s.fp}, False Negatives: {s.fn}')

Compilation is falling back to object mode WITH looplifting enabled because Function "strong_classifier" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at <ipython-input-27-886b5b384fd2> (3)

File "<ipython-input-27-886b5b384fd2>", line 3:
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
    sum_hypotheses = 0.
    ^

  @jit
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "strong_classifier" failed type inference due to: cannot determine Numba type of <class 'numba.dispatcher.LiftedLoop'>

File "<ipython-input-27-886b5b384fd2>", line 5:
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
    <source elided>
    sum_alphas = 0.
    for c in weak_classifiers:
    ^

  @jit

File "<ipython-input-27-886b5b384fd2>", line 2:
@jit
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
^

  state.func_ir.loc))
Fall-ba

Accuracy: 0.79 , False Positives: 181, False Negatives: 332


In [33]:
weak_classifiers_5, w_history = build_weak_classifiers('5 rounds', 5, xis, ys, features)

Building weak classifier 1/5 ...


Compilation is falling back to object mode WITH looplifting enabled because Function "find_best_threshold" failed type inference due to: Invalid use of Function(<class 'float'>) with argument(s) of type(s): (Literal[str](inf))
 * parameterized
In definition 0:
    TypeError: float() only support for numbers
    raised from /home/angshul1994/.local/lib/python3.6/site-packages/numba/typing/builtins.py:889
In definition 1:
    TypeError: float() only support for numbers
    raised from /home/angshul1994/.local/lib/python3.6/site-packages/numba/typing/builtins.py:889
This error is usually caused by passing an argument of a type that is unsupported by the named function.
[1] During: resolving callee type: Function(<class 'float'>)
[2] During: typing of call at <ipython-input-29-51fd85f81679> (21)


File "<ipython-input-29-51fd85f81679>", line 21:
def find_best_threshold(zs: np.ndarray, t_minus: float, t_plus: float, s_minuses: List[float], s_pluses: List[float]) -> ThresholdPolarity:
    mi

t=1/5 1.69s (1.69s in this stage) 1/63960 0.00% evaluated. Classification error improved to 0.33493 using Feature2h(x=0, y=0, width=2, height=1) ...
t=1/5 2.23s (2.23s in this stage) 5/63960 0.01% evaluated. Classification error improved to 0.27158 using Feature2h(x=0, y=4, width=2, height=1) ...
t=1/5 5.51s (5.51s in this stage) 29/63960 0.04% evaluated. Classification error improved to 0.25668 using Feature2h(x=1, y=9, width=2, height=1) ...
t=1/5 14.83s (14.83s in this stage) 106/63960 0.16% evaluated. Classification error improved to 0.24801 using Feature2h(x=5, y=10, width=2, height=1) ...
t=1/5 21.89s (21.88s in this stage) 160/63960 0.25% evaluated. Classification error improved to 0.17865 using Feature2h(x=8, y=7, width=2, height=1) ...
t=1/5 58.30s (58.30s in this stage) 491/63960 0.77% evaluated. Classification error improved to 0.15210 using Feature2h(x=8, y=4, width=2, height=2) ...
t=1/5 100.07s (100.07s in this stage) 805/63960 1.26% evaluated. Classification error improv

Compilation is falling back to object mode WITH looplifting enabled because Function "run_weak_classifier" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at <ipython-input-26-e47a16af2a40> (8)

File "<ipython-input-26-e47a16af2a40>", line 8:
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -> float:
    return weak_classifier(x=x, f=c.classifier, polarity=c.polarity, theta=c.threshold)
    ^

  @jit

File "<ipython-input-26-e47a16af2a40>", line 7:
@jit
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -> float:
^

  state.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "<ipython-input-26-e47a16af2a40>", line 7:
@jit
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -

Building weak classifier 2/5 ...
t=2/5 8649.41s (0.52s in this stage) 1/63960 0.00% evaluated. Classification error improved to 0.31558 using Feature2h(x=0, y=0, width=2, height=1) ...
t=2/5 8649.93s (1.03s in this stage) 5/63960 0.01% evaluated. Classification error improved to 0.29233 using Feature2h(x=0, y=4, width=2, height=1) ...
t=2/5 8679.34s (30.45s in this stage) 263/63960 0.41% evaluated. Classification error improved to 0.28143 using Feature2h(x=13, y=15, width=2, height=1) ...
t=2/5 8687.93s (39.04s in this stage) 334/63960 0.52% evaluated. Classification error improved to 0.26122 using Feature2h(x=17, y=10, width=2, height=1) ...
t=2/5 8721.66s (72.76s in this stage) 569/63960 0.89% evaluated. Classification error improved to 0.24411 using Feature2h(x=12, y=10, width=2, height=2) ...
t=2/5 8985.54s (336.64s in this stage) 2569/63960 4.02% evaluated.
t=2/5 9128.36s (479.46s in this stage) 3659/63960 5.72% evaluated. Classification error improved to 0.22927 using Feature2h(x

In [34]:
weak_classifiers_5

[WeakClassifier(threshold=-2.414795756340027, polarity=1, alpha=1.9315405703473476, classifier=Feature2h(x=8, y=1, width=2, height=10)),
 WeakClassifier(threshold=1.1981849670410156, polarity=-1, alpha=1.4382275207572313, classifier=Feature2v(x=14, y=4, width=4, height=2)),
 WeakClassifier(threshold=0.23963642120361328, polarity=-1, alpha=1.2819242706852847, classifier=Feature2v(x=12, y=15, width=3, height=2)),
 WeakClassifier(threshold=-4.424053192138672, polarity=1, alpha=1.530361913268545, classifier=Feature4(x=4, y=0, width=4, height=12)),
 WeakClassifier(threshold=1.585296630859375, polarity=-1, alpha=1.1458350025396984, classifier=Feature4(x=4, y=10, width=8, height=6))]

In [42]:
ys_strong = np.array([strong_classifier(x, weak_classifiers) for x in test_xis])
c, s = predict(test_ys, ys_strong)
print(f'Accuracy: {(s.tp+s.tn)/(s.tp+s.fp+s.tn+s.fn):.2f} , False Positives: {s.fp}, False Negatives: {s.fn}')

Compilation is falling back to object mode WITH looplifting enabled because Function "strong_classifier" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at <ipython-input-27-886b5b384fd2> (3)

File "<ipython-input-27-886b5b384fd2>", line 3:
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
    sum_hypotheses = 0.
    ^

  @jit
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "strong_classifier" failed type inference due to: cannot determine Numba type of <class 'numba.dispatcher.LiftedLoop'>

File "<ipython-input-27-886b5b384fd2>", line 5:
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
    <source elided>
    sum_alphas = 0.
    for c in weak_classifiers:
    ^

  @jit

File "<ipython-input-27-886b5b384fd2>", line 2:
@jit
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
^

  state.func_ir.loc))
Fall-ba

Accuracy: 0.84 , False Positives: 61, False Negatives: 337


In [42]:
weak_classifiers_3, w_history = build_weak_classifiers_3('3 rounds', 3, xis, ys, features)

Building weak classifier 1/3 ...


Compilation is falling back to object mode WITH looplifting enabled because Function "find_best_threshold" failed type inference due to: Invalid use of Function(<class 'float'>) with argument(s) of type(s): (Literal[str](inf))
 * parameterized
In definition 0:
    TypeError: float() only support for numbers
    raised from /home/angshul1994/.local/lib/python3.6/site-packages/numba/typing/builtins.py:889
In definition 1:
    TypeError: float() only support for numbers
    raised from /home/angshul1994/.local/lib/python3.6/site-packages/numba/typing/builtins.py:889
This error is usually caused by passing an argument of a type that is unsupported by the named function.
[1] During: resolving callee type: Function(<class 'float'>)
[2] During: typing of call at <ipython-input-38-51fd85f81679> (21)


File "<ipython-input-38-51fd85f81679>", line 21:
def find_best_threshold(zs: np.ndarray, t_minus: float, t_plus: float, s_minuses: List[float], s_pluses: List[float]) -> ThresholdPolarity:
    mi

t=1/3 1.83s (1.83s in this stage) 1/63960 0.00% evaluated. Classification error improved to 0.33393 using Feature2h(x=0, y=0, width=2, height=1) ...
t=1/3 2.41s (2.41s in this stage) 5/63960 0.01% evaluated. Classification error improved to 0.26404 using Feature2h(x=0, y=4, width=2, height=1) ...
t=1/3 5.90s (5.90s in this stage) 29/63960 0.04% evaluated. Classification error improved to 0.25669 using Feature2h(x=1, y=9, width=2, height=1) ...
t=1/3 15.74s (15.74s in this stage) 106/63960 0.16% evaluated. Classification error improved to 0.24652 using Feature2h(x=5, y=10, width=2, height=1) ...
t=1/3 23.26s (23.26s in this stage) 160/63960 0.25% evaluated. Classification error improved to 0.17865 using Feature2h(x=8, y=7, width=2, height=1) ...
t=1/3 62.32s (62.32s in this stage) 491/63960 0.77% evaluated. Classification error improved to 0.15235 using Feature2h(x=8, y=4, width=2, height=2) ...
t=1/3 107.63s (107.63s in this stage) 805/63960 1.26% evaluated. Classification error improv

Compilation is falling back to object mode WITH looplifting enabled because Function "run_weak_classifier" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at <ipython-input-35-e47a16af2a40> (8)

File "<ipython-input-35-e47a16af2a40>", line 8:
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -> float:
    return weak_classifier(x=x, f=c.classifier, polarity=c.polarity, theta=c.threshold)
    ^

  @jit

File "<ipython-input-35-e47a16af2a40>", line 7:
@jit
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -> float:
^

  state.func_ir.loc))
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "<ipython-input-35-e47a16af2a40>", line 7:
@jit
def run_weak_classifier(x: np.ndarray, c: WeakClassifier) -

Building weak classifier 2/3 ...
t=2/3 9410.01s (0.57s in this stage) 1/63960 0.00% evaluated. Classification error improved to 0.31500 using Feature2h(x=0, y=0, width=2, height=1) ...
t=2/3 9410.60s (1.16s in this stage) 5/63960 0.01% evaluated. Classification error improved to 0.29445 using Feature2h(x=0, y=4, width=2, height=1) ...
t=2/3 9443.07s (33.64s in this stage) 263/63960 0.41% evaluated. Classification error improved to 0.28275 using Feature2h(x=13, y=15, width=2, height=1) ...
t=2/3 9452.41s (42.97s in this stage) 334/63960 0.52% evaluated. Classification error improved to 0.26040 using Feature2h(x=17, y=10, width=2, height=1) ...
t=2/3 9489.78s (80.35s in this stage) 569/63960 0.89% evaluated. Classification error improved to 0.24339 using Feature2h(x=12, y=10, width=2, height=2) ...
t=2/3 9782.48s (373.04s in this stage) 2569/63960 4.02% evaluated.
t=2/3 9941.74s (532.31s in this stage) 3659/63960 5.72% evaluated. Classification error improved to 0.23423 using Feature2h(x

In [50]:
ys_strong = np.array([strong_classifier(x, weak_classifiers_3) for x in test_xis])
c, s = predict(test_ys, ys_strong)
print(f'Accuracy: {(s.tp+s.tn)/(s.tp+s.fp+s.tn+s.fn):.2f} , False Positives: {s.fp}, False Negatives: {s.fn}')

Compilation is falling back to object mode WITH looplifting enabled because Function "strong_classifier" failed type inference due to: non-precise type pyobject
[1] During: typing of argument at <ipython-input-36-886b5b384fd2> (3)

File "<ipython-input-36-886b5b384fd2>", line 3:
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
    sum_hypotheses = 0.
    ^

  @jit
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "strong_classifier" failed type inference due to: cannot determine Numba type of <class 'numba.dispatcher.LiftedLoop'>

File "<ipython-input-36-886b5b384fd2>", line 5:
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
    <source elided>
    sum_alphas = 0.
    for c in weak_classifiers:
    ^

  @jit

File "<ipython-input-36-886b5b384fd2>", line 2:
@jit
def strong_classifier(x: np.ndarray, weak_classifiers: List[WeakClassifier]) -> int:
^

  state.func_ir.loc))
Fall-ba

Accuracy: 0.84 , False Positives: 57, False Negatives: 347
