In [None]:
# !pip install xmltodict

### Reading utils
Data source: http://www.robots.ox.ac.uk/ActiveVision/Research/Projects/2009bbenfold_headpose/Datasets/

The extracted images are in canvas. Also, the same zip can be found here: https://box.skoltech.ru/index.php/s/GpkIScLyG9zM5YF

In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline

In [None]:
from skimage import io
from skimage import data
from skimage.color import rgb2gray
from skimage.draw import rectangle_perimeter

In [None]:
import glob
from os.path import join
import xmltodict
import numpy as np

# Directory holding one '<frame number>.xml' annotation file per frame.
XML_PATH = './pedestrian_hog/annotations/xmls/'
# Directory with the '<frame number>.jpg' images of frames numbered below TEST_STARTS_WITH.
TRAIN_PATH = './pedestrian_hog/images/'
# Directory with the images of frames numbered TEST_STARTS_WITH and above.
TEST_PATH = './pedestrian_hog/test_images/'
# First frame number that is read from TEST_PATH instead of TRAIN_PATH.
TEST_STARTS_WITH = 3600

In [None]:
def _bbox_to_np(bbox):
    rect = np.array([[bbox['xmin'], bbox['xmax']], 
                 [bbox['ymin'], bbox['ymax']]
                ])  
    return rect.astype(int)

def _read_annotation_xml(fname):
    """Read the bounding boxes stored in one annotation XML file.

    Args:
        fname: path of the XML file.

    Returns:
        List of 2x2 integer arrays [[xmin, xmax], [ymin, ymax]], one per
        <object> element; an empty list when the file has no objects.
    """
    with open(fname, "r") as file:
        annotation_dict = xmltodict.parse(file.read())

    # An empty <annotation/> element is parsed as None.
    annotation = annotation_dict['annotation'] or {}
    objects = annotation.get('object', [])

    # xmltodict only builds a list for repeated elements; a single <object>
    # comes back as a plain mapping and must be wrapped before iterating.
    if not isinstance(objects, list):
        objects = [objects]

    return [_bbox_to_np(obj['bndbox']) for obj in objects]

def _get_pair(num):
    """Load frame `num` as a grayscale image together with its annotations.

    Frames numbered below TEST_STARTS_WITH are read from TRAIN_PATH, all
    others from TEST_PATH; annotations always come from XML_PATH.

    Returns:
        (image, annotations) where annotations is the list produced by
        _read_annotation_xml.
    """
    if num < TEST_STARTS_WITH:
        image_dir = TRAIN_PATH
    else:
        image_dir = TEST_PATH

    gray = rgb2gray(io.imread(join(image_dir, '{}.jpg'.format(num))))
    boxes = _read_annotation_xml(join(XML_PATH, '{}.xml'.format(num)))
    return gray, boxes
    
def get_train_sample(truncate=3000, step=10):
    """Load every `step`-th frame with a number in [0, truncate).

    Returns:
        List of (image, annotations) pairs as produced by _get_pair.
    """
    pairs = []
    for frame_num in range(0, truncate, step):
        pairs.append(_get_pair(frame_num))
    return pairs

def get_test_sample(truncate=1000, step=100):
    """Load every `step`-th frame of the first `truncate` test frames.

    Frame numbers are offsets in [0, truncate) added to TEST_STARTS_WITH.

    Returns:
        List of (image, annotations) pairs as produced by _get_pair.
    """
    pairs = []
    for offset in range(0, truncate, step):
        pairs.append(_get_pair(offset + TEST_STARTS_WITH))
    return pairs

In [None]:
train = get_train_sample()
test = get_test_sample()

In [None]:
len(train), len(test)

In [None]:
train[0][1][0]  # [[x1, x2], [y1, y2]]

#### Now we have a set of annotated images

In [None]:
def plot_frame(image, bboxes, c=None):
    """Show `image` in a new figure with the outline of every box drawn on it.

    Args:
        image: 2-D image array, displayed with the gray colormap.
        bboxes: iterable of arrays laid out as [[x1, x2], [y1, y2]].
        c: colour passed to plt.plot for the outlines.
    """
    plt.figure(figsize=[10, 7])
    plt.imshow(image, cmap=cm.gray)

    for box in bboxes:
        # column 0 of a box is its (x1, y1) corner, column 1 its (x2, y2) corner
        corner_a, corner_b = box[:, 0], box[:, 1]
        xs, ys = rectangle_perimeter(corner_a, corner_b)
        plt.plot(xs, ys, c=c)

    n_rows, n_cols = image.shape[0], image.shape[1]
    plt.xlim([0, n_cols])
    # y limits are given top-down so the origin stays in the upper-left corner
    plt.ylim([n_rows, 0])

In [None]:
image, bboxes = train[0]

In [None]:
def get_patch(image, bbox):
    """Cut the region described by `bbox` out of `image`.

    Args:
        image: array indexed as image[row, column].
        bbox: array laid out as [[x1, x2], [y1, y2]]; x selects columns and
            y selects rows. Negative start coordinates are clamped to 0.

    Returns:
        The slice image[y1:y2, x1:x2].
    """
    rows = slice(max(0, bbox[1, 0]), bbox[1, 1])
    cols = slice(max(0, bbox[0, 0]), bbox[0, 1])
    return image[rows, cols]

In [None]:
plt.figure(figsize=[10, 5])
for i in range(4):
    plt.subplot(1, 4, i+1)
    io.imshow(get_patch(image, bboxes[i]))

In [None]:
plot_frame(image, bboxes)

### Part 1: prepare the dataset
#### a. write a function to extract positive samples
#### b. write a function to extract negative samples

In [None]:
from random import choice, randint

In [None]:
def positive_generator(data):
    """Yield (patch, bbox) for every annotated box in the dataset.

    Args:
        data: iterable of (image, annotations) pairs.

    Yields:
        The image patch under each bounding box together with that box, in
        dataset order and then annotation order.
    """
    for frame, boxes in data:
        yield from ((get_patch(frame, box), box) for box in boxes)

In [None]:
pos_gen = positive_generator(train)

In [None]:
for img, bbox in pos_gen:
    
    io.imshow(img)
    print(bbox)
    
    break

In [None]:
# len(list(pos_gen))

In [None]:
def get_data_stats(data):
    """Count the annotated boxes and summarise their widths and heights.

    The annotations are walked once and no image patch is extracted; only
    the box coordinates are needed for these statistics.

    Args:
        data: iterable of (image, annotations) pairs, each annotation laid
            out as [[x1, x2], [y1, y2]].

    Returns:
        (num_samples, (width_mean, width_std), (height_mean, height_std)).
    """
    ws, hs = [], []

    for _, ann in data:  # looping over the dataset
        for bbox in ann:  # looping over bounding boxes within one image
            ws.append(bbox[0][1] - bbox[0][0])
            hs.append(bbox[1][1] - bbox[1][0])

    ws, hs = np.array(ws), np.array(hs)
    return len(ws), (ws.mean(), ws.std()), (hs.mean(), hs.std())

In [None]:
get_data_stats(train)

In [None]:
from IPython.display import Image
Image(filename='iou.png') 

In [None]:
def get_iou(bbox1, bbox2):
    """Intersection-over-union of two axis-aligned boxes.

    Args:
        bbox1, bbox2: boxes laid out as [[x1, x2], [y1, y2]] with x1 < x2
            and y1 < y2 (checked with assertions).

    Returns:
        Float in [0, 1]; 0.0 when the boxes do not overlap.
    """
    # both boxes must have strictly positive width and height
    for box in (bbox1, bbox2):
        assert box[0][0] < box[0][1]
        assert box[1][0] < box[1][1]

    # corners of the overlap rectangle
    left = max(bbox1[0][0], bbox2[0][0])
    right = min(bbox1[0][1], bbox2[0][1])
    top = max(bbox1[1][0], bbox2[1][0])
    bottom = min(bbox1[1][1], bbox2[1][1])

    if right < left or bottom < top:
        return 0.0

    overlap = (right - left) * (bottom - top)

    area1 = (bbox1[0][1] - bbox1[0][0]) * (bbox1[1][1] - bbox1[1][0])
    area2 = (bbox2[0][1] - bbox2[0][0]) * (bbox2[1][1] - bbox2[1][0])

    union = float(area1 + area2 - overlap)
    ratio = overlap / union

    assert ratio >= 0.0
    assert ratio <= 1.0

    return ratio

In [None]:
def negative_generator(data, neg_pos_ratio=1.0, min_iou=0, max_iou=0.2):
    """Yield random background patches sampled from `data`.

    Box sizes are drawn from normal distributions fitted to the widths and
    heights of the positive boxes in `data`. A candidate is accepted when
    its highest IoU with the positive boxes of its frame lies in
    [min_iou, max_iou].

    Args:
        data: sequence of (image, annotations) pairs.
        neg_pos_ratio: number of negatives to yield per positive box in `data`.
        min_iou: lower bound on the candidate's maximum IoU with a positive.
        max_iou: upper bound on the candidate's maximum IoU with a positive.

    Yields:
        (patch, bbox) with bbox laid out as [[x1, x2], [y1, y2]].

    Note:
        Sampling retries until a candidate is accepted, so it does not
        terminate if no frame in `data` can satisfy the constraints.
    """
    MIN_WIDTH, MIN_HEIGHT = 15, 60
    # After this many rejected draws a different frame is picked, so a frame
    # that cannot satisfy the IoU band (e.g. no annotations while
    # min_iou > 0) does not stall the generator.
    MAX_TRIES_PER_IMAGE = 100

    # Statistics come from the dataset being sampled, not from a global.
    pos_samples, w_stats, h_stats = get_data_stats(data)
    num_samples = int(pos_samples * neg_pos_ratio)

    for _ in range(num_samples):
        img, ann = choice(data)
        tries = 0
        bbox_candidate = None

        while bbox_candidate is None:
            if tries >= MAX_TRIES_PER_IMAGE:
                img, ann = choice(data)
                tries = 0
            tries += 1

            # Generate a random bbox
            w = int(np.random.normal(*w_stats, 1)[0])
            h = int(np.random.normal(*h_stats, 1)[0])

            if w < MIN_WIDTH or h < MIN_HEIGHT:
                continue
            # A box at least as large as the frame leaves randint with an
            # empty range, which raises ValueError.
            if w >= img.shape[1] or h >= img.shape[0]:
                continue

            x0 = randint(0, img.shape[1] - w - 1)
            y0 = randint(0, img.shape[0] - h - 1)
            candidate = np.array([[x0, x0 + w], [y0, y0 + h]])

            # Compare with positive bboxes
            candidate_max_iou = max(
                (get_iou(candidate, pos_bbox) for pos_bbox in ann),
                default=0,
            )

            if min_iou <= candidate_max_iou <= max_iou:
                bbox_candidate = candidate

        yield get_patch(img, bbox_candidate), bbox_candidate

In [None]:
neg_gen = negative_generator(train)

In [None]:
# neg_gen = negative_generator(train, min_iou=0.05, max_iou=0.3)

In [None]:
for img, bbox in neg_gen:
    
    io.imshow(img)
    print(bbox)
    
    break

### Part 2: Classification
#### a. Extract HOG features
#### b. Train a linear SVM and estimate prediction quality using cross-validation
#### c. Train linear SVM with hard-negative samples

In [None]:
from skimage.feature import hog
from skimage import data, exposure
from skimage.transform import resize

Resize images to a fixed size to have fixed-sized HOG features

In [None]:
# Take the sample from a fresh generator so this cell does not depend on how
# far the shared `pos_gen` has already been consumed and can be re-run safely.
img, bbox = next(positive_generator(train))

# Resize to a fixed size so every patch gives a HOG vector of the same length
image_resized = resize(img, (64, 32),
                       anti_aliasing=True)

fd, hog_image = hog(image_resized, orientations=4, pixels_per_cell=(4, 4),
                cells_per_block=(2, 2), visualize=True)
print(img.shape, fd.shape)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8), sharex=True, sharey=True)

ax1.axis('off')
ax1.imshow(image_resized, cmap=plt.cm.gray)
ax1.set_title('Input image')

# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

ax2.axis('off')
ax2.imshow(hog_image_rescaled, cmap=plt.cm.gray)
ax2.set_title('Histogram of Oriented Gradients')
plt.show()

In [None]:
def img2hog(img):
    """Return the HOG descriptor of `img` after resizing it to 64x32.

    Resizing first gives every patch a descriptor of the same length.
    """
    fixed_size = resize(img, (64, 32), anti_aliasing=True)
    descriptor = hog(
        fixed_size,
        orientations=4,
        pixels_per_cell=(4, 4),
        cells_per_block=(2, 2),
        visualize=False,
    )
    return descriptor

In [None]:
def get_hog_features(data, neg_pos_ratio=1.0, hard_neg=False):
    """Build a shuffled HOG feature matrix with labels from `data`.

    Args:
        data: sequence of (image, annotations) pairs.
        neg_pos_ratio: forwarded to negative_generator.
        hard_neg: when True, negatives are sampled with min_iou=0.05 and
            max_iou=0.3 instead of negative_generator's default IoU band.

    Returns:
        (X, y): X stacks one img2hog descriptor per patch; y holds 1 for
        positive and 0 for negative patches. Both are shuffled with the
        same random permutation.
    """
    pos_gen = positive_generator(data)

    # Both branches sample from `data` and honour `neg_pos_ratio`.
    if hard_neg:
        neg_gen = negative_generator(data, neg_pos_ratio=neg_pos_ratio,
                                     min_iou=0.05, max_iou=0.3)
    else:
        neg_gen = negative_generator(data, neg_pos_ratio=neg_pos_ratio)

    X, y = [], []

    for img, _ in pos_gen:
        X.append(img2hog(img))
        y.append(1)

    for img, _ in neg_gen:
        X.append(img2hog(img))
        y.append(0)

    idxs = np.arange(len(y))
    np.random.shuffle(idxs)

    return np.array(X)[idxs], np.array(y)[idxs]

Generate training and test data

In [None]:
%%time
X, y = get_hog_features(train)

In [None]:
X.shape, y.shape

In [None]:
%%time
X_test, y_test = get_hog_features(test)

In [None]:
X_test.shape, y_test.shape

Train an SVM classifier

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from joblib import dump, load
from sklearn.metrics import accuracy_score

In [None]:
%%time
scores = cross_val_score(SVC(), X, y, cv=3, n_jobs=3)

In [None]:
scores

In [None]:
clf = SVC(probability=True)
clf.fit(X, y)

Save the trained model

In [None]:
dump(clf, 'pedestrian_svm.joblib') 

In [None]:
clf = load('pedestrian_svm.joblib') 

In [None]:
n = 0
clf.predict_proba(X[n].reshape(1, -1)), y[n]

In [None]:
clf.predict(X[n].reshape(1, -1)), y[n]

Re-train a classifier with hard-negative samples

In [None]:
X_hard, y_hard = get_hog_features(train, hard_neg=True)

In [None]:
%%time
scores_hard = cross_val_score(SVC(), X_hard, y_hard, cv=3, n_jobs=3)

In [None]:
scores_hard

In [None]:
clf_hard = SVC(probability=True)
clf_hard.fit(X_hard, y_hard)

Check the models' performance on the test set

In [None]:
%%time
# accuracy_score takes the true labels first and the predictions second
accuracy_score(y_test, clf.predict(X_test), normalize=True)

In [None]:
# accuracy_score takes the true labels first and the predictions second
accuracy_score(y_test, clf_hard.predict(X_test), normalize=True)

## Part 3: Object detection

Generate moving windows across an image

In [None]:
def moving_window_generator(img, stride=(1, 1), anchors=None):
    """Yield sliding-window boxes that cover `img` for every anchor size.

    Args:
        img: array whose first two dimensions are (height, width).
        stride: (vertical step, horizontal step) between window origins.
        anchors: iterable of (height, width) window sizes; defaults to a
            single 100x40 window.

    Yields:
        Arrays laid out as [[x0, x0 + width], [y0, y0 + height]]. Windows
        never extend past the image; an anchor larger than the image yields
        nothing.
    """
    if anchors is None:
        anchors = [
            (100, 40)
        ]

    img_h, img_w = img.shape[0], img.shape[1]

    for (bbox_h, bbox_w) in anchors:
        # The last valid origin is img_h - bbox_h (window flush with the
        # border), hence the +1 on the exclusive range bound.
        for y0 in range(0, img_h - bbox_h + 1, stride[0]):
            for x0 in range(0, img_w - bbox_w + 1, stride[1]):
                yield np.array([
                    [x0, x0 + bbox_w],
                    [y0, y0 + bbox_h]
                ])

In [None]:
plot_frame(image, [bbox for bbox in moving_window_generator(image, stride=(10, 10))])

In [None]:
plot_frame(image, [bbox for bbox in moving_window_generator(image, stride=(50, 20))])

In [None]:
plot_frame(image, [bbox for bbox in moving_window_generator(image, stride=(150, 150),
    anchors=[
        [80, 30],
        [100, 40],
        [120, 50],
        
        [80, 50],
        [80, 60]
    ])])

Make prediction for each window

In [None]:
def detect(img, clf, thres=.5, stride=(10, 10), anchors=None):
    """Classify every sliding window of `img` and keep the confident ones.

    Args:
        img: image to scan.
        clf: fitted classifier exposing predict_proba.
        thres: a window is kept when its score is strictly above this value.
        stride: (vertical, horizontal) step passed to moving_window_generator.
        anchors: (height, width) window sizes; defaults to five fixed sizes.

    Returns:
        (bboxes, confs): the kept windows laid out as [[x1, x2], [y1, y2]]
        and their scores, in scan order.
    """
    if anchors is None:
        anchors = [
            [80, 30],
            [100, 40],
            [120, 50],

            [80, 50],
            [80, 60]
        ]

    candidate_bboxes = []
    confs = []

    # Windows are generated over the `img` argument, not a notebook global.
    for bbox in moving_window_generator(img, stride=stride, anchors=anchors):
        features = img2hog(get_patch(img, bbox))
        proba = clf.predict_proba(features.reshape(1, -1))
        # column 1 is taken as the positive-class probability
        # NOTE(review): assumes clf.classes_ is ordered [0, 1] - confirm
        score = proba[0][1]

        if score > thres:
            candidate_bboxes.append(bbox)
            confs.append(score)

    return np.array(candidate_bboxes), np.array(confs)

In [None]:
%%time
n = 0
plot_frame(test[n][0], detect(test[n][0], clf_hard, thres=.5)[0])

In [None]:
%%time
n = 0
tmp_pred, tmp_confs = detect(test[n][0], clf_hard, thres=.95)
plot_frame(test[n][0], tmp_pred)

#### In addition to some number of false positives, we also have highly overlapping bboxes. To remove them, we need to apply non-maximum suppression

In [None]:
def nms(bboxes, confs, iou_thres=0.3):
    """Greedy non-maximum suppression.

    Boxes are visited from highest to lowest confidence; each visited box
    is kept, and every remaining box whose IoU with it exceeds `iou_thres`
    is discarded.

    Args:
        bboxes: array of boxes laid out as [[x1, x2], [y1, y2]].
        confs: array with one confidence per box.
        iou_thres: overlap above which a lower-scored box is suppressed.

    Returns:
        Array with the kept boxes, most confident first.
    """
    order = np.argsort(confs)[::-1]
    remaining = list(bboxes[order])
    kept = []

    while remaining:
        best, rest = remaining[0], remaining[1:]
        kept.append(best)
        # keep only the boxes that do not overlap `best` too much
        remaining = [box for box in rest
                     if not get_iou(best, box) > iou_thres]

    return np.array(kept)

In [None]:
%%time
bboxes = nms(tmp_pred, tmp_confs)

In [None]:
plot_frame(test[n][0], bboxes)

Visualize the whole test set

In [None]:
%%time
# squeeze=False keeps `axs` two-dimensional even when `test` holds one frame
fig, axs = plt.subplots(len(test), 2, figsize=(15, 5 * len(test)),
                        sharex=True, sharey=True, squeeze=False)

for i, (img, gt_bbox) in enumerate(test):
    pred_bbox, pred_confs = detect(img, clf_hard, thres=.95)
    pred_bbox = nms(pred_bbox, pred_confs)

    # left column: annotated boxes, right column: detections after NMS
    for ax, boxes, title in ((axs[i, 0], gt_bbox, 'Ground truth'),
                             (axs[i, 1], pred_bbox, 'Detections')):
        ax.imshow(img, cmap=cm.gray)
        ax.set_title(title)
        for bbox in boxes:
            ax.plot(*rectangle_perimeter(bbox[:, 0], bbox[:, 1]))

plt.show();

Further improvements:
* Find better anchors via clustering
* Tune SVM (and try other classifiers)
* Image augmentations
* Tune HOG parameters