# Training an object detection engine with HOG

Dataset: The Oxford-IIIT Pet Dataset, available at https://www.robots.ox.ac.uk/~vgg/data/pets/


In [None]:
import xml.etree.ElementTree as ET
from pathlib import Path

import cv2
from common import CV_DATASETS_DIR

# Dataset locations, relative to the project-wide datasets directory:
# IMAGES_ROOT holds the pictures, ANNOTS_ROOT the per-image XML annotation
# files that read_voc_xml() parses.
# NOTE(review): assumes CV_DATASETS_DIR is a pathlib.Path (the "/" operator
# and the later .glob() call rely on it) — confirm in common.py.
IMAGES_ROOT = CV_DATASETS_DIR / "pets" / "images"
ANNOTS_ROOT = CV_DATASETS_DIR / "pets" / "annotations"/ "xmls"

def read_voc_xml(xmlfile: Path) -> dict:
    """Parse one VOC-style XML annotation file.

    Args:
        xmlfile: Path of the annotation file to parse.

    Returns:
        A dict with two keys: ``"filename"`` (text of the ``<filename>``
        element) and ``"objects"``, a list with one dict per ``<object>``
        element holding its ``"name"`` and the integer bounding-box corners
        ``"xmin"``, ``"ymin"``, ``"xmax"`` and ``"ymax"``.
    """
    tree_root = ET.parse(xmlfile).getroot()

    def _as_record(obj):
        # Flatten one <object> element into a plain dict.
        bndbox = obj.find("bndbox")
        record = {"name": obj.find("name").text}
        for corner in ("xmin", "ymin", "xmax", "ymax"):
            record[corner] = int(bndbox.find(corner).text)
        return record

    return {
        "filename": tree_root.find("filename").text,
        "objects": [_as_record(obj) for obj in tree_root.iter("object")],
    }

def make_square(xmin, xmax, ymin, ymax):
    """Shrink a bounding box to a square around its centre.

    The square is centred on the integer midpoint of the box, and its half
    side is half of the shorter box dimension (both use floor division).

    Returns:
        The new corners as ``(xmin, xmax, ymin, ymax)``.
    """
    half = min(xmax - xmin, ymax - ymin) // 2
    cx = (xmin + xmax) // 2
    cy = (ymin + ymax) // 2
    return cx - half, cx + half, cy - half, cy + half


## Prepare positive and negative images

In [None]:
import random
from common import show_image_plot

# Define HOG parameters
winSize = (64, 64)  # every training sample is resized to this size
blockSize = (32, 32)
blockStride = (16, 16)
cellSize = (16, 16)
nbins = 9

positive = []
negative = []
# NOTE(review): 5 positives against 2000 negatives is a very unbalanced
# training set — confirm these limits are intended.
num_positive = 5
num_negative = 2000

# Positive samples: a square crop around the first annotated object of
# pictures labelled "cat".
for xmlfile in ANNOTS_ROOT.glob("*.xml"):
    # Read annotation
    annot = read_voc_xml(xmlfile)
    # Allow only pictures with cat; annotations without any object are skipped
    if not annot["objects"] or annot["objects"][0]["name"] != "cat":
        continue
    # Adjust bounding box to square
    box = annot["objects"][0]
    xmin, xmax, ymin, ymax = make_square(box["xmin"], box["xmax"], box["ymin"], box["ymax"])
    # Read image (cv2.imread returns None instead of raising on failure)
    image_path = IMAGES_ROOT / annot["filename"]
    image = cv2.imread(str(image_path))
    if image is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    # Crop + resize image
    sample = image[ymin:ymax, xmin:xmax]
    if sample.size == 0:
        # A degenerate bounding box yields an empty crop that cannot be resized
        continue
    sample = cv2.resize(sample, winSize)
    positive.append(sample)
    if len(positive) >= num_positive:
        break

# Negative samples: one random square crop from each picture whose first
# annotated object is NOT a cat.
for xmlfile in ANNOTS_ROOT.glob("*.xml"):
    # Read annotation
    annot = read_voc_xml(xmlfile)
    # Allow only pictures without cat; annotations without any object are skipped
    if not annot["objects"] or annot["objects"][0]["name"] == "cat":
        continue
    # Read image (cv2.imread returns None instead of raising on failure)
    image_path = IMAGES_ROOT / annot["filename"]
    image = cv2.imread(str(image_path))
    if image is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    # Generate random bounding box
    h, w = image.shape[:2]
    if min(h, w) < winSize[0]:
        # Image is smaller than the HOG window: random.randint(a, b) would
        # raise ValueError because b < a, so skip it
        continue
    box_size = random.randint(winSize[0], min(h, w))
    x = random.randint(0, w - box_size)
    y = random.randint(0, h - box_size)
    # Crop + resize image
    sample = image[y:y + box_size, x:x + box_size]
    sample = cv2.resize(sample, winSize)
    negative.append(sample)
    if len(negative) >= num_negative:
        break

print("Positive: ", len(positive))
print("Negative: ", len(negative))

## Training classifier using HOG features

In [None]:
import numpy as np

# HOG descriptor configured with the window/block/cell geometry defined earlier
hogd1 = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins)

# Build the training set: positives first (label 1), then negatives (label 0)
images = positive + negative
labels = np.array([1] * len(positive) + [0] * len(negative), dtype=np.int32)
# One flattened HOG feature vector per sample, stacked row-wise
data = np.array(
    [hogd1.compute(sample).flatten() for sample in images],
    dtype=np.float32,
)

# Configure and train the SVM classifier (C-SVC with an RBF kernel)
svm = cv2.ml.SVM.create()
svm.setKernel(cv2.ml.SVM_RBF)
svm.setType(cv2.ml.SVM_C_SVC)
stop_criteria = (cv2.TERM_CRITERIA_MAX_ITER + cv2.TERM_CRITERIA_EPS, 100000, 1e-8)
svm.setTermCriteria(stop_criteria)
svm.train(data, cv2.ml.ROW_SAMPLE, labels)


## Testing classifier 

In [None]:
from common import CV_MODELS_DIR, show_image_plot

# Load a HOG descriptor from a saved model file.
# NOTE(review): no visible cell writes "hog_cats_classifier.yaml", and the
# `svm` trained in the previous step is not used here — confirm how this
# model file is produced.
hogd2 = cv2.HOGDescriptor()
model_path = CV_MODELS_DIR / "hog_cats_classifier.yaml"
if not hogd2.load(filename=str(model_path)):
    raise FileNotFoundError(f"Cannot load HOG model: {model_path}")

cnt = 0
num_testing = 10

for xmlfile in ANNOTS_ROOT.glob("*.xml"):
    annot = read_voc_xml(xmlfile)
    image_path = IMAGES_ROOT / annot["filename"]
    image = cv2.imread(str(image_path))
    if image is None:
        # cv2.imread returns None instead of raising on failure
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    # Run the detector before drawing anything, so the overlay rectangles
    # cannot influence the detection
    locations, scores = hogd2.detectMultiScale(image)
    # Ground truth: first annotated object, drawn in red (BGR order)
    if annot["objects"]:
        box = annot["objects"][0]
        start_point = (box["xmin"], box["ymin"])
        end_point = (box["xmax"], box["ymax"])
        cv2.rectangle(image, start_point, end_point, (0, 0, 255), 2)
    # Best-scoring detection, drawn in blue; skipped when nothing is detected
    # (argmax over an empty score list would raise)
    if len(locations) > 0:
        best = int(np.argmax(np.asarray(scores).flatten()))
        # Plain ints keep cv2.rectangle independent of NumPy scalar handling
        x, y, w, h = (int(v) for v in locations[best])
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)
    show_image_plot(image)
    cnt += 1
    if cnt >= num_testing:
        break

