In [62]:
%matplotlib inline  

from matplotlib import pyplot as plt
import logging
import numpy as np
import cv2
import time
import os
from fnmatch import fnmatch
import csv

Prepare file list

In [81]:
import imutils
from skimage.feature import hog
from skimage import io, color, exposure

def file_iterator(input_dir):
    for root, dirs, files in os.walk(input_dir):
        for f in files:
            if fnmatch(f, '*.jpg'):
                yield os.path.join(root, f)

filelist = {}
for f in file_iterator("./train/positive"):
    filelist[f] = 1

for f in file_iterator("./train/negative"):
    filelist[f] = 0

In [82]:
def hog_desc_skimage(img_file):
    img = color.rgb2gray(io.imread(img_file))
    fd, hog_img = hog(img, orientations=8, pixels_per_cell=(12, 12),
                    cells_per_block=(1, 1), visualise=True)
    return fd

def hog(winSize):
    blockSize = (8,8)
    blockStride = (4,4)
    cellSize = (4,4)
    nbins = 9
    derivAperture = 1
    winSigma = 4.
    histogramNormType = 0
    L2HysThreshold = 2.0000000000000001e-01
    gammaCorrection = 0
    nlevels = 64
    hog = cv2.HOGDescriptor(winSize,blockSize,blockStride,cellSize,nbins,derivAperture,winSigma,
                        histogramNormType,L2HysThreshold,gammaCorrection,nlevels)
    return hog

In [83]:
files = filelist.keys()
label = [filelist[f] for f in files]
label = np.array(label).reshape(-1)

h = hog((128, 128))
data = [h.compute(cv2.cvtColor(cv2.imread(f), cv2.COLOR_BGR2GRAY)).reshape(-1) for f in files]

Training data 

In [84]:
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.externals import joblib
from sklearn import cross_validation

In [85]:
def estimated_classifier(data, label):
    tuned_params = [
        {'gamma': [1e-2, 1e-3, 1e-4, 1e-5], 'C': [0.01, 0.1, 1, 10, 100, 1000, 10000]}
    ]
    score = "f1"
    print "Tunning Parameters for %s" % score
    grid_search = GridSearchCV(SVC(kernel='rbf', C=1, gamma=1e-4, class_weight='balanced'), tuned_params, cv=5, scoring='%s' % score)
    grid_search.fit(data, label)
    print "Best parameters set found on development set:"
    print grid_search.best_params_
    print grid_search.best_score_
    return grid_search.best_estimator_

if os.path.isfile('./models/svmModel.pkl'):
    clf = joblib.load('./models/svmModel.pkl')
else:
    clf = estimated_classifier(data, label)
    #clf = SVC(kernel='rbf', C=1, gamma=1e-4, class_weight='balanced')
    #scores = cross_validation.cross_val_score(clf, iris.data, iris.target, cv=5)
    clf.fit(data, label)
    joblib.dump(clf, './models/svmModel.pkl')

Tunning Parameters for f1
Best parameters set found on development set:
{'C': 10, 'gamma': 0.001}
0.910238053045


Generate pyramids for original images

In [73]:
def sliding_window(image, stepSize, windowSize):
    for y in xrange(0, image.shape[0], stepSize):
        for x in xrange(0, image.shape[1], stepSize):
            right = image.shape[1] -1 if x + windowSize[0] >= image.shape[1] else x + windowSize[0]
            bottom = image.shape[0] - 1 if y + windowSize[1] >= image.shape[0] else y + windowSize[1]
            #print (bottom, right)
            yield (x, y, image[y:bottom, x:right])

def pyramid(image, scale=1.2, minSize=(128, 128)):
    width = image.shape[1]
    yield image.copy(), 1
    while True:
        w = int(image.shape[1] / scale)
        image = imutils.resize(image, width=min(512, w))
        if image.shape[0] < minSize[1] or image.shape[1] < minSize[0]:
            break
        yield image.copy(), width / float(image.shape[1])

In [74]:
def predict(test_img, hog, clf):
    for img, scale in pyramid(test_img):
        locs = []
        X = []
        for (loc_x, loc_y, window) in sliding_window(img, 8, (128, 128)):
            if window.shape[0] != 128 or window.shape[1] != 128:
                continue
            X.append(h.compute(window).reshape(-1))
            locs.append((loc_x, loc_y))

        if len(X) > 0:
            Y = clf.predict(X)
            points = [locs[i] for i, y in enumerate(Y) if y == 1]
            yield img, scale, points

In [75]:
def is_not_correct(bouding, point, scale, threshold=(0.6, 0.33)):
    point = [p * scale for p in point]
    x1 = max(bouding[0], point[0])
    y1 = max(bouding[1], point[1])
    x2 = min(bouding[2], int(point[0] + 128 * scale))
    y2 = min(bouding[3], int(point[1] + 128 * scale))
    s1 = 128 * 128 * scale * scale
    s2 = (bouding[2] - bouding[0]) * (bouding[3] - bouding[1])
    s3 = (x2 - x1) * (y2 - y1)
    r1 = s3/float(s1)
    r2 = s3/float(s2)
    if r1 < threshold[0] and r2 < threshold[1]:
        return True
    return False


In [76]:
def load_bounding_box(filename):
    bounding_box = {}
    csv_data = csv.reader(open(filename))
    row_num = 0
    for row in csv_data:
        if row_num == 0:
            tags = row
        else:
            bounding_box[row[0]] = [int(r) for r in row[1:]]
        row_num = row_num + 1
    return bounding_box

Apply hard-negative learning: generate false positve images and add it to training set

In [97]:
bounding_box = load_bounding_box('./bounding_box.csv')

def gather_false_negative(h, clf):
    for f in file_iterator('./test/'):
        test_img = cv2.cvtColor(cv2.imread(f), cv2.COLOR_BGR2GRAY)
        bounding = bounding_box[os.path.basename(f)]
        for img, scale, points in predict(test_img, h, clf):
            print '%s, %.2f' % (f, scale)
            points = [p for p in points if is_not_correct(bounding, p, scale)]
            for i, p in enumerate(points):
                window = img[p[1]:p[1] + 128, p[0]:p[0]+128]
                cv2.imwrite('./predict/%.3f_%d_negative_%s' % (scale, i, os.path.basename(f)), window)    

Predicting and Draw final bounding box and sliding window in orignal image

In [98]:
def add_positive_window(img, points):
    for upleft in points:
        bottomright = (upleft[0]+128, upleft[1]+128)
        color = (0, 55, 255)
        cv2.rectangle(img, upleft, bottomright, color, +2, 4)
        #if upleft[0] < (img.shape[1] / 3):
        #temp_img = img[upleft[1]:bottomright[1], upleft[0]:bottomright[0]]
        #cv2.imwrite('./train/img_%d.jpg' % int(round(time.time() * 1000)), temp_img)
    return img

def add_bounding_box(img, upleft, bottomright):
    color = (0, 55, 255)
    cv2.rectangle(img, upleft, bottomright, color, +4, 8)
    return img

def get_corner(points):
    xs = sorted([p[0] for p in points])
    ys = sorted([p[1] for p in points])
    x1 = xs[0]
    y1 = ys[0]
    x2 = xs[-1] + 128
    y2 = ys[-1] + 128
    return (x1, y1), (x2, y2)

In [90]:
for f in file_iterator('./test/'):
    test_img = cv2.cvtColor(cv2.imread(f), cv2.COLOR_BGR2GRAY)
    for img, scale, points in predict(test_img, h, clf):
        print f, scale
        if len(points) > 0:
            upleft, bottomright = get_corner(points)
            upleft = tuple([int(t*scale) for t in upleft])
            bottomright = tuple([int(t*scale) for t in bottomright])
            test_img = add_bounding_box(test_img, upleft, bottomright)
            img = add_positive_window(img, points)
            cv2.imwrite('./predict/%d_%d_%s' % (img.shape[1], img.shape[0], os.path.basename(f)), img)
    cv2.imwrite('./predict/' + os.path.basename(f), test_img)

In [99]:
def is_correct(bouding, box):
    x1 = max(bouding[0], box[0][0])
    y1 = max(bouding[1], box[0][1])
    x2 = min(bouding[2], box[1][0])
    y2 = min(bouding[3], box[1][1])
    s1 = (box[1][0] - box[0][0]) * (box[1][1] - box[0][1])
    s2 = (bouding[2] - bouding[0]) * (bouding[3] - bouding[1])
    s = (x2 - x1) * (y2 - y1)
    r1 = s/float(s1)
    r2 = s/float(s2)
    if r1 > 0.5 and r2 > 0.9:
        return True
    return False

def getting_final_bounding_box(boxes):
    ul = [b[0] for b in boxes]
    rb = [b[1] for b in boxes]
    xu = sorted([p[0] for p in ul])[0]
    yu = sorted([p[1] for p in ul])[0]
    xb = sorted([p[0] for p in rb])[-1]
    yb = sorted([p[1] for p in rb])[-1]
    #x1 = (xu[0] + xu[-1]) / 2
    #y1 = (yu[0] + yu[-1]) / 2
    #x2 = (xb[0] + xb[-1]) / 2
    #y2 = (yb[0] + yb[-1]) / 2
    return (xu, yu), (xb, yb)

count = 0
total = 0
for f in file_iterator('./test/'):
    test_img = cv2.cvtColor(cv2.imread(f), cv2.COLOR_BGR2GRAY)
    bounding = bounding_box[os.path.basename(f)]
    bboxes = []
    for img, scale, points in predict(test_img, h, clf):
        print f, scale
        if len(points) > 0:
            upleft, bottomright = get_corner(points)
            upleft = tuple([int(t*scale) for t in upleft])
            bottomright = tuple([int(t*scale) for t in bottomright])
            bboxes.append((upleft, bottomright))
            print upleft, bottomright
    upleft, bottomright = getting_final_bounding_box(bboxes)
    if not is_correct(bounding, (upleft, bottomright)):
        count = count + 1
        print f
    total = total + 1
    test_img = add_bounding_box(test_img, upleft, bottomright)
    cv2.imwrite('./predict/' + os.path.basename(f), test_img)  

print 'Count = %d' % count
print 'Total = %d' % total
print 'Accuracy: %.2f' % count/float(total)
    

./test/color_1462637394542.jpg 1
(288, 128) (576, 352)
./test/color_1462637394542.jpg 1.25
(260, 110) (600, 350)
./test/color_1462637394542.jpg 1.50234741784
(228, 72) (624, 348)
./test/color_1462637394542.jpg 1.80281690141
(201, 57) (634, 346)
./test/color_1462637394542.jpg 2.16949152542
(277, 34) (624, 347)
./test/color_1462637394542.jpg 2.61224489796
(271, 0) (626, 355)
[((288, 128), (576, 352)), ((260, 110), (600, 350)), ((228, 72), (624, 348)), ((201, 57), (634, 346)), ((277, 34), (624, 347)), ((271, 0), (626, 355))]
[(288, 128), (260, 110), (228, 72), (201, 57), (277, 34), (271, 0)]
[(576, 352), (600, 350), (624, 348), (634, 346), (624, 347), (626, 355)]
./test/color_1462637396718.jpg 1
./test/color_1462637396718.jpg 1.25
./test/color_1462637396718.jpg 1.50234741784
(240, 144) (468, 348)
./test/color_1462637396718.jpg 1.80281690141
(216, 115) (461, 346)
./test/color_1462637396718.jpg 2.16949152542
./test/color_1462637396718.jpg 2.61224489796
[((240, 144), (468, 348)), ((216, 115)

IndexError: list index out of range