In [None]:
import cv2
import numpy as np
import os
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt
from scipy.ndimage import imread
from utils import *

In [None]:
# This SVM detector is adapted from my submission for problem set 4.

In [None]:
def run_detector(im, clf, window_size, cell_size, block_size, nbins, thresh=1):
    '''
    RUN_DETECTOR Given an image, runs the SVM detector over a sliding window
    and outputs bounding boxes and scores.

    Arguments:
        im - the image matrix (2-D; rows are indexed first, columns second)

        clf - the trained sklearn SVM object. Its decision_function() method
            is called on the HoG features of every window.
            http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html

        window_size - an array which contains the two side lengths of the
            sliding window. This function reads window_size[0] as the width
            and window_size[1] as the height.

        cell_size - each cell will be of size (cell_size, cell_size) pixels

        block_size - each block will be of size (block_size, block_size) cells

        nbins - number of histogram bins

        thresh - a window is reported only when its SVM score is strictly
            greater than this value (default 1, the SVM margin). Pass a lower
            value, e.g. thresh=-1.0, for a more permissive detector.

    Returns:
        bboxes - D x 4 integer array of [xmin ymin width height], one row per
            detection. Shape is (0, 4) when nothing is detected.

        scores - D x 1 array of the SVM scores associated with each bounding
            box in bboxes.

    The window slides across the image with a stride of
    block_size * cell_size / 2 pixels in both directions. HoG features are
    computed with compute_hog_features() (provided through the utils import).
    Overlapping detections are NOT filtered here; run non-maximal suppression
    on the result for that.
    '''
    # NOTE(review): index 0 is treated as the width and index 1 as the height.
    # The notebook only uses square windows, so the order has no visible
    # effect; confirm against utils before using a non-square window.
    W = window_size[0]
    H = window_size[1]
    # Guard against a zero stride, which would make range() raise.
    stride = max(1, int(block_size * cell_size / 2))

    n_rows, n_cols = im.shape[0], im.shape[1]

    bboxes = []
    scores = []

    # "+ 1" so that the last window that still fits in the image is scored.
    for i in range(0, n_rows - H + 1, stride):
        # Columns are bounded by the image width (shape[1]), not its height.
        for j in range(0, n_cols - W + 1, stride):
            features = compute_hog_features(im[i:i+H, j:j+W], cell_size, block_size, nbins)
            # decision_function returns a length-1 array for a single sample.
            score = float(np.ravel(clf.decision_function(features.reshape(1, -1)))[0])
            if score > thresh:
                bboxes.append([j, i, W, H])
                scores.append(score)

    # reshape(-1, ...) keeps the 2-D shape even when there are no detections
    # and avoids the float produced by "size / 4" under Python 3.
    bboxes = np.array(bboxes, dtype=int).reshape(-1, 4)
    scores = np.array(scores, dtype=float).reshape(-1, 1)
    return bboxes, scores

In [None]:
def non_max_suppression(bboxes, confidences):
    '''
    NON_MAX_SUPPRESSION Given a list of bounding boxes, returns a subset that
    uses high confidence detections to suppress other overlapping
    detections. Detections can partially overlap, but the center of a
    returned detection is never inside a higher-confidence returned detection.

    Arguments:
        bboxes - ndarray of size (N, 4) where N is the number of detections,
            and each row is [x_min, y_min, width, height]

        confidences - ndarray of size (N, 1) or (N,) holding the SVM
            confidence of each bounding box.

    Returns:
        nms_bboxes - ndarray of size (K, 4), K <= N, ordered from highest to
            lowest confidence; each row is [x_min, y_min, width, height].
            Shape is (0, 4) when the input is empty.

    Raises:
        ValueError - if the number of confidences differs from the number of
            bounding boxes.

    The boxes are visited in order of decreasing confidence. A box is kept
    only if its center does not lie strictly inside any box that has already
    been kept.
    '''
    bboxes = np.asarray(bboxes).reshape(-1, 4)
    confidences = np.asarray(confidences).ravel()

    # Nothing to suppress; avoids indexing row 0 of an empty array.
    if bboxes.shape[0] == 0:
        return bboxes

    if confidences.size != bboxes.shape[0]:
        raise ValueError('bboxes and confidences must have the same length')

    # Indices that sort the confidences from highest to lowest.
    order = np.argsort(confidences)[::-1]

    kept = []
    for idx in order:
        x, y, w, h = bboxes[idx]
        cx = x + w / 2.0
        cy = y + h / 2.0
        # Suppress the candidate if ANY already-kept box contains its center.
        suppressed = any(
            (kx < cx < kx + kw) and (ky < cy < ky + kh)
            for kx, ky, kw, kh in kept
        )
        if not suppressed:
            kept.append(bboxes[idx])

    return np.array(kept).reshape(-1, 4)

In [6]:
# HoG / sliding-window hyperparameters shared by training and detection.
window_size = np.array([32, 32])
cell_size = 3
block_size = 6
nbins = 9

# Reuse the cached feature matrices when both .npy files are present;
# otherwise compute them from the image folders and cache them for next time.
if os.path.exists('features_pos.npy') and os.path.exists('features_neg.npy'):
    features_pos = np.load('features_pos.npy')
    features_neg = np.load('features_neg.npy')
else:
    features_pos = get_positive_features(
        'cropped_signs/stop_signs', cell_size, window_size, block_size, nbins)
    num_negative_examples = 10000
    features_neg = get_random_negative_features(
        'LISA/negatives/negativePics', cell_size, window_size, block_size, nbins,
        num_negative_examples)
    np.save('features_pos.npy', features_pos)
    np.save('features_neg.npy', features_neg)

# Training set: positives stacked on top of negatives, labelled 1 and 0.
X = np.vstack((features_pos, features_neg))
Y = np.concatenate((np.ones(len(features_pos)), np.zeros(len(features_neg))))

LISA/negatives/negativePics\nosign00000.png


TypeError: Mismatch between array dtype ('<U43') and format specifier ('%.5f')

In [None]:
# Train the SVM
# random_state pins the solver's internal shuffling so that a
# Restart-and-Run-All reproduces the same classifier.
clf = LinearSVC(C=1, tol=1e-6, max_iter=10000, fit_intercept=True,
                loss='squared_hinge', random_state=0)
clf.fit(X, Y)

# Accuracy on the training set itself (a sanity check, not a held-out estimate).
score = clf.score(X, Y)
print('Training accuracy: %.4f' % score)

In [None]:
# Part A: Sliding window detector
# Build the path from its components so it resolves on every OS; a literal
# with backslashes is only a valid path on Windows.
im_path = os.path.join('LISA', 'vid1', 'frameAnnotations-vid_cmp1.avi_annotations',
                       'stop_1323812801.avi_image4.png')
im = cv2.imread(im_path)
# cv2.imread signals a missing/unreadable file by returning None, not by raising.
if im is None:
    raise FileNotFoundError('Could not read image: ' + im_path)

im_yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV) #converting to YUV
im_yuv[:,:,0] = cv2.equalizeHist(im_yuv[:,:,0]) #equalize the histogram of the Y channel
im = cv2.cvtColor(im_yuv, cv2.COLOR_YUV2BGR) #converting back to BGR
# Flatten the equalized BGR image (not the YUV buffer) to a single gray channel.
im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
bboxes, scores = run_detector(im, clf, window_size, cell_size, block_size, nbins)
plot_img_with_bbox(im, bboxes, 'Without nonmaximal suppresion')
plt.show()

# Part B: Nonmaximal suppression
bboxes = non_max_suppression(bboxes, scores)
plot_img_with_bbox(im, bboxes, 'With nonmaximal suppresion')
plt.show()

In [None]:
# Run the sliding-window detector on the current image again and plot every
# raw detection (no non-maximal suppression is applied in this cell).
bboxes, scores = run_detector(
    im,
    clf,
    window_size=window_size,
    cell_size=cell_size,
    block_size=block_size,
    nbins=nbins,
)
plot_img_with_bbox(im, bboxes, 'Detected Signs')
plt.show()