In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
from skimage.feature import hog
from skimage import data, exposure
import json
import imutils
import argparse

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn import tree
from sklearn import linear_model
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
cv2HOGDescriptor = cv2.HOGDescriptor((64,64), (16,16), (8,8), (8,8), 9, 1, -1, 0, 0.2, 1, 64, 1)

In [2]:
def GetSubImage(img, bbox):
    top = round(bbox["top"])
    left = round(bbox["left"])
    right = round(bbox["right"])
    bottom = round(bbox["bottom"])
    
    width = right - left
    height = bottom - top
    diff = width - height
    add = round(diff/2)

    subImg = img[top - add:bottom + add, left:right]
    return subImg

In [3]:
class Point:
    def __init__(self, x, y):
        self.x = x
        self.y = y
 
# Returns true if two rectangles(l1, r1)
# and (l2, r2) overlap
def doOverlap(l1, r1, l2, r2):
     
    # To check if either rectangle is actually a line
      # For example  :  l1 ={-1,0}  r1={1,1}  l2={0,-1}  r2={0,1}
       
    if (l1.x == r1.x or l1.y == r2.y or l2.x == r2.x or l2.y == r2.y):
        # the line cannot have positive overlap
        return False
     
    # If one rectangle is on left side of other
    if(l1.x >= r2.x or l2.x >= r1.x):
        return False
 
    # If one rectangle is above other
    if(l1.y <= r2.y or l2.y <= r1.y):
        return False
 
    return True
    
#https://www.geeksforgeeks.org/find-two-rectangles-overlap/

In [4]:
def get_random_crop(image, crop_height, crop_width, bboxes):
    max_x = image.shape[1] - crop_width
    max_y = image.shape[0] - crop_height
    x = np.random.randint(0, max_x)
    y = np.random.randint(0, max_y)

    for bbox in bboxes:
        l1 = Point(x,y)
        r1 = Point(max_x, max_y)
        l2 = Point(bbox['left'], bbox['top'])
        r2 = Point(bbox['right'], bbox['bottom'])

        if (doOverlap(l1,r1,l2,r2)):
            return False, []

    crop = image[y: y + crop_height, x: x + crop_width]
    return True, crop

#https://stackoverflow.com/questions/42263020/opencv-trying-to-get-random-portion-of-image

In [5]:
def GetFeatureVector(n, test=False,debug=False):
    number = str(n)
    dataType = ""
    if test: 
        dataType = "test"
    else: 
        dataType = "train"

    if debug:
        print(dataType)

    training_path = "benchmark_velocity_" + dataType + "/clips/" + number + "/imgs/040.jpg"
    annotation = "benchmark_velocity_" + dataType + "/clips/" + number + "/annotation.json"
    cars = []
    nonCars = []
    with open(annotation) as jsonFile:
        x = json.load(jsonFile)
        img = cv2.imread(training_path)
        bboxes = [i["bbox"] for i in x]

        for bbox in bboxes:
            s = GetSubImage(img, bbox)
            resized_img = cv2.resize(s, (64, 64))
            fd = cv2HOGDescriptor.compute(resized_img)[:,0]
            cars.append(fd.tolist())
            
            #Note: feature extraction using skimage hog performs much slower than cv2
            #fd, hog_image = hog(resized_img, orientations=9, pixels_per_cell=(8, 8),
            #    	cells_per_block=(2, 2), visualize=True, multichannel=True)  

            if (debug):
                plt.imshow(resized_img)
                plt.show()
    
            if not test:
                randomCount = 0
                v, c = get_random_crop(img, s.shape[0], s.shape[1], bboxes)
                randImages = []
                while (randomCount < 5):
                    if v:
                        randomCount += 1
                        randImages.append(c)
                    v,c = get_random_crop(img, s.shape[0], s.shape[1], bboxes)
        
                for croppedImage in randImages:
                    resized_cimg = cv2.resize(croppedImage, (64, 64))

                    #Note: feature extraction using skimage hog performs much slower than cv2
                    #fd, hog_image = hog(resized_cimg, orientations=9, pixels_per_cell=(8, 8),
                	#cells_per_block=(2, 2), visualize=True, multichannel=True)

                    fd = cv2HOGDescriptor.compute(resized_cimg)[:,0]
                    nonCars.append(fd.tolist())
        
        if test:
            return cars
        
        return [cars, nonCars] 

In [6]:
def GetSuppFeatureVectors():   
    path = "benchmark_velocity_supp"
    cars = []
    nonCars = []
    annotation = path + "/annotation.json"

    with open(annotation) as jsonFile:
        x = json.load(jsonFile)
        for dictionary in x[:1000]:
            name = dictionary["file_name"]
            bboxes = dictionary["bbox"]
            img = cv2.imread(path + "/" + name)

            for bbox in bboxes:
                s = GetSubImage(img, bbox)
                try:
                    resized_img = cv2.resize(s, (64, 64))
                except:
                    #labelled car is too small
                    #print("error car is too small to detect")
                    #print(bbox)
                    continue
                
                fd = cv2HOGDescriptor.compute(resized_img)[:,0]      
                cars.append(fd)
                randomCount = 0
                v, c = get_random_crop(img, s.shape[0], s.shape[1], bboxes)
                randImages = []
                tries = 0

                while (randomCount < 20 and tries < 10):
                    if v:
                        randomCount += 1
                        randImages.append(c)
                    else:
                        tries += 1

                    v,c = get_random_crop(img, s.shape[0], s.shape[1], bboxes)
                
                for croppedImage in randImages:
                    resized_cimg = cv2.resize(croppedImage, (64, 64))
                    fd = cv2HOGDescriptor.compute(resized_cimg)[:,0]   
                    nonCars.append(fd)

    return [cars, nonCars]

In [7]:
training_images = []
training_labels = []

print("getting Feature Vectors from supplmentary dataset")
retVal = GetSuppFeatureVectors()
cars = retVal[0]
nonCars = retVal[1]

for c in cars:
    training_images.append(c)
    training_labels.append(0)

for nc in nonCars:
    training_images.append(nc)
    training_labels.append(1)

print("getting Feature Vectors from training dataset")
for i in range(1, 1075):
    retVal = GetFeatureVector(i, False, False)
    cars = retVal[0]
    nonCars = retVal[1]

    for c in cars:
        training_images.append(c)
        training_labels.append(0)
    
    for nc in nonCars:
        training_images.append(nc)
        training_labels.append(1)

print('scaling training data')
scaler.fit(training_images)
scaler.transform(training_images)

getting Feature Vectors from supplmentary dataset
getting Feature Vectors from training dataset
scaling training data


array([[-0.69892984, -0.25487555,  0.38395966, ..., -0.94795683,
        -0.17379206, -0.58598213],
       [-0.76992863,  0.61840467,  1.42714125, ..., -1.11763876,
        -1.04256756, -0.76317799],
       [-0.33367209, -0.07800933, -0.77800717, ..., -1.05178834,
        -0.99495611, -0.54369203],
       ...,
       [-0.77482577,  0.05518334,  1.34771871, ..., -0.22430778,
        -0.86920411, -0.70652897],
       [-0.15400843, -0.97635951, -1.03431817, ...,  0.90295506,
        -0.81712132, -0.85366977],
       [-0.74911532,  0.67490717,  2.82824088, ...,  1.12899754,
         0.54258317, -0.40969199]])

In [8]:
test_images = []
test_labels = []

print("getting Feature Vectors from test dataset")
for i in range(1, 270):
    cars = GetFeatureVector(i, True)
    for c in cars:
        test_images.append(c)
        test_labels.append(0)

print('scaling test data')
scaler.transform(test_images)


getting Feature Vectors from test dataset
scaling test data


array([[-0.1138388 , -0.17764873, -0.59076364, ..., -0.45454964,
        -1.04181115, -0.85053914],
       [ 0.39919657, -0.49866178, -0.76469145, ..., -1.09909699,
        -1.02598774, -0.63961623],
       [-0.36977486,  1.28306984,  1.66021691, ..., -1.1113548 ,
        -0.98894316, -0.77668866],
       ...,
       [-0.39631239,  1.45426161,  1.21426095, ..., -1.11788195,
        -1.04274516, -0.83710891],
       [-0.5064983 ,  1.82406646,  0.54504553, ..., -1.07725584,
        -1.01811304, -0.84719316],
       [-0.65831106,  0.8975717 , -0.21605323, ..., -1.09156837,
        -0.9724385 , -0.49724428]])

In [9]:
svm = LinearSVC(dual=False)
print("svm made")
svm = svm.fit(training_images, training_labels)
print("fitted")
score = svm.score(test_images, test_labels)
print("scored")

print("predicting")
svmpred = svm.predict(test_images)
svmpred2 = svm.predict(training_images)

print("EVALUATION ON TESTING DATA")

print(confusion_matrix(test_labels, svmpred))
print(metrics.accuracy_score(test_labels, svmpred))
print(metrics.accuracy_score(training_labels, svmpred2))

svm made
fitted
scored
predicting
EVALUATION ON TESTING DATA
[[363  12]
 [  0   0]]
0.968
0.9841151738473167


In [10]:
def sliding_window(image, stepSize, windowSize):
	# slide a window across the image
	for y in range(0, image.shape[0], stepSize):
		for x in range(0, image.shape[1], stepSize):
			# yield the current window
			yield (x, y, image[y:y + windowSize[1], x:x + windowSize[0]])

#https://www.pyimagesearch.com/2015/03/23/sliding-windows-for-object-detection-with-python-and-opencv/


In [11]:
def createBoxes(image, classifier):
# construct the argument parser and parse the arguments
    windowSizes = [(32,32), (64,64), (128, 128), (256,256), (512, 512)]
    bboxes = []
    
    numRows = image.shape[0]
    third = int(numRows/3)
    croppedImg = image[third:]

    for windowSize in windowSizes:
        (winW, winH) = windowSize
        for (x, y, window) in sliding_window(croppedImg, stepSize=10, windowSize=(winW, winH)):
            # if the window does not meet our desired window size, ignore it
            if window.shape[0] != winH or window.shape[1] != winW:
                continue

            # THIS IS WHERE YOU WOULD PROCESS YOUR WINDOW, SUCH AS APPLYING A
            # MACHINE LEARNING CLASSIFIER TO CLASSIFY THE CONTENTS OF THE
            # WINDOW
            resized_img = cv2.resize(window, (64, 64))

            fd  = cv2HOGDescriptor.compute(resized_img)[:,0]
            
            result = classifier.predict([fd])
            #print(result)
            if result[0] == 0:
                #print("car")
                #plt.imshow(window)
                #plt.show()
                bbox = {"top": y + third, "left": x, "right": x + winW, "bottom": y + winH + third}
                bboxes.append(bbox)

    return bboxes

    #https://www.pyimagesearch.com/2015/03/23/sliding-windows-for-object-detection-with-python-and-opencv/

In [12]:
def non_max_suppression_fast(boxes, overlapThresh):
	# if there are no boxes, return an empty list
	if len(boxes) == 0:
		return []
	# if the bounding boxes integers, convert them to floats --
	# this is important since we'll be doing a bunch of divisions
	if boxes.dtype.kind == "i":
		boxes = boxes.astype("float")
	# initialize the list of picked indexes	
	pick = []
	# grab the coordinates of the bounding boxes
	x1 = boxes[:,0]
	y1 = boxes[:,1]
	x2 = boxes[:,2]
	y2 = boxes[:,3]
	# compute the area of the bounding boxes and sort the bounding
	# boxes by the bottom-right y-coordinate of the bounding box
	area = (x2 - x1 + 1) * (y2 - y1 + 1)
	idxs = np.argsort(y2)
	# keep looping while some indexes still remain in the indexes
	# list
	while len(idxs) > 0:
		# grab the last index in the indexes list and add the
		# index value to the list of picked indexes
		last = len(idxs) - 1
		i = idxs[last]
		pick.append(i)
		# find the largest (x, y) coordinates for the start of
		# the bounding box and the smallest (x, y) coordinates
		# for the end of the bounding box
		xx1 = np.maximum(x1[i], x1[idxs[:last]])
		yy1 = np.maximum(y1[i], y1[idxs[:last]])
		xx2 = np.minimum(x2[i], x2[idxs[:last]])
		yy2 = np.minimum(y2[i], y2[idxs[:last]])
		# compute the width and height of the bounding box
		w = np.maximum(0, xx2 - xx1 + 1)
		h = np.maximum(0, yy2 - yy1 + 1)
		# compute the ratio of overlap
		overlap = (w * h) / area[idxs[:last]]
		# delete all indexes from the index list that have
		idxs = np.delete(idxs, np.concatenate(([last],
			np.where(overlap > overlapThresh)[0])))
	# return only the bounding boxes that were picked using the
	# integer data type
	return boxes[pick].astype("int")

    #https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/

In [17]:
def GetBoxes(n):
    test_path = "benchmark_velocity_test/clips/" + str(n) + "/imgs/040.jpg"
    img = cv2.imread(test_path)
    bboxes = createBoxes(img, svm)
    boxList = []

    for bbox in bboxes:
        boxList.append((bbox["left"], bbox["top"], bbox["right"], bbox["bottom"]))
    
    boundingBoxes = np.array(boxList)
    pick = non_max_suppression_fast(boundingBoxes, 0.3)
    
    for (startX, startY, endX, endY) in pick:
	    cv2.rectangle(img, (startX, startY), (endX, endY), (0, 255, 0), 2)

    if (not os.path.exists('output')):
        print('creating output dir')
        os.mkdir('./output')

    cv2.imwrite("output/output" + str(n) + ".jpg", img)
    print(test_path)

for i in range(1,270):
    GetBoxes(i)

creating output dir
benchmark_velocity_test/clips/1/imgs/040.jpg
benchmark_velocity_test/clips/2/imgs/040.jpg
benchmark_velocity_test/clips/3/imgs/040.jpg
benchmark_velocity_test/clips/4/imgs/040.jpg
benchmark_velocity_test/clips/5/imgs/040.jpg


KeyboardInterrupt: 