## Training Part

### 1. Color Normalization

In [1]:
import numpy as np
import cv2

def gamma_color_normalization(img):
    return 255*(np.sqrt(img/255))

ImportError: No module named 'cv2'

### 2. Gradient Calculation

In [2]:
def grad(img):
    img_gradX = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize = 1)
    img_gradY = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize = 1)
    img_grad = (np.add(np.square(img_gradX), np.square(img_gradY)))
    img_grad_i = np.argmax(img_grad, axis = 2)
    i, j = np.ogrid[:img.shape[0], :img.shape[1]]
    img_gradX = img_gradX[i, j, img_grad_i]
    img_gradY = img_gradY[i, j, img_grad_i]
    return img_gradX, img_gradY

### 3. HOG Descriptor(8x8 block)

In [3]:
def hog(grad_mag, grad_angle):
    hog_vector = np.zeros((1,9))
    grad_angle = grad_angle%180;
    for i in range(grad_mag.shape[0]):
        for j in range(grad_angle.shape[1]):
            l = int(grad_angle[i][j]//20)
            r = (l+1)
            hog_vector[0,l-1] = (grad_mag[i, j])*((r*20 - (grad_angle[i, j]))/20)
            hog_vector[0,r-1] = (grad_mag[i, j])*((grad_angle[i, j] - l*20)/20)
    return hog_vector

### 4. Image Vector

In [4]:
def image_vector(img):
    img_new  = cv2.resize(img, (64, 128))      #width = 64, height = 128
    img_new = img_new.astype('float64')
    img_new = gamma_color_normalization(img_new)
    img_gradX, img_gradY = grad(img_new);
    grad_mag, grad_angle = cv2.cartToPolar(img_gradX, np.abs(img_gradY), angleInDegrees = True)
    grad_angle = grad_angle.astype(np.uint8)
    hog_vector = np.empty(shape = (1, 0))
    for i in range(15):
        for j in range(7):
            temp_vector = np.empty(shape = (1, 0))
            x1, y1 = i*8, j*8
            x2, y2 = x1+8, y1 +8
            temp_vector = np.concatenate((temp_vector,
                                         hog(grad_mag[x1:x2, y1:y2],
                                            grad_angle[x1:x2, y1:y2])), axis = 1)
            x1+=8
            x2 = x1+8
            temp_vector = np.concatenate((temp_vector,
                                         hog(grad_mag[x1:x2, y1:y2],
                                            grad_angle[x1:x2, y1:y2])), axis = 1)
            y1+=8
            y2= y1+8
            temp_vector = np.concatenate((temp_vector,
                                         hog(grad_mag[x1:x2, y1:y2],
                                            grad_angle[x1:x2, y1:y2])), axis = 1)
            x1-=8
            x2-=8
            temp_vector = np.concatenate((temp_vector,
                                         hog(grad_mag[x1:x2, y1:y2],
                                            grad_angle[x1:x2, y1:y2])), axis = 1)
            Norm = np.linalg.norm(temp_vector)
            temp_vector/=Norm;
            hog_vector = np.concatenate(( hog_vector, temp_vector), axis = 1)
    return hog_vector

In [None]:
import os
fold = os.path.realpath('INRIAPerson/test_64x128_H96/pos/crop001001a.png')
fold

### Read images and calculate HOG descriptor

In [6]:
# descriptor = image_vector("/INRIAPerson/train_64x128_H96/pos/crop001001a.png")
# import the necessary packages
from __future__ import print_function
from imutils.object_detection import non_max_suppression
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import glob
import imutils
from sklearn.feature_extraction import image

X = np.zeros(shape = (14596, 3780))
y = np.empty(shape = (14596, 1))
y[:2416] = 1;
y[2416:] = 0;

i = 0
for filename in glob.glob("INRIAPerson/96X160H96/Train/pos/*.png"):
    img = (Image.open(filename).convert('RGB'))
    img1 = (np.array(img.getdata())).reshape(160, 96, 3).astype(np.uint8)
    X[i] = image_vector(img1[16:144,16:80,:])*255
    i+=1
print(i)

for filename in glob.glob("INRIAPerson/Train/neg/*"):
    img = cv2.imread(filename)
    patches = image.extract_patches_2d(img, (128, 64), max_patches = 10)
    for j in range(10):
        X[i] = image_vector(patches[j])*255
        i+=1
print(i)



2416
14596


array([[ 0.14993267,  0.24720085,  0.06180021, ...,  0.24432929,
         0.07421722,  0.27832623]])

#### Preparing data

In [7]:
# print(X.shape, y.shape)
temp = np.concatenate((X,y), axis = 1)
np.random.shuffle(temp)
X = temp[:,:-1]
y = (temp[:,-1])
y.shape = (-1,1)
print(X.shape, y.shape)

(14596, 3780) (14596, 1)


#### Training using SVM Classifier

In [61]:

from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix
import pandas as pd

y = y[~np.isnan(X).any(axis = 1)]
X = X[~np.isnan(X).any(axis = 1)]
print(X.shape, y.shape)
y = y.reshape(-1,)

clf = LinearSVC()
clf.fit(X[:10000,:], y[:10000])
print("Score on test(Validation) data:",clf.score(X[10000:,:], y[10000:]))
c_matrix = confusion_matrix(y[10000:], clf.predict(X[10000:,:]))
df = pd.DataFrame(c_matrix)
df.columns.name = "True Label"
df.index.name = "Predicted Label"
display(df)

(13909, 3780) (13909,)
Score on test(Validation) data: 0.936300844206


True Label,0,1
Predicted Label,Unnamed: 1_level_1,Unnamed: 2_level_1
0,3085,125
1,124,575


#### Saving Coefficients

In [None]:
print((clf.coef_).shape, X.shape, clf.intercept_.shape)
intercept = clf.intercept_.reshape(-1,1)
print((clf.coef_).shape, X.shape, intercept.shape)
np.savetxt("coef.txt", np.hstack(( clf.coef_, intercept)))

<hr style = "height:2px">

** I couldn't be able to test it as sliding window method is taking a lot of time. We need to use library CUDA and more optimized algorithm for sliding window. So I am leaving that part and we will see how it works in real life using openCv pre-trained model.**

## openCv Pre-Trained model

In [62]:
from imutils.object_detection import non_max_suppression
from imutils import paths
import numpy as np
import imutils
import cv2

# initialize the HOG descriptor/person detector
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

imagePath = "INRIAPerson/Train/pos/crop001003.png"
# load the image and resize it.
image = cv2.imread(imagePath)
image = imutils.resize(image, width=min(400, image.shape[1]))
orig = image.copy()

# detect people in the image
(rects, weights) = hog.detectMultiScale(image, winStride=(4, 4),
    padding=(8, 8), scale=1.05)

# draw the original bounding boxes
for (x, y, w, h) in rects:
    cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 0, 255), 2)

# apply non-maxima suppression.
rects = np.array([[x, y, x + w, y + h] for (x, y, w, h) in rects])
pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

# draw the final bounding boxes
for (xA, yA, xB, yB) in pick:
    cv2.rectangle(image, (xA, yA), (xB, yB), (0, 255, 0), 2)

# show the output images
cv2.imshow("IMAGE", image)
if cv2.waitKey(0)==27:
    cv2.destroyAllWindows()