In [1]:
import numpy as np
import os
import pickle
import cv2
import sys

In [2]:
def img_tointegral_img(image):
    s = np.zeros((image.shape[0],image.shape[1]))
    integral = np.zeros((image.shape[0],image.shape[1]))
    for i in range(len(image)):
        for j in range(len(image[0])):
            if(i-1 >=0):
                s[i][j] = s[i-1][j] + image[i][j]
            else:
                s[i][j] = image[i][j]
            if(j-1 >=0):
                integral[i][j] = integral[i][j-1] + s[i][j]
            else:
                integral[i][j] = s[i][j]
            
    return integral

In [3]:
def get_image_data(is_train):
    if(is_train == True):
        folder_name = 'trainset'
    else:
        folder_name = 'testset'
    
    faces_train = []
    for i in os.listdir('dataset/'+folder_name+'/faces/'):
        name = 'dataset/'+folder_name+'/faces/' + i
        image = cv2.imread(name, cv2.IMREAD_GRAYSCALE)
        integral = img_tointegral_img(image)
        faces_train.append(integral)
    faces_train = np.array(faces_train)
    non_faces_train = []
    for i in os.listdir('dataset/'+folder_name+'/non-faces/'):
        name = 'dataset/'+folder_name+'/non-faces/' + i
        image = cv2.imread(name, cv2.IMREAD_GRAYSCALE)

        integral = img_tointegral_img(image)
        non_faces_train.append(integral)
    non_faces_train = np.array(non_faces_train)
    
    num_pos = len(faces_train)
    num_neg = len(non_faces_train)
    
    labels = []
    for i in range(num_pos):
        labels.append(1)
    for i in range(num_neg):
        labels.append(0)
    labels = np.array(labels)
    
    train_integral_data = []
    for i in faces_train:
        train_integral_data.append(i)
    for i in non_faces_train:
        train_integral_data.append(i)
    train_integral_data = np.array(train_integral_data)
    return (train_integral_data, labels)

In [8]:
train_input_data, train_labels = get_image_data(True)

In [9]:
test_input_data, test_labels = get_image_data(False)

In [10]:
train_input_data.shape, train_labels.shape

((2499, 19, 19), (2499,))

In [11]:
test_input_data.shape, test_labels.shape

((2473, 19, 19), (2473,))

In [4]:
def calc_value(x1,x2,x3,x4,integral):
    temp_sum = 0
    if(x4[0]>=0 and x4[1]>=0):
        temp_sum += integral[x4[0],x4[1]]
    if(x1[0]>=0 and x1[1]>=0):
        temp_sum += integral[x1[0],x1[1]]
    if(x2[0]>=0 and x2[1]>=0):
        temp_sum -= integral[x2[0],x2[1]]
    if(x3[0]>=0 and x3[1]>=0):
        temp_sum -= integral[x3[0],x3[1]]
    return temp_sum

In [5]:
def calc_features(input_data):
    image = input_data[0]
    features1 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x,y+j]
                        x3 = [x+i,y]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f1_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x,y+j]
                        x4 = [x+i,y+2*j]
                        x2 = [x,y+2*j]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f1_temp2 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(-f1_temp1+f1_temp2)

        features1.append(sample)
    features1 = np.array(features1)
    
    features2 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x+i,y]
                        x3 = [x,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+i,y]
                        x4 = [x+2*i,y+j]
                        x2 = [x+2*i,y]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp2 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(+f2_temp1-f2_temp2)
        features2.append(sample)
    features2 = np.array(features2)
    
    features3 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x,y+j]
                        x3 = [x+i,y]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f1_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x,y+j]
                        x4 = [x+i,y+2*j]
                        x2 = [x,y+2*j]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        #print(x1,x2,x3,x4)
                        f1_temp2 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x,y+2*j]
                        x4 = [x+i,y+3*j]
                        x2 = [x,y+3*j]
                        x3 = [x+i,y+2*j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        #print(x1,x2,x3,x4)
                        f1_temp3 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(+f1_temp1-f1_temp2+f1_temp3)
        features3.append(sample)
    features3 = np.array(features3)
    
    features4 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x+i,y]
                        x3 = [x,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+i,y]
                        x4 = [x+2*i,y+j]
                        x2 = [x+2*i,y]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp2 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+2*i,y]
                        x4 = [x+3*i,y+j]
                        x2 = [x+3*i,y]
                        x3 = [x+2*i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp3 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(+f2_temp1-f2_temp2+f2_temp3)
        features4.append(sample)
    features4 = np.array(features4)
    
    features5 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x+i,y]
                        x3 = [x,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+i,y]
                        x4 = [x+2*i,y+j]
                        x2 = [x+2*i,y]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp2 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x,y+j]
                        x4 = [x+i,y+2*j]
                        x2 = [x+i,y+j]
                        x3 = [x,y+2*j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp3 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+i,y+j]
                        x4 = [x+2*i,y+2*j]
                        x2 = [x+i,y+2*j]
                        x3 = [x+2*i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp4 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(-f2_temp1+f2_temp2+f2_temp3-f2_temp4)
        features5.append(sample)
    features5 = np.array(features5)
    
    total_features = np.concatenate([features1,features2,features3,features4,features5],axis=1)
    return total_features

In [19]:
train_features = calc_features(train_input_data)

In [20]:
np.save('train_features.npy',train_features)
np.save('train_labels.npy',train_labels)

In [21]:
test_features = calc_features(test_input_data)

In [22]:
np.save('test_features.npy',test_features)
np.save('test_labels.npy',test_labels)

In [23]:
# loading train and test data

In [39]:
train_features = np.load('train_features.npy')
train_labels = np.load('train_labels.npy')

In [40]:
test_features = np.load('test_features.npy')
test_labels = np.load('test_labels.npy')

In [41]:
train_features.shape, test_features.shape

((2499, 63960), (2473, 63960))

In [42]:
# Initializing weights

In [43]:
num_pos = np.sum(train_labels)
num_neg = len(train_labels) - num_pos

In [44]:
weights = []
for i in range(num_pos):
    weights.append(1/float(2*num_pos))
for i in range(num_neg):
    weights.append(1/float(2*num_neg))
weights = np.array(weights)
weights = weights/np.sum(weights)

In [45]:
def weak_classifier(image_index,feature_index,threshold,polarity,is_train):
    if(is_train==True):
        if(polarity*train_features[image_index,feature_index] <  polarity*threshold):
            return 1
        else:
            return 0
    else:
        if(polarity*test_features[image_index,feature_index] <  polarity*threshold):
            return 1
        else:
            return 0

In [46]:
def one_round(weights):
    t_pos = 0
    t_neg = 0
    for i in range(len(train_labels)):
        if(train_labels[i]==1):
            t_pos += weights[i]
        else:
            t_neg += weights[i]
        
    all_clfs = []
    for feature_index in range(train_features.shape[1]):
        
        feature_values = train_features[:,feature_index]
        sorted_feature_value_indices = np.argsort(feature_values)
        s_pos = 0
        s_neg = 0
        weights_pos_till_now,weights_neg_till_now = 0, 0
        values = [sys.maxsize,0,0] # error,feature_threshold,feature_polarity
        for i in sorted_feature_value_indices:
            cur_error = min(weights_neg_till_now+t_pos-weights_pos_till_now,weights_pos_till_now+t_neg-weights_neg_till_now)
            if(cur_error<values[0]):
                values[0] = cur_error
                values[1] = feature_values[i]
                if(s_pos > s_neg):
                    values[2] = 1
                else:
                    values[2] = -1
            if(train_labels[i]==1):
                s_pos += 1
                weights_pos_till_now += weights[i]
            else:
                s_neg += 1
                weights_neg_till_now += weights[i]
        all_clfs.append([feature_index,values[1],values[2]])
    
    best_clf_index,best_err = 0,sys.maxsize
    for clf_index,clf in enumerate(all_clfs):
        
        cur_fp = 0.01
        cur_fn = 0.01
        for i in range(train_features.shape[0]):
            if(train_labels[i]==0 and weak_classifier(i,clf[0],clf[1],clf[2],True)==1):
                cur_fp += weights[i]
            if(train_labels[i]==1 and weak_classifier(i,clf[0],clf[1],clf[2],True)==0):
                cur_fn += weights[i]
        cur_fp = cur_fp/float(num_neg)
        cur_fn = cur_fn/float(num_pos)
        total_criteria = 0.7*cur_fp + 0.3*cur_fn
        if(total_criteria < best_err):
            best_clf_index,best_err = clf_index, total_criteria      
    return all_clfs[best_clf_index], best_err

In [47]:
rounds = []
for each_round in range(5):
    print(each_round)
    clf, error = one_round(weights)
    beta = error / float(1-error)
    for i in range(train_features.shape[0]):
      #  if(weak_classifier(i,clf[0],clf[1],clf[2],True)==train_labels[i]):
        if(train_labels[i]==0 and weak_classifier(i,clf[0],clf[1],clf[2],True)==1):
            weights[i] = weights[i] / float(beta)
    weights = weights/np.sum(weights)
    alpha = np.log(1/float(beta))
    print(clf,alpha)
    rounds.append([clf,alpha])

0
[10142, 106.0, -1] 9.711676590360547
1
[18102, -146.0, 1] 11.542130980586654
2
[6552, -299.0, 1] 11.547766037687158
3
[17455, 157.0, -1] 11.54857433308019
4
[1998, -165.0, 1] 11.550161720659283


In [48]:
rounds_arr = np.array(rounds)

In [None]:
with open('rounds_arr_fp_v4.pickle', 'wb') as handle:
    pickle.dump(rounds_arr, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
with open('rounds_arr_fp_v4.pickle', 'rb') as handle:
    rounds_arr = pickle.load(handle)

In [73]:
rounds_arr

array([[list([10142, 106.0, -1]), 9.711676590360547],
       [list([18102, -146.0, 1]), 11.542130980586654],
       [list([6552, -299.0, 1]), 11.547766037687158],
       [list([17455, 157.0, -1]), 11.54857433308019],
       [list([1998, -165.0, 1]), 11.550161720659283]], dtype=object)

In [74]:
def strong_clf(image_index,num_rounds,is_train):
    total = 0
    for round_index in range(num_rounds):
        if(weak_classifier(image_index,rounds_arr[round_index][0][0],rounds_arr[round_index][0][1],rounds_arr[round_index][0][2],is_train)==1):
            total += rounds_arr[round_index][1]
    if(total>= 0.5*sum(rounds_arr[:num_rounds,1])):
        return 1
    else:
        return 0

In [75]:
for desired_value in range(1,6):
    cur_acc = 0
    num_rounds = desired_value
    for i in range(train_features.shape[0]):
        if(strong_clf(i,num_rounds,True)==train_labels[i]):
            cur_acc += 1
    cur_acc = cur_acc/float(train_features.shape[0])
    print('Training acc:',cur_acc)

Training acc: 0.858343337334934
Training acc: 0.8199279711884754
Training acc: 0.8431372549019608
Training acc: 0.8103241296518607
Training acc: 0.8215286114445778


In [76]:
for desired_value in range(1,6):
    fp = 0
    tn = 0
    num_rounds = desired_value
    for i in range(train_features.shape[0]):
        if(train_labels[i]==0):
            tn += 1
            if(strong_clf(i,num_rounds,True)==1):
                fp += 1
    fpr = fp/float(train_features.shape[0])
    print('Training Fasle Postive Rate::',fpr)

Training Fasle Postive Rate:: 0.12605042016806722
Training Fasle Postive Rate:: 0.008403361344537815
Training Fasle Postive Rate:: 0.0
Training Fasle Postive Rate:: 0.0
Training Fasle Postive Rate:: 0.0


In [77]:
for desired_value in range(1,6):
    fn = 0
    tp = 0
    num_rounds = desired_value
    for i in range(train_features.shape[0]):
        if(train_labels[i]==1):
            tp += 1
            if(strong_clf(i,num_rounds,True)==0):
                fn += 1
    fnr = fn/float(train_features.shape[0])
    print('Training Fasle Negative Rate::',fnr)

Training Fasle Negative Rate:: 0.015606242496998799
Training Fasle Negative Rate:: 0.1716686674669868
Training Fasle Negative Rate:: 0.1568627450980392
Training Fasle Negative Rate:: 0.18967587034813926
Training Fasle Negative Rate:: 0.17847138855542216


In [78]:
for desired_value in range(1,6):
    cur_acc = 0
    num_rounds = desired_value
    for i in range(test_features.shape[0]):
        if(strong_clf(i,num_rounds,False)==test_labels[i]):
            cur_acc += 1
    cur_acc = cur_acc/float(test_features.shape[0])
    print('Testing acc:',cur_acc)

Testing acc: 0.7856854023453296
Testing acc: 0.8083299636069551
Testing acc: 0.8099474322684997
Testing acc: 0.8091386979377274
Testing acc: 0.8091386979377274


In [79]:
for desired_value in range(1,6):
    fp = 0
    tn = 0
    num_rounds = desired_value
    for i in range(test_features.shape[0]):
        if(test_labels[i]==0):
            tn += 1
            if(strong_clf(i,num_rounds,False)==1):
                fp += 1
    fpr = fp/float(test_features.shape[0])
    print('Testing Fasle Postive Rate::',fpr)

Testing Fasle Postive Rate:: 0.07278608976951072
Testing Fasle Postive Rate:: 0.0028305701577031944
Testing Fasle Postive Rate:: 0.0004043671653861706
Testing Fasle Postive Rate:: 0.0
Testing Fasle Postive Rate:: 0.0


In [80]:
for desired_value in range(1,6):
    fn = 0
    tp = 0
    num_rounds = desired_value
    for i in range(test_features.shape[0]):
        if(test_labels[i]==1):
            tp += 1
            if(strong_clf(i,num_rounds,False)==0):
                fn += 1
    fnr = fn/float(test_features.shape[0])
    print('Testing Fasle Negative Rate::',fnr)

Testing Fasle Negative Rate:: 0.14152850788515972
Testing Fasle Negative Rate:: 0.1888394662353417
Testing Fasle Negative Rate:: 0.18964820056611403
Testing Fasle Negative Rate:: 0.19086130206227253
Testing Fasle Negative Rate:: 0.19086130206227253
