In [1]:
import numpy as np
import os
import pickle
import cv2
import sys

In [2]:
def img_tointegral_img(image):
    s = np.zeros((image.shape[0],image.shape[1]))
    integral = np.zeros((image.shape[0],image.shape[1]))
    for i in range(len(image)):
        for j in range(len(image[0])):
            if(i-1 >=0):
                s[i][j] = s[i-1][j] + image[i][j]
            else:
                s[i][j] = image[i][j]
            if(j-1 >=0):
                integral[i][j] = integral[i][j-1] + s[i][j]
            else:
                integral[i][j] = s[i][j]
            
    return integral

In [3]:
def get_image_data(is_train):
    if(is_train == True):
        folder_name = 'trainset'
    else:
        folder_name = 'testset'
    
    faces_train = []
    for i in os.listdir('dataset/'+folder_name+'/faces/'):
        name = 'dataset/'+folder_name+'/faces/' + i
        image = cv2.imread(name, cv2.IMREAD_GRAYSCALE)
        integral = img_tointegral_img(image)
        faces_train.append(integral)
    faces_train = np.array(faces_train)
    non_faces_train = []
    for i in os.listdir('dataset/'+folder_name+'/non-faces/'):
        name = 'dataset/'+folder_name+'/non-faces/' + i
        image = cv2.imread(name, cv2.IMREAD_GRAYSCALE)

        integral = img_tointegral_img(image)
        non_faces_train.append(integral)
    non_faces_train = np.array(non_faces_train)
    
    num_pos = len(faces_train)
    num_neg = len(non_faces_train)
    
    labels = []
    for i in range(num_pos):
        labels.append(1)
    for i in range(num_neg):
        labels.append(0)
    labels = np.array(labels)
    
    train_integral_data = []
    for i in faces_train:
        train_integral_data.append(i)
    for i in non_faces_train:
        train_integral_data.append(i)
    train_integral_data = np.array(train_integral_data)
    return (train_integral_data, labels)

In [8]:
train_input_data, train_labels = get_image_data(True)

In [9]:
test_input_data, test_labels = get_image_data(False)

In [10]:
train_input_data.shape, train_labels.shape

((2499, 19, 19), (2499,))

In [11]:
test_input_data.shape, test_labels.shape

((2473, 19, 19), (2473,))

In [4]:
def calc_value(x1,x2,x3,x4,integral):
    temp_sum = 0
    if(x4[0]>=0 and x4[1]>=0):
        temp_sum += integral[x4[0],x4[1]]
    if(x1[0]>=0 and x1[1]>=0):
        temp_sum += integral[x1[0],x1[1]]
    if(x2[0]>=0 and x2[1]>=0):
        temp_sum -= integral[x2[0],x2[1]]
    if(x3[0]>=0 and x3[1]>=0):
        temp_sum -= integral[x3[0],x3[1]]
    return temp_sum

In [5]:
def calc_features(input_data):
    image = input_data[0]
    features1 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x,y+j]
                        x3 = [x+i,y]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f1_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x,y+j]
                        x4 = [x+i,y+2*j]
                        x2 = [x,y+2*j]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f1_temp2 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(-f1_temp1+f1_temp2)

        features1.append(sample)
    features1 = np.array(features1)
    
    features2 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x+i,y]
                        x3 = [x,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+i,y]
                        x4 = [x+2*i,y+j]
                        x2 = [x+2*i,y]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp2 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(+f2_temp1-f2_temp2)
        features2.append(sample)
    features2 = np.array(features2)
    
    features3 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x,y+j]
                        x3 = [x+i,y]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f1_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x,y+j]
                        x4 = [x+i,y+2*j]
                        x2 = [x,y+2*j]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        #print(x1,x2,x3,x4)
                        f1_temp2 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x,y+2*j]
                        x4 = [x+i,y+3*j]
                        x2 = [x,y+3*j]
                        x3 = [x+i,y+2*j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        #print(x1,x2,x3,x4)
                        f1_temp3 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(+f1_temp1-f1_temp2+f1_temp3)
        features3.append(sample)
    features3 = np.array(features3)
    
    features4 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x+i,y]
                        x3 = [x,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+i,y]
                        x4 = [x+2*i,y+j]
                        x2 = [x+2*i,y]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp2 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+2*i,y]
                        x4 = [x+3*i,y+j]
                        x2 = [x+3*i,y]
                        x3 = [x+2*i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp3 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(+f2_temp1-f2_temp2+f2_temp3)
        features4.append(sample)
    features4 = np.array(features4)
    
    features5 = []
    for integral in input_data:
        sample = []
        for i in range(1,len(image)+1):
            for j in range(1,len(image[0])+1):
                for x in range(-1,len(image)):
                    for y in range(-1,len(image[0])):
                        x1 = [x,y]
                        x4 = [x+i,y+j]
                        x2 = [x+i,y]
                        x3 = [x,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp1 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+i,y]
                        x4 = [x+2*i,y+j]
                        x2 = [x+2*i,y]
                        x3 = [x+i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp2 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x,y+j]
                        x4 = [x+i,y+2*j]
                        x2 = [x+i,y+j]
                        x3 = [x,y+2*j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp3 = calc_value(x1,x2,x3,x4,integral)
                        x1 = [x+i,y+j]
                        x4 = [x+2*i,y+2*j]
                        x2 = [x+i,y+2*j]
                        x3 = [x+2*i,y+j]
                        if(x4[0]>=len(image) or x4[1]>=len(image[0])):
                            continue
                        f2_temp4 = calc_value(x1,x2,x3,x4,integral)
                        sample.append(-f2_temp1+f2_temp2+f2_temp3-f2_temp4)
        features5.append(sample)
    features5 = np.array(features5)
    
    total_features = np.concatenate([features1,features2,features3,features4,features5],axis=1)
    return total_features

In [19]:
train_features = calc_features(train_input_data)

In [20]:
np.save('train_features.npy',train_features)
np.save('train_labels.npy',train_labels)

In [21]:
test_features = calc_features(test_input_data)

In [22]:
np.save('test_features.npy',test_features)
np.save('test_labels.npy',test_labels)

In [23]:
# loading train and test data

In [6]:
train_features = np.load('train_features.npy')
train_labels = np.load('train_labels.npy')

In [7]:
test_features = np.load('test_features.npy')
test_labels = np.load('test_labels.npy')

In [8]:
train_features.shape, test_features.shape

((2499, 63960), (2473, 63960))

In [9]:
# Initializing weights

In [10]:
num_pos = np.sum(train_labels)
num_neg = len(train_labels) - num_pos

In [11]:
weights = []
for i in range(num_pos):
    weights.append(1/float(2*num_pos))
for i in range(num_neg):
    weights.append(1/float(2*num_neg))
weights = np.array(weights)
weights = weights/np.sum(weights)

In [12]:
def weak_classifier(image_index,feature_index,threshold,polarity,is_train):
    if(is_train==True):
        if(polarity*train_features[image_index,feature_index] <  polarity*threshold):
            return 1
        else:
            return 0
    else:
        if(polarity*test_features[image_index,feature_index] <  polarity*threshold):
            return 1
        else:
            return 0

In [13]:
def one_round(weights):
    t_pos = 0
    t_neg = 0
    for i in range(len(train_labels)):
        if(train_labels[i]==1):
            t_pos += weights[i]
        else:
            t_neg += weights[i]
        
    all_clfs = []
    for feature_index in range(train_features.shape[1]):
        
        feature_values = train_features[:,feature_index]
        sorted_feature_value_indices = np.argsort(feature_values)
        s_pos = 0
        s_neg = 0
        weights_pos_till_now,weights_neg_till_now = 0, 0
        values = [sys.maxsize,0,0] # error,feature_threshold,feature_polarity
        for i in sorted_feature_value_indices:
            cur_error = min(weights_neg_till_now+t_pos-weights_pos_till_now,weights_pos_till_now+t_neg-weights_neg_till_now)
            if(cur_error<values[0]):
                values[0] = cur_error
                values[1] = feature_values[i]
                if(s_pos > s_neg):
                    values[2] = 1
                else:
                    values[2] = -1
            if(train_labels[i]==1):
                s_pos += 1
                weights_pos_till_now += weights[i]
            else:
                s_neg += 1
                weights_neg_till_now += weights[i]
        all_clfs.append([feature_index,values[1],values[2]])
    
    best_clf_index,best_err = 0,sys.maxsize
    for clf_index,clf in enumerate(all_clfs):
        
        cur_fp = 0.01
        cur_fn = 0.01
        for i in range(train_features.shape[0]):
            if(train_labels[i]==0 and weak_classifier(i,clf[0],clf[1],clf[2],True)==1):
                cur_fp += weights[i]
            if(train_labels[i]==1 and weak_classifier(i,clf[0],clf[1],clf[2],True)==0):
                cur_fn += weights[i]
        cur_fp = cur_fp/float(num_neg)
        cur_fn = cur_fn/float(num_pos)
        total_criteria = 0.3*cur_fp + 0.7*cur_fn
        if(total_criteria < best_err):
            best_clf_index,best_err = clf_index, total_criteria      
    return all_clfs[best_clf_index], best_err

In [14]:
rounds = []
for each_round in range(5):
    print(each_round)
    clf, error = one_round(weights)
    beta = error / float(1-error)
    for i in range(train_features.shape[0]):
      #  if(weak_classifier(i,clf[0],clf[1],clf[2],True)==train_labels[i]):
        if(train_labels[i]==1 and weak_classifier(i,clf[0],clf[1],clf[2],True)==0):
            weights[i] = weights[i] / float(beta)
    weights = weights/np.sum(weights)
    alpha = np.log(1/float(beta))
    print(clf,alpha)
    rounds.append([clf,alpha])

0
0.0003213432756294154 7.325000000000005e-05 0.00042766896518487915
0.0003008796724209945 8.937500000000007e-05 0.0003915245320299922
0.00024101876493467917 8.662500000000006e-05 0.00030718752133525594
0.0001544730531704693 0.00010737500000000008 0.0001746579331006704
0.00014886996894590792 0.00011212500000000008 0.00016461781277986846
0.00010684513204766252 6.575000000000004e-05 0.00012445733149666073
7.987591199432938e-05 6.487500000000005e-05 8.630487427761339e-05
7.929402883924165e-05 5.825000000000005e-05 8.831289834177378e-05
7.895526252906619e-05 4.7750000000000036e-05 9.232894647009455e-05
7.12534446146803e-05 5.487500000000004e-05 7.827277802097184e-05
[15272, 93.0, -1] 9.54919613853166
1
6.397561619573606e-05 5.5984630737559905e-06 8.899439610515609e-05
6.397341129187808e-05 5.5681882427381865e-06 8.90042211700809e-05
1.5709474194567945e-05 5.593264365399398e-06 2.004499269278304e-05
1.5708920645129118e-05 5.568494049112103e-06 2.005481775770784e-05
1.570396658187166e-05 5.5

In [15]:
rounds_arr = np.array(rounds)

In [None]:
with open('rounds_arr_fp_v4.pickle', 'wb') as handle:
    pickle.dump(rounds_arr, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
with open('rounds_arr_fp_v4.pickle', 'rb') as handle:
    rounds_arr = pickle.load(handle)

In [35]:
rounds_arr

array([[list([15272, 93.0, -1]), 9.54919613853166],
       [list([10975, -514.0, -1]), 11.064571079140466],
       [list([3913, 35.0, -1]), 11.065391239462793],
       [list([9789, -292.0, -1]), 11.069422903482543],
       [list([14338, -373.0, -1]), 11.06968900520203]], dtype=object)

In [36]:
def strong_clf(image_index,num_rounds,is_train):
    total = 0
    for round_index in range(num_rounds):
        if(weak_classifier(image_index,rounds_arr[round_index][0][0],rounds_arr[round_index][0][1],rounds_arr[round_index][0][2],is_train)==1):
            total += rounds_arr[round_index][1]
    if(total>= 0.5*sum(rounds_arr[:num_rounds,1])):
        return 1
    else:
        return 0

In [37]:
for desired_value in range(1,6):
    cur_acc = 0
    num_rounds = desired_value
    for i in range(train_features.shape[0]):
        if(strong_clf(i,num_rounds,True)==train_labels[i]):
            cur_acc += 1
    cur_acc = cur_acc/float(train_features.shape[0])
    print('Training acc:',cur_acc)

Training acc: 0.8287314925970388
Training acc: 0.45178071228491395
Training acc: 0.6190476190476191
Training acc: 0.44617847138855543
Training acc: 0.4657863145258103


In [38]:
for desired_value in range(1,6):
    fp = 0
    tn = 0
    num_rounds = desired_value
    for i in range(train_features.shape[0]):
        if(train_labels[i]==0):
            tn += 1
            if(strong_clf(i,num_rounds,True)==1):
                fp += 1
    fpr = fp/float(train_features.shape[0])
    print('Training Fasle Postive Rate::',fpr)

Training Fasle Postive Rate:: 0.15966386554621848
Training Fasle Postive Rate:: 0.5478191276510604
Training Fasle Postive Rate:: 0.38095238095238093
Training Fasle Postive Rate:: 0.5538215286114446
Training Fasle Postive Rate:: 0.5342136854741897


In [39]:
for desired_value in range(1,6):
    fn = 0
    tp = 0
    num_rounds = desired_value
    for i in range(train_features.shape[0]):
        if(train_labels[i]==1):
            tp += 1
            if(strong_clf(i,num_rounds,True)==0):
                fn += 1
    fnr = fn/float(train_features.shape[0])
    print('Training Fasle Negative Rate::',fnr)

Training Fasle Negative Rate:: 0.011604641856742696
Training Fasle Negative Rate:: 0.00040016006402561027
Training Fasle Negative Rate:: 0.0
Training Fasle Negative Rate:: 0.0
Training Fasle Negative Rate:: 0.0


In [40]:
for desired_value in range(1,6):
    cur_acc = 0
    num_rounds = desired_value
    for i in range(test_features.shape[0]):
        if(strong_clf(i,num_rounds,False)==test_labels[i]):
            cur_acc += 1
    cur_acc = cur_acc/float(test_features.shape[0])
    print('Testing acc:',cur_acc)

Testing acc: 0.7545491306105944
Testing acc: 0.28063081277800245
Testing acc: 0.6316215123331985
Testing acc: 0.28993125758188437
Testing acc: 0.305701577031945


In [41]:
for desired_value in range(1,6):
    fp = 0
    tn = 0
    num_rounds = desired_value
    for i in range(test_features.shape[0]):
        if(test_labels[i]==0):
            tn += 1
            if(strong_clf(i,num_rounds,False)==1):
                fp += 1
    fpr = fp/float(test_features.shape[0])
    print('Testing Fasle Postive Rate::',fpr)

Testing Fasle Postive Rate:: 0.11888394662353417
Testing Fasle Postive Rate:: 0.6870198139911039
Testing Fasle Postive Rate:: 0.3251112009704812
Testing Fasle Postive Rate:: 0.6874241811564901
Testing Fasle Postive Rate:: 0.6704407602102709


In [42]:
for desired_value in range(1,6):
    fn = 0
    tp = 0
    num_rounds = desired_value
    for i in range(test_features.shape[0]):
        if(test_labels[i]==1):
            tp += 1
            if(strong_clf(i,num_rounds,False)==0):
                fn += 1
    fnr = fn/float(test_features.shape[0])
    print('Testing Fasle Negative Rate::',fnr)

Testing Fasle Negative Rate:: 0.12656692276587142
Testing Fasle Negative Rate:: 0.03234937323089365
Testing Fasle Negative Rate:: 0.04326728669632026
Testing Fasle Negative Rate:: 0.022644561261625555
Testing Fasle Negative Rate:: 0.023857662757784066
