### This file runs the Face Recognition (section 4) section

In [1]:
import math
import glob
import cv2
import numpy as np
from matplotlib import pyplot as plt
from copy import deepcopy
from skimage.feature import hog, local_binary_pattern
from sklearn.mixture import GaussianMixture
from sklearn.metrics import accuracy_score, confusion_matrix
import json
import sys

In [None]:
class HarrisFeature():
    def __init__(self, x, y):
        self.pt = (x, y)

train_images = {}
test_images = {}

#assigns all training and test images to dictionary
for i in range(0, 75):
    file = "./pictures/train/train" + str(i) + ".jpg"
    train_images["train" + str(i) + ".jpg"] = cv2.cvtColor(cv2.imread(file), cv2.COLOR_BGR2RGB)   
for j in range(0,70):
    file = "./pictures/test/test" + str(j) + ".jpg"
    test_images["test" + str(j) + ".jpg"] = cv2.cvtColor(cv2.imread(file), cv2.COLOR_BGR2RGB)
    
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')

# grab the original images
def clean_filename(path):
    strings = path.split('/')
    return strings[-1]

def show_bounding_box(train_images, keys, face_coords, method='sift'):
    keypoints = []
    for key in keys:
        x, y, w, h = face_coords[key]
        img = train_images[key]
        cpy = deepcopy(train_images[key])
        cv2.rectangle(cpy, (x, y), (x+w, y+h), (255, 0, 0), 2)
        if method == 'sift':
            # create a mask image filled with zeros, the size of original image
            mask = np.zeros(cpy.shape[:2], dtype=np.uint8)
            sift = cv2.xfeatures2d.SIFT_create()

            # draw your selected ROI on the mask image
            cv2.rectangle(mask, (x, y), (x+w, y+h), (255), thickness = -1)

            # provide mask to the detect method
            kp, descriptorsTrain = sift.detectAndCompute(cpy,mask)
            keypoints.append(kp)
            img=cv2.drawKeypoints(img,kp,cpy)
        elif method == 'harris':
            box = extract_box_from_image(train_images[key], (x, y, w, h))
            gray_box = cv2.cvtColor(box, cv2.COLOR_RGB2GRAY)
            block_size = 4
            k_size = 3
            k = 0.04
            corners = cv2.cornerHarris(gray_box, block_size, k_size, k)
            threshold = 0.5 * corners.max()
            kp_coords = np.where(corners >= threshold)
            y_coords = kp_coords[0]
            x_coords = kp_coords[1]
            y_coords += y
            x_coords += x
            kp = []
            for x, y in zip(x_coords, y_coords):
                kp.append(HarrisFeature(x, y))
            keypoints.append(kp)
    return keypoints
            

def bound_faces(face_coords, images, method='sift'):
    keypoints = {}
    for key in images:
        # Detect faces
        if key in face_coords.keys():
            keypoints[key] = show_bounding_box(images, [key], face_coords, method)[0]
        else:
            faces = face_cascade.detectMultiScale(images[key], 1.05, 5,flags=cv2.CASCADE_SCALE_IMAGE)
            if len(faces) == 0:
                error_faces.append(key)
                continue
            for (x, y, w, h) in faces:
                face_coords[key] = (x, y, w, h)
                img = images[key]
                cpy = deepcopy(images[key])

                cv2.rectangle(cpy, (x, y), (x+w, y+h), (255, 0, 0), 2)
    
                if method == 'sift':
                    # create a mask image filled with zeros, the size of original image
                    mask = np.zeros(cpy.shape[:2], dtype=np.uint8)
                    sift = cv2.xfeatures2d.SIFT_create()

                    # draw your selected ROI on the mask image
                    cv2.rectangle(mask, (x, y), (x+w, y+h), (255), thickness = -1)

                    # provide mask to the detect method
                    kp, descriptorsTrain = sift.detectAndCompute(cpy,mask)
                    img=cv2.drawKeypoints(img,kp,cpy)
                elif method == 'harris':
                    box = extract_box_from_image(images[key], (x, y, w, h))
                    gray_box = cv2.cvtColor(box, cv2.COLOR_RGB2GRAY)
                    block_size = 4
                    k_size = 3
                    k = 0.04
                    corners = cv2.cornerHarris(gray_box, block_size, k_size, k)
                    threshold = 0.5 * corners.max()
                    kp_coords = np.where(corners >= threshold)
                    y_coords = kp_coords[0]
                    x_coords = kp_coords[1]
                    y_coords += y
                    x_coords += x
                    kp = []
                    for x, y in zip(x_coords, y_coords):
                        kp.append(HarrisFeature(x, y))
                keypoints[key] = kp
                
    return keypoints

def extract_box_from_image(image, box_dims):
    x, y, w, h = box_dims
    return image[y : y + h, x : x + w]

def create_patch(dim, image, key_points):
    upper_bound = int(math.ceil(dim/2))
    lower_bound = int(math.floor(dim/2))
    patches = []
    for kp in key_points:
        x = int(kp.pt[0])
        y = int(kp.pt[1])
        
        if x + upper_bound > image.shape[1] or x - lower_bound < 0:
            print("error: {} x {} patch centered at this keypoint not in bounds of image".format(dim, dim))
            continue
        
        if y + upper_bound > image.shape[0] or y - lower_bound < 0:
            print("error: {} x {} patch centered at this keypoint not in bounds of image".format(dim, dim))
            continue
        
        patch = image[y - lower_bound : y + upper_bound, x - lower_bound : x + upper_bound]
        patches.append(patch)
        
    return patches

def extract_hogs(patches, cell_size):
    hogs = []
    for p in patches:
        hog_ft = hog(p, orientations=9, cells_per_block=(2,2), pixels_per_cell=cell_size, multichannel=True, feature_vector=True)
        hogs.append(hog_ft)
    return hogs
def extract_lbps(patches, radius):
    lbps = []
    for p in patches:
        gray_p = cv2.cvtColor(p, cv2.COLOR_RGB2GRAY)
        lbp_ft = local_binary_pattern(gray_p, P=radius*8, R=radius)
        lbps.append(lbp_ft.flatten())
    return lbps

def get_data_and_labels(imgs, kps, c_size=None, patch_size=None, descriptor='hog', radius=None):
    if descriptor == 'hog':
        if c_size is None or patch_size is None:
            print("c_size and patch_size must be specified for HOG")
            exit(1)
        all_train = []
        image_to_hogs = {}
        for key in imgs:

            if key in error_faces:
                continue

            img_patches = create_patch(patch_size, imgs[key], kps[key])
            t_hog = extract_hogs(img_patches, c_size)
            image_to_hogs[key] = t_hog
            all_train.extend(t_hog)

        return all_train, image_to_hogs
    elif descriptor == 'lbp':
        if radius is None:
            print("Please specify a radius when requesting LBP features")
            exit(1)
        all_train = []
        image_to_lbps = {}
        for key in imgs:

            if key in error_faces:
                continue

            img_patches = create_patch(patch_size, imgs[key], kps[key])
            t_lbp = extract_lbps(img_patches, radius)
            image_to_lbps[key] = t_lbp
            all_train.extend(t_lbp)
        return all_train, image_to_lbps


def cluster_training_data(train_hogs, name_hogs, n_restarts, vocab_size):
    n_comps = vocab_size
    images_to_bow = {}
    gmm = GaussianMixture(n_components=n_comps, n_init=n_restarts, reg_covar=1e-5, covariance_type='spherical', max_iter=200)
    gmm.fit(train_hogs)
    
    counter = 0
    
    for key in name_hogs:
        pred = gmm.predict(name_hogs[key])
        bag = [0] * n_comps
        for i in pred:
            bag[i] = bag[i] + 1
            
        sum_bag = np.sum(bag)

        bag = np.array(bag) / sum_bag
        images_to_bow[key] = bag
        
#         if counter < 3:
#             plt.bar([i for i in range(n_comps)], bag)
#             plt.show()
            
        counter += 1
        
    return images_to_bow, gmm
   
def find_closest_point(point, points):
    closest_label = ""
    closest_val = sys.maxsize
    for p in points:
        dist = np.linalg.norm(point - points[p])
        if dist < closest_val:
            closest_val = dist
            closest_label = p
            
    return closest_label
            
    # preds = gmm.predict(train_hogs)
    # print(np.array(preds).shape)
#     preds = gmm.predict(test_hogs)
#     print("Accuracy: {}".format(accuracy_score(test_labels, preds)))
    # cf = confusion_matrix(list(test_labels.values()), preds)
    # plt.imshow(cf)
    # plt.show()
 
error_faces = []
    
face_coords = {
    "train43.jpg": (100, 80, 30, 30),
    "train31.jpg": (105, 85, 30, 30),
    "train36.jpg": (105 ,70, 50, 50),
    "train29.jpg": (105, 100, 30, 30),
    "train56.jpg": (110, 65, 30, 30),
    "train21.jpg": (110, 65, 40, 40),
    "train72.jpg": (105, 90, 35, 35),
    "train45.jpg": (100, 90, 35, 35),
    "train9.jpg": (100, 50, 60, 60)
}

test_face_coords = {
    "test0.jpg": (90,50,60,60),
    "test1.jpg": (105,50,50,50),
    "test2.jpg": (90,55,50,50),
    "test3.jpg":(85,65,40,40),
    "test4.jpg": (78,65,45,45),
    "test5.jpg": (105,65,45,45),
    "test6.jpg": (110,65,60,60),
    "test7.jpg": (110,85,50,50),
    "test8.jpg": (110,70,50,50),
    "test9.jpg": (110,70,40,40),
    "test10.jpg": (90,40,60,60),
    "test11.jpg": (110,80,45,45),
    "test12.jpg": (100,50,60,60),
    "test13.jpg": (90,50,50,50),
    "test14.jpg": (80,70,50,50),
    "test16.jpg": (90,50,70,70),
    "test21.jpg": (110,30,70,70),
    "test24.jpg": (90,80,40,40),
    "test30.jpg": (110,45,40,40),
    "test31.jpg": (100,60,50,50),
    "test33.jpg": (80,80,50,50),
    "test38.jpg": (80,80,40,40),
    "test39.jpg": (110,60,60,60),
    "test41.jpg": (90,50,50,50),
    "test44.jpg": (120,50,40,40),
    "test46.jpg": (110,40,70,70),
    "test47.jpg": (80,40,60,60),
    "test50.jpg": (100,50,60,60),
    "test52.jpg": (100,100,40,40),
    "test55.jpg": (100,50,50,50),
    "test56.jpg": (90,50,50,50),
    "test57.jpg": (110,60,50,50),
    "test58.jpg": (80,70,50,50),
    "test59.jpg": (120,40,60,60),
    "test62.jpg": (100,60,50,50),
    "test65.jpg": (100,50,45,45),
    "test67.jpg": (120,80,45,45)
}

def grid_search(vocab_sizes, patch_sizes, n_restarts, lbp_radii):
    best_hog_v = None
    best_cell_size = None
    best_hog_patch_size = None
    best_hog_accuracy = 0
    best_lbp_accuracy = 0
    best_radius = None
    best_lbp_v = None
    best_lbp_patch_size = None
    hog_confusion_matrix = None
    lbp_confusion_matrix = None
    count = 0
    for v in vocab_sizes:
        count += 1
        print("Iteration {} out of {}".format(count, len(vocab_sizes)))

        for patch_size in patch_sizes:
            for new_cell_size in [(3, 3),(4,4),(5,5)]:
                # HOGS
                all_train, named_hogs =  get_data_and_labels(train_images, keypoints, c_size=new_cell_size, patch_size=patch_size)
                all_test, named_hogs_test = get_data_and_labels(test_images,test_keypoints, new_cell_size, patch_size)
                train_hists, classifier = cluster_training_data(all_train, named_hogs, n_restarts, v)


                true = []
                predicted = []
                for key in named_hogs_test:
                    pred = classifier.predict(named_hogs_test[key])
                    bag = [0] * v
                    for i in pred:
                        bag[i] = bag[i] + 1

                    sum_bag = np.sum(bag)
                    bag = np.array(bag) / sum_bag
                    l = find_closest_point(bag, train_hists)
                    true.append(test_labels[key])
                    predicted.append(labels[l])
                acc = accuracy_score(true, predicted)
#                 print(v, new_cell_size, patch_size, acc)
                if acc > best_hog_accuracy:
                    best_hog_accuracy = acc
                    best_hog_v = v
                    best_cell_size = new_cell_size
                    best_hog_patch_size = patch_size
                    hog_confusion_matrix = confusion_matrix(true, predicted)
            print("Current best  HOG accuracy is {} with vocab_size={} and cell_size={} and patch_size={}".format(best_hog_accuracy, best_hog_v, best_cell_size, best_hog_patch_size))
            # LBP
            for radius in lbp_radii:
                all_train, named_lbps =  get_data_and_labels(train_images, keypoints, descriptor='lbp', radius=radius, patch_size=patch_size)
                all_test, named_lbps_test = get_data_and_labels(test_images,test_keypoints, descriptor='lbp', radius=radius, patch_size=patch_size)

                train_hists, classifier = cluster_training_data(all_train, named_lbps, n_restarts, v)


                true = []
                predicted = []
                for key in named_lbps_test:
                    pred = classifier.predict(named_lbps_test[key])
                    bag = [0] * v
                    for i in pred:
                        bag[i] = bag[i] + 1

                    sum_bag = np.sum(bag)
                    bag = np.array(bag) / sum_bag
                    l = find_closest_point(bag, train_hists)
                    true.append(test_labels[key])
                    predicted.append(labels[l])
                acc = accuracy_score(true, predicted)
                if acc > best_lbp_accuracy:
                    best_lbp_accuracy = acc
                    best_lbp_v = v
                    best_radius = radius
                    best_lbp_patch_size = patch_size
                    print(true[0], predicted[0])
                    lbp_confusion_matrix = confusion_matrix(true, predicted)
            print("Current best  LBP accuracy is {} with vocab_size={} and radius={} and patch_size={}".format(best_lbp_accuracy, best_lbp_v, best_radius, best_lbp_patch_size))

    print("Best HOG accuracy is {} with vocab_size={} and cell_size={} and patch_size={}".format(best_hog_accuracy, best_hog_v, best_cell_size, best_hog_patch_size))
    plt.imshow(hog_confusion_matrix)
    plt.show()
    print("Best LBP accuracy is {} with vocab_size={} and radius={} and patch_size={}".format(best_lbp_accuracy, best_lbp_v, best_radius, best_lbp_patch_size))
    plt.imshow(lbp_confusion_matrix)
    plt.show()
keypoints = bound_faces(face_coords, train_images, method='sift')

with open("pictures/train/labels.json") as f:
    labels = json.load(f)


test_keypoints = bound_faces(test_face_coords, test_images, method='sift')

with open("pictures/test/labels.json") as f:
    test_labels = json.load(f)

test_vocabs = [x for x in range(10, 310, 10)]
n_restarts = 3
patch_sizes = [15]
lbp_radii = [2, 7, 12]
grid_search(test_vocabs, patch_sizes, n_restarts, lbp_radii)

# Harris
# Best HOG accuracy is 0.4927536231884058 with vocab_size=41 and cell_size=(4, 4) and patch_size=17
# Best LBP accuracy is 0.5362318840579711 with vocab_size=71 and radius=7 and patch_size=13

# SIFT
# Best HOG accuracy is 0.5217391304347826 with vocab_size=100 and cell_size=(4, 4) and patch_size=15
# Best LBP accuracy is 0.4492753623188406 with vocab_size=120 and radius=2 and patch_size=15

# SIFT Patches= 10, 15, 25
# Best HOG accuracy is 0.4782608695652174 with vocab_size=100 and cell_size=(4, 4) and patch_size=25

Iteration 1 out of 30
Current best  HOG accuracy is 0.4142857142857143 with vocab_size=10 and cell_size=(4, 4) and patch_size=15
1 4
1 2
Current best  LBP accuracy is 0.32857142857142857 with vocab_size=10 and radius=7 and patch_size=15
Iteration 2 out of 30
Current best  HOG accuracy is 0.4142857142857143 with vocab_size=10 and cell_size=(4, 4) and patch_size=15
Current best  LBP accuracy is 0.32857142857142857 with vocab_size=10 and radius=7 and patch_size=15
Iteration 3 out of 30
Current best  HOG accuracy is 0.4142857142857143 with vocab_size=10 and cell_size=(4, 4) and patch_size=15
1 2
