# Imports

In [1]:
from time import time
import matplotlib.pyplot as plt
import sklearn
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
import os
import cv2
from skimage.feature import local_binary_pattern
import numpy as np
# libraries to support custom function for copying.

import errno
import shutil


# Dataset

In [2]:
def copy(src, dest):
    try:
        shutil.copytree(src, dest)
    except OSError as e:
        # If the error was caused because the source wasn't a directory
        if e.errno == errno.ENOTDIR:
            shutil.copy(src, dest)
        else:
            print('Directory not copied. Error: %s' % e)


In [5]:

src = 'lfw/'
dest = 'lfw_home'
copy(src, dest)

print(os.listdir(src))
print(os.listdir(dest))

['lfw-funneled.tgz', 'pairs.txt', 'pairsDevTest.txt', 'pairsDevTrain.txt']
['lfw-funneled.tgz', 'pairs.txt', 'pairsDevTest.txt', 'pairsDevTrain.txt']


In [6]:
#Load data
lfw_dataset = sklearn.datasets.fetch_lfw_people(data_home=".", min_faces_per_person=100, download_if_missing=False)

n_samples, h, w = lfw_dataset.images.shape
# for machine learning we use the 2 data directly (as relative pixel
# positions info is ignored by this model)
X = lfw_dataset.data
n_features = X.shape[1]

# the label to predict is the id of the person
y = lfw_dataset.target
target_names = lfw_dataset.target_names
n_classes = target_names.shape[0]
print("Total dataset size:")
print("n_samples: %d" % n_samples)
print("n_features: %d" % n_features)
print("n_classes: %d" % n_classes)

Total dataset size:
n_samples: 1140
n_features: 2914
n_classes: 5


In [8]:
# #############################################################################
# Split into a training set and a test set using a stratified k fold 
# split into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

# LBPH

In [9]:
class LBPHfromScratch:
    def __init__(self):
        self.R = 1
        self.P = 8
        self.filter_size = 3
        # Anti-clockwise (right -> up + right -> up -> up + left -> left -> down + left -> down -> down + right)
        self.filter_lbp = np.array([[2, 1], [2, 0], [1, 0], [0, 0], [0, 1], [0, 2], [1, 2], [2, 2]])

    def Compute_LBP(self, img):
        # Determine the dimensions of the input image.
        height = img.shape[0]
        width = img.shape[1]

        if width < self.filter_size or height < self.filter_size:
            raise Exception('Too small input image. Should be at least (2*radius+1) x (2*radius+1)')

        out_width = width - self.filter_size + 1
        out_height = height - self.filter_size + 1

        reference_matrix = img[1:1 + out_height, 1:1 + out_width]

        out_img = np.zeros((out_height, out_width))

        for i in range(0, 8):
            step_x, step_y = self.filter_lbp[i]

            sliding_matrix = img[step_y:step_y + out_height, step_x:step_x + out_width]

            flags = np.greater_equal(sliding_matrix, reference_matrix)

            exponent = np.power(2, i)
            out_img = out_img + (flags * exponent)

        return out_img.astype(np.uint8)

# Histogram Comparator

In [10]:
class Matcher:
    def __init__(self):
        self.n_bins = 256
        
    def kullback_leibler_divergence(self, p, q):
        p = np.asarray(p)
        q = np.asarray(q)
        filt = np.logical_and(p != 0, q != 0)
        return np.sum(p[filt] * np.log2(p[filt] / q[filt]))

    def match(self, refs, lbp):
        best_score = float('inf')
        best_name = None
        #hist = cv2.calcHist([lbp], [0], None, [256], [0, 256])
        #hist /= hist.sum()
        hist, _ = np.histogram(lbp, density = True, bins=8 + 2)
        for name, ref_hist in refs:
            print(hist.shape, ref_hist.shape)
            score = self.kullback_leibler_divergence(hist, ref_hist)
            #score = cv2.compareHist(hist, ref_hist, cv2.HISTCMP_CHISQR)
            if np.abs(score) < best_score:
                best_score = np.abs(score)
                best_name = name
        best_score = best_score * 100
        return best_name, best_score


# Initializing #

In [11]:
lbph_2 = LBPHfromScratch()
classifier = Matcher()

# LBPH from scratch

# Training #

In [12]:
enc_list = []
for item_iter, img_name in zip(X_train,y_train):
    gray_img = item_iter.reshape((h, w))
    temp_img = lbph_2.Compute_LBP(gray_img)
    ref_hist = cv2.calcHist([temp_img], [0], None, [256], [0, 256])
    ref_hist /= ref_hist.sum()
    enc_list.append((img_name, ref_hist))

weights_array = np.array(enc_list, dtype=object)

with open('weights.npy', 'wb') as f:
    np.save(f, weights_array)

# Testing #

In [13]:
with open('weights.npy', 'rb') as f:
    weights_array = np.load(f, allow_pickle=True)
    
# Detect Faces
err = 0

for item_iter, img_name in zip(X_test,y_test):
    gray_test_img = item_iter.reshape((h, w))
    lbp_img = lbph_2.Compute_LBP(gray_test_img)
    
    best_score = float('inf')
    best_name = None
    hist = cv2.calcHist([lbp_img], [0], None, [256], [0, 256])
    hist /= hist.sum()
    for name, ref_hist in weights_array:
        score = cv2.compareHist(hist, ref_hist, cv2.HISTCMP_CORREL)
        if np.abs(score) < best_score:
            best_score = np.abs(score)
            best_name = name
            
    if best_name!=img_name:
        err += 1
print("Accuracy: ", err * 100 / len(X_test))


Accuracy:  75.08771929824562


# Skimage LBPH

# Training

In [14]:
enc_list = []
for item_iter, img_name in zip(X_train,y_train):
    gray_img = item_iter.reshape((h, w))
    temp_img = local_binary_pattern(gray_img, 8, 1, 'uniform')
    
    ref_hist, _ = np.histogram(temp_img, density= True,bins=8 + 2)
    enc_list.append((img_name, ref_hist))

weights_array = np.array(enc_list, dtype=object)

with open('weights.npy', 'wb') as f:
    np.save(f, weights_array)

# Testing #

In [15]:
def kullback_leibler_divergence(p, q):
        p = np.asarray(p)
        q = np.asarray(q)
        filt = np.logical_and(p != 0, q != 0)
        return np.sum(p[filt] * np.log2(p[filt] / q[filt]))

In [16]:
with open('weights.npy', 'rb') as f:
    weights_array = np.load(f, allow_pickle=True)
    

# Detect Faces
err = 0

for item_iter, img_name in zip(X_test,y_test):
    gray_test_img = item_iter.reshape((h, w))
    #lbp_img = lbph_2.Compute_LBP(gray_test_img)
    lbp_img = local_binary_pattern(gray_test_img, 8, 1, 'uniform')
    
    best_score = float('inf')
    best_name = None
    #hist = cv2.calcHist([lbp], [0], None, [256], [0, 256])
    #hist /= hist.sum()
    hist, _ = np.histogram(lbp_img, density = True, bins=8 + 2)
    for name, ref_hist in weights_array:
        #print(hist.shape, ref_hist.shape)
        score = kullback_leibler_divergence(hist, ref_hist)
        #score = cv2.compareHist(hist, ref_hist, cv2.HISTCMP_CHISQR)
        if np.abs(score) < best_score:
            best_score = np.abs(score)
            best_name = name
            
    #print(n, img_name)
    if best_name!=img_name:
        err += 1
print("Accuracy: ", err * 100 / len(X_test))


Accuracy:  62.45614035087719
