# 1. Face Detection

In [1]:
%matplotlib inline
import cv2
import matplotlib.pyplot as plt

In [2]:
face_classifier = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)

In [3]:
def face_rects_detection(gray_img, scaleFactor=1.1, minNeighbors=5):
    faces = face_classifier.detectMultiScale(
    gray_img, scaleFactor, minNeighbors, minSize=(40, 40)
    )
    return faces

In [4]:
def show_img(cv2_img, figsize=(20,10), axis="off", cmap=None):
    plt.figure(figsize=(20,10))
    if cmap is None:
        plt.imshow(cv2_img)
    else:
        plt.imshow(cv2_img, cmap)
    plt.axis(axis)
    plt.show()

In [5]:
def draw_rects(img, rects, color=(0, 255, 0), width=4):
    img_cpy = img.copy()
    for (x, y, w, h) in rects:
        cv2.rectangle(img_cpy, (x, y), (x + w, y + h), (0, 255, 0), 4)
    return img_cpy

# 2. Data Processing

In [6]:
import os
from skimage import feature
import numpy as np

In [7]:
IMAGE_DIRECTORY = "./images/celeb-data"

In [151]:
def get_collections(directory=IMAGE_DIRECTORY):
    collections = {}
    for root, dirs, files in os.walk(directory, topdown=False):
        for name in files:
            file_path = os.path.join(root, name)
            dir_path = os.path.dirname(file_path)
            dir_name = os.path.basename(dir_path)
            if not dir_name in collections:
                collections[dir_name] = []
            collections[dir_name].append(file_path)
    return collections
        

In [152]:
def exact_one_gray_face(file_path, reshape=None):
    img = cv2.imread(file_path)
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    rects = face_rects_detection(gray_img)
    if len(rects) > 0:
        rect = rects[0]
        x, y, w, h = rect
        crop_image = gray_img[y: y + h, x : x + w]
        if reshape is not None:
            crop_image = cv2.resize(crop_image, reshape)
        return crop_image

In [147]:
def get_gray_collections(dir_collections, reshape=None):
    gray_collections = {}
    for key, files in dir_collections.items():
        if key not in gray_collections:
            gray_collections[key] = []
        for file in files:
            face_img = exact_one_gray_face(file, reshape)
            if face_img is not None:
                gray_collections[key].append(face_img)
    return gray_collections

In [11]:
class LocalBinaryPatterns:
	def __init__(self, numPoints, radius):
		# store the number of points and radius
		self.numPoints = numPoints
		self.radius = radius

	def describe(self, image, eps=1e-7):
		# compute the Local Binary Pattern representation
		# of the image, and then use the LBP representation
		# to build the histogram of patterns
		lbp = feature.local_binary_pattern(image, self.numPoints,
			self.radius, method="uniform")
		(hist, _) = np.histogram(lbp.ravel(),
			bins=np.arange(0, self.numPoints + 3),
			range=(0, self.numPoints + 2))

		# normalize the histogram
		hist = hist.astype("float")
		hist /= (hist.sum() + eps)

		# return the histogram of Local Binary Patterns
		return lbp, hist

In [12]:
def split_image(img, gridW=7, gridH=7):
    img_resized = cv2.resize(img, (img.shape[1] // gridW * gridW, img.shape[0] // gridH * gridH))
    grid_list = []
    stepW = img.shape[1] // gridW
    stepH = img.shape[0] // gridH
    for i in range(0, img_resized.shape[0], stepH):
        for j in range(0, img_resized.shape[1], stepH):
            grid_list.append(img_resized[i:i+stepH, j:j+stepW])
    return grid_list

In [13]:
desc = LocalBinaryPatterns(8 , 2)
def get_vector_feature(grid_list, pattern=desc):
    hists = []
    for grid in grid_list:
        lbp , hist = desc.describe(grid)
        hists.append(hist)
    concat = np.concatenate(hists)
    return concat

In [None]:
collections = get_collections()
gray_collections = get_gray_collections(collections)

In [211]:
grid_list = split_image(face)

In [212]:
feature = get_vector_feature(grid_list)

In [214]:
plt.his()

490

# Random Forest with Gray Face Image

In [206]:
from sklearn.model_selection import train_test_split
import cv2
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from skimage.feature import local_binary_pattern

In [207]:
collections = get_collections("./images/collections")
gray_collections = get_gray_collections(collections, reshape=(64,64))

In [208]:
X = []
y = []
label2Id = {}
id2Label = {}
for idx, label in enumerate(gray_collections.keys()):
    label2Id[label] = idx
    id2Label[idx] = label
for label, gray_imgs in gray_collections.items():
    for gray_img in gray_imgs:
        X.append(gray_img.flatten())
        y.append(label2Id[label])

In [209]:
X = np.array(X).astype(np.float32)
y = np.array(y).astype(np.int32)

In [210]:
X /= 255
n_samples, n_features = X.shape
X -= X.mean(axis=0)
X -= X.mean(axis=1).reshape(n_samples, -1)

In [226]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=21, stratify=y
)

In [227]:
print(len(X_train), len(X_test))

410 137


In [228]:
rtree = cv2.ml.RTrees_create()

In [229]:
num_trees = 50
eps = 0.01
criteria = (cv2.TERM_CRITERIA_MAX_ITER + cv2.TERM_CRITERIA_EPS,
            num_trees, eps)
rtree.setTermCriteria(criteria)

In [230]:
rtree.setMaxCategories(len(np.unique(y)))

In [231]:
rtree.setMinSampleCount(2)

In [232]:
rtree.setMaxDepth(1000)

In [233]:
train_data = cv2.ml.TrainData_create(X_train, cv2.ml.ROW_SAMPLE, y_train)
rtree.train(train_data)

True

In [234]:
rtree.getMaxDepth()

25

In [235]:
_, y_hat = rtree.predict(X_test)

In [236]:
accuracy_score(y_test, y_hat)

0.6277372262773723

In [237]:
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(random_state=21, max_depth=25)
tree.fit(X_train, y_train)
tree.score(X_test, y_test)

0.32116788321167883

In [243]:
num_trees = 100
eps = 0.01
criteria = (cv2.TERM_CRITERIA_MAX_ITER + cv2.TERM_CRITERIA_EPS,
            num_trees, eps)
rtree.setTermCriteria(criteria)
rtree.train(X_train, cv2.ml.ROW_SAMPLE, y_train);
_, y_hat = rtree.predict(X_test)
accuracy_score(y_test, y_hat)

0.708029197080292

# LBPH + Random Forest

In [60]:
from sklearn.model_selection import train_test_split
import cv2
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from skimage.feature import local_binary_pattern

In [108]:
def get_lbp(gray):
    row       = gray.shape[0]
    col       = gray.shape[1]
    neighbors = 8
    radius    = 2
    weight    = [[1,1,1,1,1,1,1],
                 [1,1,1,1,1,1,1],
                 [1,1,1,1,1,1,1],
                 [0,1,1,1,1,1,0],
                 [0,1,1,1,1,1,0],
                 [0,1,1,1,1,1,0],
                 [0,1,1,1,1,1,0]]

    #extract the LBP feature of the whole image
    lbp = local_binary_pattern(gray, 
                             neighbors,
                             radius, 
                             method="uniform")
    local_hist=[]
    for r in range(7):
        for c in range(7):
            
            #the range of the block
            r_start = r * int(row / 7)
            c_start = c * int(col / 7)
            
            if((r + 1) * int(row / 7) <= row):
                r_end = (r + 1) * int(row / 7)
            else:
                r_end = row
            if((c + 1) * int(col / 7) <= col):
                c_end = (c + 1) * int(col / 7)
            else:
                c_end = col
            if not weight[r][c] == 0:
                #get the regional histogram
                (hist_temp, _) = np.histogram(lbp[r_start:r_end, c_start:c_end].ravel(),
                                              bins=np.arange(0, neighbors + 3),
                                              range=(0, neighbors + 2))
                #normalize the histogram
                hist_temp = hist_temp.astype("float")
                hist_temp /= (hist_temp.sum())
                
            local_hist = local_hist + list(hist_temp * weight[r][c])
    return lbp, local_hist

In [153]:
collections = get_collections("./images/collections")
gray_collections = get_gray_collections(collections)

In [187]:
collections.keys()

dict_keys(['0', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9'])

In [155]:
X = []
y = []
label2Id = {}
id2Label = {}
for idx, label in enumerate(gray_collections.keys()):
    label2Id[label] = idx
    id2Label[idx] = label
    
desc = LocalBinaryPatterns(8 , 2)
for label, gray_imgs in gray_collections.items():
    for gray_img in gray_imgs:
        lbp, local_hist = get_lbp(gray_img)
        X.append(local_hist)
        y.append(label2Id[label])

In [156]:
X = np.array(X).astype(np.float32)
y = np.array(y).astype(np.int32)

In [157]:
X.shape

(547, 490)

In [158]:
# X /= 255
# n_samples, n_features = X.shape
# X -= X.mean(axis=0)
# X -= X.mean(axis=1).reshape(n_samples, -1)

In [159]:
X.shape

(547, 490)

In [188]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=21, stratify=y
)

In [189]:
len(X_train)

410

In [190]:
rtree = cv2.ml.RTrees_create()

In [191]:
num_trees = 50
eps = 0.01
criteria = (cv2.TERM_CRITERIA_MAX_ITER + cv2.TERM_CRITERIA_EPS,
            num_trees, eps)
rtree.setTermCriteria(criteria)

In [192]:
rtree.setMaxCategories(len(np.unique(y)))
rtree.setMinSampleCount(2)
rtree.setMaxDepth(1000)

In [193]:
train_data = cv2.ml.TrainData_create(X_train, cv2.ml.ROW_SAMPLE, y_train)
rtree.train(train_data)

True

In [194]:
rtree.getMaxDepth()

25

In [195]:
_, y_hat = rtree.predict(X_test)
accuracy_score(y_test, y_hat)

0.583941605839416

In [196]:
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(random_state=21, max_depth=25)
tree.fit(X_train, y_train)
tree.score(X_test, y_test)

0.30656934306569344

In [205]:
num_trees = 75
eps = 0.01
criteria = (cv2.TERM_CRITERIA_MAX_ITER + cv2.TERM_CRITERIA_EPS,
            num_trees, eps)
rtree.setTermCriteria(criteria)
rtree.train(X_train, cv2.ml.ROW_SAMPLE, y_train);
_, y_hat = rtree.predict(X_test)
accuracy_score(y_test, y_hat)

0.6277372262773723

# OpenCV LBPH

In [245]:
from sklearn.model_selection import train_test_split
import cv2
from sklearn.metrics import accuracy_score

In [248]:
recognizer = cv2.face.LBPHFaceRecognizer_create() 
detector  = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)

AttributeError: module 'cv2' has no attribute 'face'