# Ejercicio 2: Face Detection

Tal como se explica en la letra del obligatorio final en este ejercicio es necesario implementar desde cero una solucion para detección de caras. Se proveen datos de entrenamiento y es necesario implementar su propio algoritmo de sliding window para entrenar un clasificador. Todo el código necesario para comenzar a trabajar está provisto en este notebook.


\**En los ejercicios del trabajo final es posible utilizar funciones de librerias existentes o código sacado de internet. Siempre y cuando **no se usen para resolver explicitamente lo que pide el ejercicio** y al código sacado de interenet le agreguen el link en comentarios de donde fue sacado ese código.*



##### Imports necesarios

In [1]:
import cv2 
import numpy as np
from glob import glob
from enum import Enum
import os
import sklearn 
import sklearn.neighbors
import matplotlib.pyplot as plt
import pickle
from evaluation import *
import sys
#from google.colab.patches import cv2_imshow
from image_utils import *
#from imutils.object_detection import non_max_suppression

from skimage.io import imread, imshow
from skimage import feature #LBP
from skimage.transform import resize #HoG
from PIL import Image
from skimage.feature import hog #HoG
from skimage import data, exposure #HoG


from sklearn.pipeline import make_pipeline #SVM
from sklearn.preprocessing import StandardScaler #SVM
from sklearn.svm import SVC #SVM
from sklearn import svm ###

#### Feature Extractors 

Para resolver el ejercicio van a tener que implementar las funciones `extract_hog_features` y `extract_lbp_features`

In [2]:
class FeatureExtractors(Enum):
    MiniImage = 1
    HOG = 2
    LBP = 3

def extract_features(method, img):
    '''Switch between Feature extraction Methods'''

    image_representation = []

    if method == FeatureExtractors.MiniImage:
        image_representation = extract_mini_image_features(img)
    elif method == FeatureExtractors.HOG:
        image_representation = extract_hog_features(img)
    elif method == FeatureExtractors.LBP:
        image_representation = extract_lbp_features(img)

    return image_representation

def extract_mini_image_features(img,resize_size=(64,64)):
    resized_image = cv2.resize(img,resize_size)
    image_representation = resized_image.reshape(resize_size[0]*resize_size[1])
    
    return image_representation


def extract_lbp_features(img):
    radius = 3
    n_points = 8 * radius
    lbp = feature.local_binary_pattern(img, n_points,
                                       radius, method="uniform")
    
    return lbp.flatten()

def extract_hog_features(img):
    fd , hog_image = hog(img, orientations=16, pixels_per_cell=(8,8), cells_per_block=(8,8), visualize=True)
    return fd

In [3]:
img = cv2.imread('./data/face_detection/val_face_detection_images/unseen_Paris_Hilton_0001.jpg')
img_gray =  cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
img_hog = extract_features(FeatureExtractors.HOG, img)

#### Data loader

In [4]:
def load_training_data(training_positive_dir,trainign_negative_dir, feature_extractor=FeatureExtractors.MiniImage):
    ''' Function for loading loading training data from positive and negative examples
    '''
    positive_img_files = sorted(glob(training_positive_dir + '/*'))
    negative_img_files = sorted(glob(trainign_negative_dir + '/*'))
    #comment this line for loading all data
    positive_img_files = positive_img_files[:100]
    negative_img_files = negative_img_files[:200]

    training_data = []
    training_labels = []
    
    print('##Loading {} positive face images'.format(len(positive_img_files)))
    for img in positive_img_files:
        image = cv2.imread(img,cv2.IMREAD_GRAYSCALE)
        image_representation = extract_features(feature_extractor,image)
        training_data.append(image_representation)
        training_labels.append(1)
    
    print('##Loading {} negative face images'.format(len(negative_img_files)))
    for img in negative_img_files:
        image = cv2.imread(img,cv2.IMREAD_GRAYSCALE)
        image_representation = extract_features(feature_extractor,image)
        training_data.append(image_representation)
        training_labels.append(0)   
    
    training_data = np.asarray(training_data)
    training_labels = np.asarray(training_labels)
    return training_data, training_labels

def load_validation_data(validation_data_dir):

    validation_image_files = sorted(glob(validation_data_dir + '/*'))
    val_images = []
    for img_file in validation_image_files:
        image = cv2.imread(img_file,cv2.IMREAD_COLOR)
        val_images.append(image)

    return val_images 

#### Sliding Window


In [5]:
def sliding_window(image, window_size, scale, stride):
    [image_rows, image_cols] = image.shape;
    window_rows = window_size[0];
    window_cols = window_size[1];

    patches = np.zeros((window_rows, window_cols,5));
    bbox_locations = np.zeros((5,4))
    r = np.random.randint(0,image_rows-window_rows,5); # Sample top left position
    c = np.random.randint(0,image_cols-window_cols,5);
    for i in range(0,5):
        patches[:,:,i] = image[r[i]:r[i]+window_rows, c[i]:c[i]+window_cols];
        bbox_locations[i,:] = [r[i],c[i],window_rows,window_cols]; # top-left y,x, height, width

    return patches, bbox_locations


##### Metodos Auxiliares


In [6]:
def show_image_with_bbox(image,bboxes,draw_GT=True):
    GT = [82,91,166,175]
    if draw_GT:
        cv2.rectangle(image, (GT[0],GT[1]), (GT[2],GT[3]), (0, 0, 255), 2)

    for bbox in bboxes:
        if len(bbox) == 4:   
            top_left = (int(bbox[0]),int(bbox[1]))
            bottom_right = (int(bbox[0])+ int(bbox[2]),int(bbox[1])+int(bbox[3]))
            cv2.rectangle(image, top_left, bottom_right, (255, 0, 0), 2)
    plt.imshow(image[...,::-1])
    plt.axis('off')
    plt.show()
    #cv2.imshow('image',image)
    #cv2.waitKey(0) #wait for any key
    #cv2.destroyAllWindows()

### Ubicación de los datos ###

In [7]:
data_dir='./data'
face_detection_dir = os.path.join(data_dir, 'face_detection')
training_faces_dir = os.path.join(face_detection_dir,'cropped_faces')
negative_examples_training_dir = os.path.join(face_detection_dir,'non_faces_images','neg_cropped_img')
validation_faces_dir = os.path.join(face_detection_dir,'val_face_detection_images')
validation_raw_faces_dir = os.path.join(face_detection_dir,'val_raw_images')

## Entrenar Modelo y Face Detection


### Cargar Datos de Entrenamiento ###

In [8]:
#Modify data_loader.py to load more training data
training_data, trainig_labels = load_training_data(training_faces_dir,negative_examples_training_dir, FeatureExtractors.MiniImage)
# You can save traninig_data and labels on nunmpy files to avoid processing data every time. 

##Loading 100 positive face images
##Loading 200 negative face images


### Load Validation Data ###

In [9]:
validation_data = load_validation_data(validation_faces_dir)

### Entrenar un clasificador utilizando los datos de entrenamiento ## 
1. Una vez los datos de entrenamiento han sido cargados es necesario entrenar su propio clasificador 
2. Como solución inicial se utiliza un clasificador KNN pero para tener mejores resultados es posible entrenar un SVM
3. Train your own classifier and save it on 'face_detector' file.Liblinear is suggested sample code for liblinear can be seen in './liblinear-2.30/python/README' file, good starting parameters for the classfier are '-s 2 -B 1'. 

In [10]:
knn_classifier = sklearn.neighbors.KNeighborsClassifier(n_neighbors=3) #CAMBIAR POR UN SVM

In [11]:
knn_classifier.fit(training_data,trainig_labels)

KNeighborsClassifier(n_neighbors=3)

In [12]:
clf = svm.SVC(C=1.0,probability=True)
clf.fit(training_data,trainig_labels)

SVC(probability=True)

#### Guardar el modelo entrenado ##

In [13]:
pickle.dump(knn_classifier,open('./face_detector', 'wb'))
pickle.dump(clf,open('./face_detector', 'wb'))

#### Cargar el Modelo entrenado 

In [14]:
classifier = pickle.load(open('./face_detector','rb'))

In [15]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
training_data, trainig_labels = load_training_data(training_faces_dir,negative_examples_training_dir, FeatureExtractors.LBP)
clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(training_data, trainig_labels)







##Loading 100 positive face images
##Loading 200 negative face images


Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(gamma='auto'))])

### Do the sliding window and Visualize Results ###
1. In this part you need to perform the sliding window and score the probabilty of each patch of being a face and select the highest probability patches. 
    1a. Program you own sliding window in 'img_utils' file.
    1b. Extract Features and classify each patch with your own classifier. 
2. Do non-max suppression to select the target face patch with best probability (This is provided in 'img_utils')
3. Visualize Detection with Ground Throuth Bounding Box ('Code for Visualization is Provided')

In [16]:
def sliding_window(image, stepSize, windowSize):
    [image_rows, image_cols] = image.shape
    window_rows = windowSize[0];
    window_cols = windowSize[1];
    patches = np.zeros((windowSize[0], windowSize[1],(image_cols-window_cols)*(image_rows-window_rows)))                                              
    bbox_locations = np.zeros(((image_cols-window_cols)*(image_rows-window_rows),4))                                                                  
    i = 0
    # slide a window across the image
    range_y = int((image_cols-window_cols)/stepSize)
    range_x = int((image_rows-window_rows)/stepSize)
    
    for y in range(0, range_y, stepSize):
      for x in range(0, range_x, stepSize):
        patches[:,:,i] = image[x:x+window_rows, y:y+window_cols]
        bbox_locations[i,:] = [x, y, window_rows, window_cols]
        i+=1
    return patches, bbox_locations

In [17]:
window_size = [64, 64]
predictions = []
threshold_p = 0.8    #
overlap_threshold = 0.5     #
validation_data = load_validation_data(validation_faces_dir)
for img in validation_data:
    gray_image = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
    #patches, bbox_locations = sliding_window(gray_image,window_size,1,32)     #
    patches, bbox_locations = sliding_window(gray_image,stepSize=32, windowSize=(64, 64))     #Nuestra
    
    ## You need to extract features for every patch (same features you used for training the classifier)
    patches_feature_representation = []
    for i in range(patches.shape[2]):
        patch_representation = extract_features(FeatureExtractors.HOG, patches[:,:,i])
        patches_feature_representation.append(patch_representation)
    patches_feature_representation = np.asarray(patches_feature_representation)
    ## Get prediction label for each sliding window patch
    labels = classifier.predict(patches_feature_representation)
    ## Get score for each sliding window patch
    scores = classifier.predict_proba(patches_feature_representation)
    ## Positive Face Probabilities
    face_probabilities = scores[:,1]
    face_bboxes = bbox_locations[face_probabilities>threshold_p]
    face_bboxes_probabilites = face_probabilities[face_probabilities>threshold_p]
    # Do non max suppression and select strongest probability box
    [selected_bbox, selected_score] = non_max_suppression(face_bboxes,face_bboxes_probabilites)
    show_image_with_bbox(img, selected_bbox)


ValueError: X.shape[1] = 1024 should be equal to 4096, the number of features at training time

### Evaluate Detector ###

In [None]:
def evaluate_detector(bboxes, positive_probabilites):
    
    true_positives_number = np.zeros((100))
    actual_positives = np.zeros((100))
    predicted_positives = np.zeros((100))
    overlap_threshold = 0.5

    for i in np.arange(0,1,0.01):
        probability_threshold = i
        positive_bboxes = bboxes[positive_probabilites>=probability_threshold]
        positive_bboxes_prob = positive_probabilites[positive_probabilites>=probability_threshold]
        idx = int(np.round(i*100))
        if len(positive_bboxes) > 0:
            [selected_bboxes, selected_scores] = non_max_suppression(positive_bboxes, positive_bboxes_prob,0.3)
            ratio = []
            for selected_bbox in selected_bboxes:
                ratio.append(bb_intersection_over_union(selected_bbox, [82,91,84,84]));                                   
            
            ratio = np.asarray(ratio)
            positive_number = sum(ratio>=0.5); 
            
            true_positives_number[idx] = positive_number>=1;
            actual_positives[idx] = 1
            predicted_positives[idx] = len(ratio)

        else:
            true_positives_number[idx] = 0;
            actual_positives[idx] = 1
            predicted_positives[idx] = 0

    return [true_positives_number, actual_positives, predicted_positives]

In [None]:
total_true_positives = []
total_real_positives = []
total_true_prediction = []
window_size = [64, 64]
score_threshold = 0.5
sum_boxes=0
idx=0
for subject_folder in sorted(glob(validation_raw_faces_dir + '/*')):
    for img in sorted(glob(subject_folder + '/*.jpg')):
        idx+=1
        gray_image = cv2.imread(img,cv2.IMREAD_GRAYSCALE)
        #patches, bbox_locations = sliding_window(gray_image,window_size,1,32)
        
        patches, bbox_locations = sliding_window(gray_image,stepSize=32, windowSize=(64, 64))     #Nuestra
    
        
        ## You need to extract features for every patch (same features you used for training the classifier)
        patches_feature_representation = []
        for i in range(patches.shape[2]):
            patch_representation = extract_features(FeatureExtractors.HOG, patches[:,:,i])
            patches_feature_representation.append(patch_representation)
        patches_feature_representation = np.asarray(patches_feature_representation)
        ## Get score for each sliding window patch
        scores = classifier.predict_proba(patches_feature_representation)
        ## Positive Face Probabilities
        face_probabilities = scores[:,1]
        ## Filter boxes by probability or score KNN probability > 0.5 or SVM score > 0
        [ detected_true_positives, image_real_positives, detected_faces ] = evaluate_detector( bbox_locations, face_probabilities );
        
        total_true_positives.append(detected_true_positives)
        total_real_positives.append(image_real_positives)
        total_true_prediction.append(detected_faces)

total_true_positives = np.asarray(total_true_positives)
total_real_positives = np.asarray(total_real_positives)
total_true_prediction = np.asarray(total_true_prediction)


In [None]:
precision, recall = precision_and_recall(total_true_positives, total_real_positives,total_true_prediction)

In [None]:
plt.plot(recall, precision)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.xlim(0,1.1)
plt.ylim(0,1.1)

In [None]:
ap = interpolated_average_precision(recall,precision)

In [None]:
print('Detection Average Precision is {}'.format(ap))