# Demo webcam/image

In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import signal
from IPython import display

from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from skimage.transform import resize
from keras.models import load_model

import joblib
import mtcnn
from imageio import imread

%matplotlib inline

## Configuration

This demo uses the labels and model trained in `svm-classification.ipynb`.

In [None]:
# Default size (in inches) for every matplotlib figure drawn below.
plt.rcParams["figure.figsize"] = (20,10)

# Keras model loaded from disk; calc_embs() calls its predict_on_batch().
model_path = '../model/keras/model/facenet_keras.h5'
model = load_model(model_path)

# Classifier and label array previously saved to disk (paths are relative to
# the current working directory).
# NOTE(review): joblib.load unpickles the file - only load files you trust.
clf = joblib.load('svm-model.sav')
labels = np.load('svm-labels.npy')
# Bare expression: shows the loaded labels as the notebook cell output.
labels

## Functions

In [None]:
def prewhiten(x):
    """Standardise an image (3-D) or a batch of images (4-D).

    Each image is shifted to zero mean and divided by its standard
    deviation.  The divisor is floored at ``1 / sqrt(n)``, where ``n`` is the
    number of elements in one image, so a constant image maps to zeros
    instead of dividing by zero.

    Args:
        x: Array with 3 dimensions (one image) or 4 dimensions (batch whose
            first axis indexes the images).

    Returns:
        Array of the same shape as ``x`` holding the standardised values.

    Raises:
        ValueError: If ``x`` has neither 3 nor 4 dimensions.
    """
    if x.ndim not in (3, 4):
        raise ValueError('Dimension should be 3 or 4')

    if x.ndim == 4:
        # Statistics are computed per image, never across the batch axis.
        reduce_axes, n_elements = (1, 2, 3), x[0].size
    else:
        reduce_axes, n_elements = (0, 1, 2), x.size

    centre = np.mean(x, axis=reduce_axes, keepdims=True)
    spread = np.std(x, axis=reduce_axes, keepdims=True)
    # Lower bound on the divisor keeps flat images from blowing up.
    divisor = np.maximum(spread, 1.0/np.sqrt(n_elements))
    return (x - centre) / divisor

def l2_normalize(x, axis=-1, epsilon=1e-10):
    """Scale ``x`` to unit Euclidean length along ``axis``.

    Args:
        x: Array to normalise.
        axis: Axis (or axes) along which the norm is taken.
        epsilon: Lower bound applied to the squared norm before the square
            root, so all-zero vectors do not cause a division by zero.

    Returns:
        Array of the same shape as ``x``.
    """
    squared_norm = np.sum(np.square(x), axis=axis, keepdims=True)
    norm = np.sqrt(np.maximum(squared_norm, epsilon))
    return x / norm

In [None]:
def calc_embs(imgs, margin, batch_size):
    """Compute L2-normalised embeddings for a batch of images.

    The images are standardised with ``prewhiten``, fed to the module-level
    ``model`` in slices of ``batch_size`` via ``predict_on_batch``, and the
    concatenated outputs are passed through ``l2_normalize``.

    Args:
        imgs: Images to embed; sliced along the first axis.
        margin: Not used by this function; kept so existing callers keep
            working.
        batch_size: Number of images sent to the model per call.

    Returns:
        The L2-normalised model outputs for all images, concatenated along
        the first axis.
    """
    whitened = prewhiten(imgs)
    batch_starts = range(0, len(whitened), batch_size)
    raw_outputs = [
        model.predict_on_batch(whitened[first:first+batch_size])
        for first in batch_starts
    ]
    return l2_normalize(np.concatenate(raw_outputs))

In [None]:
class FaceDemo(object):
    """Detect faces in frames and label them with a trained classifier.

    ``capture`` streams frames from the default webcam until SIGINT is
    received; ``infer`` annotates a single frame and can also be used on its
    own for still images.
    """

    # Detections whose detector confidence is not above this value are skipped.
    MIN_FACE_CONFIDENCE = 0.8
    # A name is drawn only when the best class probability is above this
    # value (mean + std of the error samples in the test dataset).
    MIN_LABEL_PROBA = 0.30

    def __init__(self, clf, labels):
        """Store the classifier and fit a label encoder on ``labels``.

        Args:
            clf: Classifier exposing ``predict_proba``; applied to the face
                embeddings.
            labels: Label values used to fit the ``LabelEncoder`` that maps
                predicted class indices back to names.
        """
        self.vc = None
        self.margin = 32
        self.batch_size = 1
        self.n_img_per_person = 10
        self.is_interrupted = False
        self.data = {}
        self.le = LabelEncoder().fit(labels)
        self.clf = clf
        self.detector = mtcnn.MTCNN()

    def _signal_handler(self, signal, frame):
        """SIGINT handler: ask the capture loop to stop after this frame."""
        self.is_interrupted = True

    def capture(self):
        """Show annotated webcam frames until interrupted or the feed ends.

        Opens video device 0, installs a temporary SIGINT handler so that an
        interrupt stops the loop cleanly, and displays every frame returned
        by ``infer``.  The camera is always released and the previous SIGINT
        handler restored on exit, including when a read fails or an
        exception is raised.
        """
        vc = cv2.VideoCapture(0)
        self.vc = vc
        self.is_interrupted = False
        previous_handler = signal.signal(signal.SIGINT, self._signal_handler)

        try:
            # The first frame is read only to check that the camera delivers.
            is_capturing = vc.isOpened() and vc.read()[0]

            while is_capturing and not self.is_interrupted:
                is_capturing, frame = vc.read()
                if not is_capturing or frame is None:
                    # The feed ended or the read failed: nothing to annotate.
                    break

                frame = self.infer(frame)

                display.clear_output(wait=True)
                plt.imshow(frame)
                plt.xticks([])
                plt.yticks([])

                try:
                    plt.pause(0.1)
                except Exception:
                    # Deliberately best-effort: a failed redraw must not end
                    # the capture session.
                    pass
        finally:
            vc.release()
            # signal.signal() returns None when the old handler was not
            # installed from Python; it cannot be re-installed in that case.
            if previous_handler is not None:
                signal.signal(signal.SIGINT, previous_handler)

    def align_face(self, img, shape):
        """Rotate ``img`` around the midpoint between the eyes.

        Args:
            img: Image array; its first two dimensions are height and width.
            shape: Mapping with ``'left_eye'`` and ``'right_eye'`` entries,
                each an ``(x, y)`` pair.

        Returns:
            The rotated image, same width and height as ``img``.
        """
        (h, w) = img.shape[:2]

        # Angle of the line joining the two eyes.
        d_y = shape['right_eye'][1] - shape['left_eye'][1]
        d_x = shape['right_eye'][0] - shape['left_eye'][0]
        angle = np.degrees(np.arctan2(d_y, d_x))

        # Midpoint between the two eyes, used as the centre of rotation.
        eyes_center = ((shape['left_eye'][0] + shape['right_eye'][0]) // 2,
                       (shape['left_eye'][1] + shape['right_eye'][1]) // 2)

        # Rotate (without scaling) about that midpoint.
        M = cv2.getRotationMatrix2D(eyes_center, angle, scale=1.0)
        aligned = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC)

        return aligned

    def infer(self, frame):
        """Detect, classify and annotate the faces in one frame.

        Args:
            frame: Image array in BGR channel order (it is converted with
                ``cv2.COLOR_BGR2RGB`` before detection).

        Returns:
            An RGB copy of the frame with a rectangle around every accepted
            detection and, when the classifier is confident enough, the
            predicted name and probability.  The input array is not modified.
        """
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Draw on a separate copy so the annotations of one face never end up
        # inside the crop of an overlapping face.
        annotated = frame.copy()
        frame_h, frame_w = frame.shape[:2]
        half_margin = self.margin // 2

        for face in self.detector.detect_faces(frame):
            if face['confidence'] <= self.MIN_FACE_CONFIDENCE:
                continue

            (x, y, w, h) = face['box']
            # Grow the box by half the margin on each side, clipped to the
            # frame.
            x0 = max(x - half_margin, 0)
            y0 = max(y - half_margin, 0)
            x1 = min(x + w + half_margin, frame_w)
            y1 = min(y + h + half_margin, frame_h)
            if x1 <= x0 or y1 <= y0:
                # Box lies outside the frame: an empty crop cannot be resized.
                continue

            crop = resize(frame[y0:y1, x0:x1, :], (160, 160), mode='reflect')

            embs = calc_embs(crop[np.newaxis], self.margin, self.batch_size)
            # Use the classifier given to the constructor, not a global.
            probabilities = self.clf.predict_proba(embs)[0]
            best = probabilities.argmax()
            best_proba = probabilities[best]
            # NOTE(review): assumes the column order of predict_proba matches
            # the LabelEncoder's encoding - confirm against the training code.
            name = self.le.inverse_transform([best])[0]

            if best_proba > self.MIN_LABEL_PROBA:
                cv2.putText(annotated, '{} ({:.2f})'.format(name, best_proba),
                            (x-self.margin, y-self.margin-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.25, (255, 0, 0), 2,
                            cv2.LINE_AA)

            cv2.rectangle(annotated, (x0-1, y0-1), (x1+1, y1+1), (255, 0, 0),
                          thickness=3)

        return annotated

## Webcam

In [None]:
f = FaceDemo(clf, labels)

In [None]:
f.capture()

## Image

In [None]:
# Path of the still image to annotate.
picture = '~/Desktop/Screen Shot 2020-04-27 at 23.39.04.png'

img = imread(picture)
# infer() starts with its own COLOR_BGR2RGB conversion, so the channels are
# swapped once here and once more inside infer().
# NOTE(review): presumably imread returns RGB, in which case this call really
# acts as RGB -> BGR (the swap is symmetric) to give infer() the BGR input it
# expects - confirm.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

h, w, d = img.shape

# Figure size in inches is the image size in pixels divided by 90.
plt.figure(figsize=(w/90,h/90))
plt.imshow(f.infer(img), aspect='auto')