# Face detection + Emotion + Age + Gender

## 1. Get the libraries

In [1]:
import os
import sys
import time

import cv2
import numpy as np
from IPython import display
from openvino.runtime import Core

sys.path.append("utils")
import notebook_utils as utils

## 2. Get the model

In [2]:
# Download an Open Model Zoo model, convert it to OpenVINO IR when the IR file
# is missing, and return the model compiled for the "CPU" device.
#
# Parameters:
#   base_model_dir: directory given to omz_downloader as both --output_dir and
#                   --cache_dir, and to omz_converter as --download_dir.
#   model_name:     model name given to both tools; also the IR file stem.
#   precision:      precision directory of the IR and value of --precisions.
#   sub_dir:        directory level between base_model_dir and model_name in
#                   the expected IR path.
#
# Returns whatever Core.compile_model(...) returns for device_name="CPU".
#
# NOTE: the `! $command` lines are IPython shell escapes, so this function only
# works when the cell is executed by IPython/Jupyter, not as plain Python.
def get_model(base_model_dir,model_name,precision = "FP16",sub_dir='intel'):
    download_command = f"omz_downloader " \
                   f"--name {model_name} " \
                   f"--output_dir {base_model_dir} " \
                   f"--cache_dir {base_model_dir}"
    # The download command is issued on every call; there is no existence
    # check in front of it (unlike the conversion step below).
    ! $download_command
    # Path where the IR (.xml) file is expected to be found.
    converted_model_path = f"{base_model_dir}/{sub_dir}/{model_name}/{precision}/{model_name}.xml"

    # Only run the converter when the IR file is not already on disk.
    if not os.path.exists(converted_model_path):
        convert_command = f"omz_converter " \
                          f"--name {model_name} " \
                          f"--download_dir {base_model_dir} " \
                          f"--precisions {precision}"
        ! $convert_command
    # Initialize OpenVINO Runtime (a fresh Core is created on each call).
    ie_core = Core()
    # Read the network and corresponding weights from a file.
    model = ie_core.read_model(model=converted_model_path)
    return ie_core.compile_model(model=model, device_name="CPU")

#compiled_model=get_model('face_detect','face-detection-adas-0001')

In [3]:
class face_detection:
    """Bundle the compiled face-detection network with its I/O handles.

    Attributes set by the constructor:
        compiled_model: value returned by ``get_model`` for
            'face-detection-adas-0001'.
        output_layer: the compiled model's output at index 0.
        height, width: elements 2 and 3 of the first input's shape.
    """

    def __init__(self):
        net = get_model('face_detect', 'face-detection-adas-0001')
        self.compiled_model = net
        self.output_layer = net.output(0)

        # Only positions 2 and 3 of the input shape are kept.
        input_dims = list(net.input(0).shape)
        net_height, net_width = input_dims[2:4]
        self.height = net_height
        self.width = net_width


# Module-level instance; building it triggers the model download/compile.
face_det = face_detection()

################|| Downloading face-detection-adas-0001 ||################









In [4]:
class emotion_detction:
    """Bundle the compiled emotion-recognition network with its I/O handles.

    Attributes set by the constructor:
        compiled_model: value returned by ``get_model`` for
            'emotions-recognition-retail-0003'.
        output_layer: the compiled model's output at index 0.
        height, width: elements 2 and 3 of the first input's shape.
    """

    def __init__(self):
        net = get_model('emotions_detect', 'emotions-recognition-retail-0003')
        self.compiled_model = net
        self.output_layer = net.output(0)

        # Only positions 2 and 3 of the input shape are kept.
        input_dims = list(net.input(0).shape)
        net_height, net_width = input_dims[2:4]
        self.height = net_height
        self.width = net_width


# Module-level instance; building it triggers the model download/compile.
emotion_det = emotion_detction()

################|| Downloading emotions-recognition-retail-0003 ||################









In [5]:
class age_gender_detection:
    """Bundle the compiled age/gender network with its I/O handles.

    Attributes set by the constructor:
        compiled_model: value returned by ``get_model`` for
            'age-gender-recognition-retail-0013'.
        age: the compiled model's output at index 1.
        gender: the compiled model's output at index 0.
        height, width: elements 2 and 3 of the first input's shape.
    """

    def __init__(self):
        net = get_model('age_gender_model', 'age-gender-recognition-retail-0013')
        self.compiled_model = net

        # NOTE(review): the index-to-meaning mapping (1 = age, 0 = gender) is
        # taken as given here - confirm against the model's output order.
        self.age = net.output(1)
        self.gender = net.output(0)

        # Only positions 2 and 3 of the input shape are kept.
        input_dims = list(net.input(0).shape)
        net_height, net_width = input_dims[2:4]
        self.height = net_height
        self.width = net_width


# Module-level instance; building it triggers the model download/compile.
ag_det = age_gender_detection()

################|| Downloading age-gender-recognition-retail-0013 ||################









In [6]:
def get_points(box,bounds,stretch=0):
    """Turn an (x, y, width, height) box into clamped corner coordinates.

    Args:
        box: sequence whose items 0..3 are x, y, width and height. Negative
            entries are replaced by 0 before anything else is computed.
        bounds: limits for the result; y values are clamped to ``bounds[0]``
            and x values to ``bounds[1]``.
        stretch: fraction of the width/height added on every side of the box.

    Returns:
        ``(x1, x2, y1, y2)`` as ints, each within ``[0, bound]``.

    Raises:
        AssertionError: if the clamped corners come out in the wrong order.
    """
    # The network can sometimes return negative numbers, which make no sense
    # for pixel coordinates or sizes.
    clipped = [max(value, 0) for value in box]
    left, top = clipped[0], clipped[1]
    width, height = clipped[2], clipped[3]

    def clamp(value, upper):
        # Keep the coordinate inside [0, upper] and truncate to int.
        return int(min(max(value, 0), upper))

    # Corner positions, optionally widened by `stretch` on each side.
    x1 = clamp(left - stretch * width, bounds[1])
    x2 = clamp(left + (1 + stretch) * width, bounds[1])
    y1 = clamp(top - stretch * height, bounds[0])
    y2 = clamp(top + (1 + stretch) * height, bounds[0])

    assert x1 <= x2 and y1 <= y2
    return x1, x2, y1, y2

In [7]:
def process_faces(frame, boxes):
    """Incomplete draft of the per-face annotation step.

    NOTE(review): the loop body opens with a literal ``...`` placeholder, and
    the names ``x``, ``x1``/``x2``/``y1``/``y2``, ``color`` and ``index`` used
    below are never assigned inside this function. A later cell of this
    notebook defines ``process_faces`` again with the same signature; that
    later definition rebinds the name and is the complete version.
    """
    
    for box in boxes:
        # Placeholder: the box drawing and emotion steps are elided here.
        ...
         
        #age-gender input
        input_img=x[y1:y2,x1:x2]
        input_img = cv2.resize(
                src=input_img, dsize=(ag_det.width, ag_det.height),interpolation=cv2.INTER_AREA)
        
        
        
        # Add a batch axis, then reorder the axes to [0, 3, 1, 2].
        input_img = input_img[np.newaxis]
        input_img=np.transpose(input_img,[0,3,1,2])
        
        #age-gender output
        output= ag_det.compiled_model([input_img])
        age,gender=output[ag_det.age],output[ag_det.gender]
        
        # The raw age value is multiplied by 100 before being displayed.
        age=np.squeeze(age)
        age*=100
        
        # Entry 0 is compared against the 'female' threshold and entry 1
        # against the 'male' threshold; anything else is labelled 'nb'.
        gender=np.squeeze(gender)
        if (gender[0]>=0.65):
            gender='female '
        elif (gender[1]>=0.55):
            gender='male '
        else:
            gender='nb '
        
        #drawing results
        cv2.putText(
            img=frame,
            text=f"{gender}{age:.0f}{' '}{EMOTION_NAMES[index]}", #{emotion_score:.0f}
            org=(box[0] + 10, box[1] + 30),
            fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=frame.shape[1] / 1000,
            color=color,
            thickness=1,
            lineType=cv2.LINE_AA,
        )

    return frame

In [8]:
def get_boxes(frame, results, thresh=0.1):
    """Convert raw detector output into pixel-space (x, y, w, h) boxes.

    Args:
        frame: image array; only ``frame.shape[:2]`` (height, width) is used.
        results: detector output whose trailing axis holds 7 values per row:
            an ignored first value, then label, score, xmin, ymin, xmax, ymax.
            Coordinates are treated as normalized to [0, 1]. Any leading
            axes are flattened, so both ``(1, 1, N, 7)`` and ``(N, 7)`` work.
        thresh: rows are kept only when ``score > thresh``.

    Returns:
        A list of ``(x, y, width, height)`` tuples of ints, one for each row
        with ``label == 1`` and a score above ``thresh``.
    """
    # The size of the original frame.
    h, w = frame.shape[:2]
    # reshape(-1, 7) instead of squeeze(): squeeze() would also drop the row
    # axis when there is exactly one detection row, leaving a flat 7-vector
    # that cannot be unpacked row by row.
    detections = np.asarray(results).reshape(-1, 7)
    boxes = []
    for _, label, score, xmin, ymin, xmax, ymax in detections:
        if label == 1. and score > thresh:
            # Create a box with pixel coordinates from the box with
            # normalized coordinates [0,1].
            boxes.append(
                tuple(map(int, (xmin * w, ymin * h, (xmax - xmin) * w, (ymax - ymin) * h)))
            )
    return boxes


# Display names, indexed by the arg-max position of the emotion model output.
EMOTION_NAMES=['neutral','happy','sad','surprise','anger']


def _to_model_input(crop, width, height):
    """Resize an HxWxC crop to (width, height) and return it as a 1xCxHxW batch."""
    resized = cv2.resize(src=crop, dsize=(width, height), interpolation=cv2.INTER_AREA)
    return np.transpose(resized[np.newaxis], [0, 3, 1, 2])


def process_faces(frame, boxes, margin=10):
    """Draw a box and a "gender age emotion" label on ``frame`` for each face.

    Args:
        frame: image array (height x width x channels); annotated in place.
        boxes: iterable of ``(x, y, width, height)`` boxes in pixels.
        margin: non-negative number of extra pixels added on every side of the
            face box for the age/gender crop (clipped to the image).

    Returns:
        The same ``frame`` object, with rectangles and text drawn on it.
    """
    # Crops are taken from an untouched copy so that rectangles and text
    # already drawn for earlier faces never end up in a model input.
    clean = frame.copy()
    color = (0, 200, 0)

    for box in boxes:
        x1, x2, y1, y2 = get_points(box, clean.shape[:2])
        # A zero-area box yields an empty crop, which cv2.resize rejects.
        if x2 <= x1 or y2 <= y1:
            continue

        # showing the base box
        cv2.rectangle(img=frame, pt1=(x1, y1), pt2=(x2, y2), color=color, thickness=2)

        # emotion: the label is the highest-scoring output position
        emotion_input = _to_model_input(
            clean[y1:y2, x1:x2], emotion_det.width, emotion_det.height
        )
        scores = np.squeeze(
            emotion_det.compiled_model([emotion_input])[emotion_det.output_layer]
        )
        index = int(np.argmax(scores))

        # age-gender: slightly enlarged crop. The start indices are clamped to
        # 0 because a negative slice start counts from the END of the axis in
        # NumPy, which gave an empty/wrong crop for faces near the top or left
        # edge. Stop indices past the edge are clipped by slicing itself.
        top = max(y1 - margin, 0)
        left = max(x1 - margin, 0)
        ag_input = _to_model_input(
            clean[top:y2 + margin, left:x2 + margin], ag_det.width, ag_det.height
        )
        output = ag_det.compiled_model([ag_input])

        # The raw age value is scaled by 100 for display (presumably the model
        # emits age / 100 - confirm against the model documentation). A new
        # float is built instead of scaling the output buffer in place.
        age = float(np.squeeze(output[ag_det.age])) * 100

        # Entry 0 is checked against the 'female' threshold, entry 1 against
        # the 'male' threshold; anything in between is labelled 'nb'.
        gender_scores = np.squeeze(output[ag_det.gender])
        if gender_scores[0] >= 0.65:
            gender = 'female '
        elif gender_scores[1] >= 0.55:
            gender = 'male '
        else:
            gender = 'nb '

        # drawing results; anchored at the clamped corner so the label stays
        # on screen even when the raw box starts at a negative coordinate
        cv2.putText(
            img=frame,
            text=f"{gender}{age:.0f}{' '}{EMOTION_NAMES[index]}",
            org=(x1 + 10, y1 + 30),
            fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=frame.shape[1] / 1000,
            color=color,
            thickness=1,
            lineType=cv2.LINE_AA,
        )

    return frame

In [9]:
# Main processing function to run object detection.
def main_loop(source=0, flip=False, use_popup=False, skip_first_frames=0):
    """Run face detection and per-face annotation on a video source.

    Args:
        source: forwarded to ``utils.VideoPlayer`` as ``source``.
        flip: forwarded to ``utils.VideoPlayer`` as ``flip``.
        use_popup: when true, frames are shown in an OpenCV window that closes
            on ESC; otherwise each frame is JPEG-encoded and displayed inline
            through ``IPython.display``.
        skip_first_frames: forwarded to ``utils.VideoPlayer``.

    The loop ends when the player returns ``None``, when ESC is pressed in the
    popup window, or on ``KeyboardInterrupt``. A ``RuntimeError`` is printed
    instead of propagated. The player is always stopped on exit.
    """
    player = None
    try:
        # Create a video player to play with target fps.
        player = utils.VideoPlayer(
            source=source, flip=flip, fps=30, skip_first_frames=skip_first_frames
        )
        # Start capturing.
        player.start()
        if use_popup:
            # `title` is only defined (and only used) in popup mode.
            title = "Press ESC to Exit"
            cv2.namedWindow(
                winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE
            )
        while True:
            # Grab the frame.
            frame = player.next()
            if frame is None:
                print("Source ended")
                break
            # If the largest dimension of the frame exceeds 1280, scale the
            # frame down so that it equals 1280, to improve performance.
            scale = 1280 / max(frame.shape)
            if scale < 1:
                frame = cv2.resize(
                    src=frame,
                    dsize=None,
                    fx=scale,
                    fy=scale,
                    interpolation=cv2.INTER_AREA,
                )
            # Resize the image and change dims to fit neural network input.
            input_img = cv2.resize(
                src=frame, dsize=(face_det.width, face_det.height)
            )
            # Create a batch of images (size = 1), then reorder the axes to
            # [0, 3, 1, 2].
            input_img = input_img[np.newaxis, ...]
            input_img=np.transpose(input_img,[0,3,1,2])
            
            # Get the results.
            results = face_det.compiled_model([input_img])[face_det.output_layer]
            
            # Get boxes from network results.
            boxes = get_boxes(frame=frame, results=results)
            
            
            # Annotate every detected face on the frame.
            frame = process_faces(frame=frame, boxes=boxes)
            

            # Use this workaround if there is flickering.
            if use_popup:
                cv2.imshow(winname=title, mat=frame)
                key = cv2.waitKey(1)
                # escape = 27
                if key == 27:
                    break
            else:
                # Encode numpy array to jpg.
                _, encoded_img = cv2.imencode(
                    ext=".jpg", img=frame, params=[cv2.IMWRITE_JPEG_QUALITY, 100]
                )
                # Create an IPython image.
                i = display.Image(data=encoded_img)
                # Display the image in this notebook.
                display.clear_output(wait=True)
                display.display(i)
    # ctrl-c
    except KeyboardInterrupt:
        print("Interrupted")
    # Only RuntimeError is reported here; other exception types propagate.
    except RuntimeError as e:
        print(e)
    finally:
        # Cleanup runs on every exit path, including exceptions.
        if player is not None:
            # Stop capturing.
            player.stop()
        if use_popup:
            cv2.destroyAllWindows()

### Run on a video file

In [10]:
video_file='videos/walking-faces.mp4'
main_loop(source=video_file, flip=False, use_popup=True)


Interrupted


### Run on a webcam

In [11]:
main_loop(source=0, flip=False, use_popup=True)

Interrupted
