# import

In [1]:
import sys
# '/opt/intel/openvino_2019.2.242/python/python3.7' doesn't work 
sys.path[1] = '/opt/intel/openvino_2019.2.242/python/python3.6'

from openvino.inference_engine import IENetwork, IEPlugin, IECore

import requests
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import os
import math
import matplotlib.pyplot as plt

In [27]:
cv2.__version__

'4.1.1-openvino'

In [2]:
import logging as log

In [3]:
import time


# init setting

In [57]:
device = "CPU"
plugin = IEPlugin(device=device, plugin_dirs=None)
if device == "CPU":
    plugin.add_cpu_extension("./deployment_tools/inference_engine/lib/intel64/libcpu_extension.dylib")

if "CPU" in device:
    supported_layers = ie.query_network(net, "CPU")
    not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
    if len(not_supported_layers) != 0:
        log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                  format(args.device, ', '.join(not_supported_layers)))
        log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                  "or --cpu_extension command line argument")
        sys.exit(1)

# model

In [5]:
task_names = ['detect_face', 'emotion_recognition', 'estimate_headpose', 'detect_person']

In [6]:
path_model_dir = './model/'
tasks = {'detect_face':'face-detection-adas-0001.xml',
                'emotion_recognition': 'emotions-recognition-retail-0003.xml',
                'estimate_headpose': 'head-pose-estimation-adas-0001.xml', 
                'detect_person': 'person-detection-retail-0002.xml'}

In [7]:
path_extension = "./deployment_tools/inference_engine/lib/intel64/libcpu_extension.dylib"

In [42]:
class Args(object):
    def __init__(self, device='CPU', input_='cam', ):
        self.device = device
        self.input = input_
        self.labels=None
        self.prob_threshold = 0.5

In [43]:
args = Args(device)

In [44]:
class Model(object):
    # TODO: load from config
    def __init__(self, task, device="CPU"):
        self.task = task
        self._set_model_path()
        # Read IR
        self.net = IENetwork(model=self.model_xml, weights=self.model_bin)
        # Load Model
        self._set_ieplugin()
        
    def _set_ieplugin(self):
        plugin = IEPlugin(device=device, plugin_dirs=None)
        if device == "CPU":
            plugin.add_cpu_extension(path_extension)
        
        self.exec_net = plugin.load(network=self.net, num_requests=2)
        self._get_io_blob()

    def _set_model_path(self):
        model_name = tasks[self.task]

        self.model_xml = os.path.join(path_model_dir, model_name)
        self.model_bin = os.path.splitext(self.model_xml)[0] + ".bin"
    
    def _get_io_blob(self):
        self.input_blob = next(iter(self.net.inputs))
        self.out_blob = next(iter(self.net.outputs))
    
    def _in_frame(self, frame, n, c, h, w):
        in_frame = cv2.resize(frame, (w, h))
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))
        return in_frame


In [54]:
class ModelDetectPerson(Model):
    def __init__(self, task):
        super().__init__(task)
        self._set_iecore()
    
    def _set_iecore(self):
        self.ie = IECore()
        self.ie.add_extension(path_extension, "CPU")
        self.exec_net = self.ie.load_network(network=self.net, num_requests=2, device_name=args.device)
        
        if "CPU" in device:
            supported_layers = self.ie.query_network(self.net, "CPU")
            not_supported_layers = [l for l in self.net.layers.keys() if l not in supported_layers]
            if len(not_supported_layers) != 0:
                log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                          format(args.device, ', '.join(not_supported_layers)))
                log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                          "or --cpu_extension command line argument")
                sys.exit(1)
        
    def compute(self):
        img_info_input_blob = None
        feed_dict = {}
        for blob_name in self.net.inputs:
            if len(self.net.inputs[blob_name].shape) == 4:
                input_blob = blob_name
            elif len(self.net.inputs[blob_name].shape) == 2:
                img_info_input_blob = blob_name
            else:
                raise RuntimeError("Unsupported {}D input layer '{}'. Only 2D and 4D input layers are supported"
                                   .format(len(net.inputs[blob_name].shape), blob_name))

        assert len(self.net.outputs) == 1, "Demo supports only single output topologies"
        
        out_blob = next(iter(self.net.outputs))
        log.info("Loading IR to the plugin...")
        # exec_net = self.ie.load_network(network=self.net, num_requests=2, device_name=args.device)
        # Read and pre-process input image
        n, c, h, w = self.net.inputs[input_blob].shape
        if img_info_input_blob:
            feed_dict[img_info_input_blob] = [h, w, 1]

        if args.input == 'cam':
            input_stream = 0
        else:
            input_stream = args.input
            assert os.path.isfile(args.input), "Specified input file doesn't exist"
        if args.labels:
            with open(args.labels, 'r') as f:
                labels_map = [x.strip() for x in f]
        else:
            labels_map = None

        cap = cv2.VideoCapture(input_stream)

        cur_request_id = 0
        next_request_id = 1

        log.info("Starting inference in async mode...")
        is_async_mode = True
        render_time = 0
        ret, frame = cap.read()

        print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")
        print("To switch between sync/async modes, press TAB key in the output window")

        while cap.isOpened():
            if is_async_mode:
                ret, next_frame = cap.read()
            else:
                ret, frame = cap.read()
            if not ret:
                break
            initial_w = cap.get(3)
            initial_h = cap.get(4)
            # Main sync point:
            # in the truly Async mode we start the NEXT infer request, while waiting for the CURRENT to complete
            # in the regular mode we start the CURRENT request and immediately wait for it's completion
            inf_start = time.time()
            if is_async_mode:
                in_frame = cv2.resize(next_frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                feed_dict[self.input_blob] = in_frame
                # import pdb; pdb.set_trace()
                self.exec_net.start_async(request_id=next_request_id, inputs=feed_dict)
    
            else:
                in_frame = cv2.resize(frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                feed_dict[input_blob] = in_frame
                self.exec_net.start_async(request_id=cur_request_id, inputs=feed_dict)
            if self.exec_net.requests[cur_request_id].wait(-1) == 0:
                inf_end = time.time()
                det_time = inf_end - inf_start

                # Parse detection results of the current request
                res = self.exec_net.requests[cur_request_id].outputs[out_blob]
                for obj in res[0][0]:
                    # Draw only objects when probability more than specified threshold
                    if obj[2] > args.prob_threshold:
                        xmin = int(obj[3] * initial_w)
                        ymin = int(obj[4] * initial_h)
                        xmax = int(obj[5] * initial_w)
                        ymax = int(obj[6] * initial_h)
                        class_id = int(obj[1])
                        # Draw box and label\class_id
                        color = (min(class_id * 12.5, 255), min(class_id * 7, 255), min(class_id * 5, 255))
                        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                        det_label = labels_map[class_id] if labels_map else str(class_id)
                        cv2.putText(frame, det_label + ' ' + str(round(obj[2] * 100, 1)) + ' %', (xmin, ymin - 7),
                                    cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)

                # Draw performance stats
                inf_time_message = "Inference time: N\A for async mode" if is_async_mode else \
                    "Inference time: {:.3f} ms".format(det_time * 1000)
                render_time_message = "OpenCV rendering time: {:.3f} ms".format(render_time * 1000)
                async_mode_message = "Async mode is on. Processing request {}".format(cur_request_id) if is_async_mode else \
                    "Async mode is off. Processing request {}".format(cur_request_id)

                cv2.putText(frame, inf_time_message, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                cv2.putText(frame, render_time_message, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
                cv2.putText(frame, async_mode_message, (10, int(initial_h - 20)), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                            (10, 10, 200), 1)

            #
            render_start = time.time()
            cv2.imshow("Detection Results", frame)
            render_end = time.time()
            render_time = render_end - render_start

            if is_async_mode:
                cur_request_id, next_request_id = next_request_id, cur_request_id
                frame = next_frame

            key = cv2.waitKey(1)
            if key == 27:
                break
            if (9 == key):
                is_async_mode = not is_async_mode
                log.info("Switched to {} mode".format("async" if is_async_mode else "sync"))

        cv2.destroyAllWindows()

In [55]:
model_dp = ModelDetectPerson('detect_person')

In [56]:
model_dp.compute()

To close the application, press 'CTRL+C' here or switch to the output window and press ESC key
To switch between sync/async modes, press TAB key in the output window


ValueError: could not broadcast input array from shape (3) into shape (1,6)

In [48]:
class ModelDetectPerson(Model):
    def __init__(self, task):
        super().__init__(task)
        self._set_iecore()
    
    def _set_iecore(self):
        self.ie = IECore()
        self.ie.add_extension(path_extension, "CPU")
        self.exec_net = self.ie.load_network(network=self.net, num_requests=2, device_name=args.device)
        
        if "CPU" in device:
            supported_layers = self.ie.query_network(self.net, "CPU")
            not_supported_layers = [l for l in self.net.layers.keys() if l not in supported_layers]
            if len(not_supported_layers) != 0:
                log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                          format(args.device, ', '.join(not_supported_layers)))
                log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                          "or --cpu_extension command line argument")
                sys.exit(1)
        
    def main(self):
        img_info_input_blob = None
        feed_dict = {}
        for blob_name in self.net.inputs:
            if len(self.net.inputs[blob_name].shape) == 4:
                input_blob = blob_name
            elif len(self.net.inputs[blob_name].shape) == 2:
                img_info_input_blob = blob_name
            else:
                raise RuntimeError("Unsupported {}D input layer '{}'. Only 2D and 4D input layers are supported"
                                   .format(len(net.inputs[blob_name].shape), blob_name))

        assert len(self.net.outputs) == 1, "Demo supports only single output topologies"
        
        out_blob = next(iter(self.net.outputs))
        log.info("Loading IR to the plugin...")
        # exec_net = self.ie.load_network(network=self.net, num_requests=2, device_name=args.device)
        # Read and pre-process input image
        n, c, h, w = self.net.inputs[input_blob].shape
        if img_info_input_blob:
            feed_dict[img_info_input_blob] = [h, w, 1]

        if args.input == 'cam':
            input_stream = 0
        else:
            input_stream = args.input
            assert os.path.isfile(args.input), "Specified input file doesn't exist"
        if args.labels:
            with open(args.labels, 'r') as f:
                labels_map = [x.strip() for x in f]
        else:
            labels_map = None

        cap = cv2.VideoCapture(input_stream)

        cur_request_id = 0
        next_request_id = 1

        log.info("Starting inference in async mode...")
        is_async_mode = True
        render_time = 0
        ret, frame = cap.read()

        print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")
        print("To switch between sync/async modes, press TAB key in the output window")

        while cap.isOpened():
            if is_async_mode:
                ret, next_frame = cap.read()
            else:
                ret, frame = cap.read()
            if not ret:
                break
            initial_w = cap.get(3)
            initial_h = cap.get(4)
            # Main sync point:
            # in the truly Async mode we start the NEXT infer request, while waiting for the CURRENT to complete
            # in the regular mode we start the CURRENT request and immediately wait for it's completion
            inf_start = time.time()
            if is_async_mode:
                in_frame = cv2.resize(next_frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                feed_dict[self.input_blob] = in_frame
                # import pdb; pdb.set_trace()
                self.exec_net.start_async(request_id=next_request_id, inputs=feed_dict)
    
            else:
                in_frame = cv2.resize(frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                feed_dict[input_blob] = in_frame
                self.exec_net.start_async(request_id=cur_request_id, inputs=feed_dict)
            if self.exec_net.requests[cur_request_id].wait(-1) == 0:
                inf_end = time.time()
                det_time = inf_end - inf_start

                # Parse detection results of the current request
                res = self.exec_net.requests[cur_request_id].outputs[out_blob]
                for obj in res[0][0]:
                    # Draw only objects when probability more than specified threshold
                    if obj[2] > args.prob_threshold:
                        xmin = int(obj[3] * initial_w)
                        ymin = int(obj[4] * initial_h)
                        xmax = int(obj[5] * initial_w)
                        ymax = int(obj[6] * initial_h)
                        class_id = int(obj[1])
                        # Draw box and label\class_id
                        color = (min(class_id * 12.5, 255), min(class_id * 7, 255), min(class_id * 5, 255))
                        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                        det_label = labels_map[class_id] if labels_map else str(class_id)
                        cv2.putText(frame, det_label + ' ' + str(round(obj[2] * 100, 1)) + ' %', (xmin, ymin - 7),
                                    cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)

                # Draw performance stats
                inf_time_message = "Inference time: N\A for async mode" if is_async_mode else \
                    "Inference time: {:.3f} ms".format(det_time * 1000)
                render_time_message = "OpenCV rendering time: {:.3f} ms".format(render_time * 1000)
                async_mode_message = "Async mode is on. Processing request {}".format(cur_request_id) if is_async_mode else \
                    "Async mode is off. Processing request {}".format(cur_request_id)

                cv2.putText(frame, inf_time_message, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                cv2.putText(frame, render_time_message, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
                cv2.putText(frame, async_mode_message, (10, int(initial_h - 20)), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                            (10, 10, 200), 1)

            #
            render_start = time.time()
            cv2.imshow("Detection Results", frame)
            render_end = time.time()
            render_time = render_end - render_start

            if is_async_mode:
                cur_request_id, next_request_id = next_request_id, cur_request_id
                frame = next_frame

            key = cv2.waitKey(1)
            if key == 27:
                break
            if (9 == key):
                is_async_mode = not is_async_mode
                log.info("Switched to {} mode".format("async" if is_async_mode else "sync"))

        cv2.destroyAllWindows()

In [21]:
debug

> [0;32m/Users/yuya/src/pedestrian_traker/ie_api.pyx[0m(368)[0;36mopenvino.inference_engine.ie_api.InferRequest._fill_inputs[0;34m()[0m

ipdb> q


In [26]:
debug

> [0;32m/Users/yuya/src/pedestrian_traker/ie_api.pyx[0m(368)[0;36mopenvino.inference_engine.ie_api.InferRequest._fill_inputs[0;34m()[0m

ipdb> q


In [29]:
class ModelDetectFace(Model):
    # TODO: load from config    
    def get_face_pos(self, frame):
        n, c, h, w = self.net.inputs[self.input_blob].shape
        self.shapes = (n, c, h, w)
        scale = 640 / frame.shape[1]
        frame = cv2.resize(frame, dsize=None, fx=scale, fy=scale)
        
        self.frame_h, self.frame_w = frame.shape[:2]
        init_frame = frame.copy()

        in_frame = self._in_frame(frame, n, c, h, w)
        self.exec_net.start_async(request_id=0, inputs={self.input_blob: in_frame}) # res's shape: [1, 1, 200, 7]

        if self.exec_net.requests[0].wait(-1) == 0:
            res = self.exec_net.requests[0].outputs[self.out_blob]
            faces = res[0][:, np.where(res[0][0][:, 2] > 0.5)] # prob threshold : 0.5
        return faces
    
    def detect_face(self, frame):
        faces = self.get_face_pos(frame)
        
        # frame = init_frame.copy()
        for face in faces[0][0]:
            box = face[3:7] * np.array([self.frame_w, self.frame_h, self.frame_w, self.frame_h])
            (xmin, ymin, xmax, ymax) = box.astype("int")
            """
            xmin = int(face[3] * frame_w)
            ymin = int(face[4] * frame_h)
            xmax = int(face[5] * frame_w)
            ymax = int(face[6] * frame_h)
            """
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        
        return frame

In [30]:
label = ('neutral', 'happy', 'sad', 'surprise', 'anger')
# plot setting
rows = 6
columns = 6
plt.rcParams['figure.figsize'] = (18.0, 18.0)
figsize = (8, 8)

class ModelEmotionRecognition(Model):
 
    def emotion_recognition(self, frame, faces, rect):# 4. Create Async Request
        frame_h, frame_w = frame.shape[:2]
        n, c, h, w = self.net.inputs[self.input_blob].shape
        face_id = 0
        for face in faces[0][0]:
            box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
            (xmin, ymin, xmax, ymax) = box.astype("int")
            face_frame = frame[ymin:ymax, xmin:xmax]
            
            if (face_frame.shape[0]==0) or (face_frame.shape[1]==0):
                continue
            
            in_frame = self._in_frame(frame, n, c, h, w)
            self.exec_net.start_async(request_id=0 ,inputs={self.input_blob: in_frame})

            # 5. Get reponse
            if self.exec_net.requests[0].wait(-1) == 0:
                res = self.exec_net.requests[0].outputs[self.out_blob]
                emotion = label[np.argmax(res[0])]
                ax = plt.subplot(rows, columns, face_id + 1)
                ax.set_title("{}".format(emotion))
                plt.imshow(face_frame)
                face_id +=1
            
            if rect:
                frame = cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            cv2.putText(frame, emotion, (20,50), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0,0,200), 2, cv2.LINE_AA)
        
        return frame


In [31]:
class ModelEstimateHeadpose(Model):
    def _build_camera_matrix(self, center_of_face, focal_length):
    
        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        camera_matrix = np.zeros((3, 3), dtype='float32')
        camera_matrix[0][0] = focal_length
        camera_matrix[0][2] = cx
        camera_matrix[1][1] = focal_length
        camera_matrix[1][2] = cy
        camera_matrix[2][2] = 1

        return camera_matrix

    def _draw_axes(self, frame, center_of_face, yaw, pitch, roll, scale, focal_length):
        yaw *= np.pi / 180.0
        pitch *= np.pi / 180.0
        roll *= np.pi / 180.0

        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        Rx = np.array([[1,                0,                               0],
                       [0,                math.cos(pitch),  -math.sin(pitch)],
                       [0,                math.sin(pitch),   math.cos(pitch)]])
        Ry = np.array([[math.cos(yaw),    0,                  -math.sin(yaw)],
                       [0,                1,                               0],
                       [math.sin(yaw),    0,                   math.cos(yaw)]])
        Rz = np.array([[math.cos(roll),   -math.sin(roll),                 0],
                       [math.sin(roll),   math.cos(roll),                  0],
                       [0,                0,                               1]]) 

        #R = np.dot(Rz, Ry, Rx)
        #ref: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
        #R = np.dot(Rz, np.dot(Ry, Rx))
        R = Rz @ Ry @ Rx
        # print(R)
        camera_matrix = self._build_camera_matrix(center_of_face, focal_length)

        xaxis = np.array(([1 * scale, 0, 0]), dtype='float32').reshape(3, 1)
        yaxis = np.array(([0, -1 * scale, 0]), dtype='float32').reshape(3, 1)
        zaxis = np.array(([0, 0, -1 * scale]), dtype='float32').reshape(3, 1)
        zaxis1 = np.array(([0, 0, 1 * scale]), dtype='float32').reshape(3, 1)

        o = np.array(([0, 0, 0]), dtype='float32').reshape(3, 1)
        o[2] = camera_matrix[0][0]

        xaxis = np.dot(R, xaxis) + o
        yaxis = np.dot(R, yaxis) + o
        zaxis = np.dot(R, zaxis) + o
        zaxis1 = np.dot(R, zaxis1) + o

        xp2 = (xaxis[0] / xaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (xaxis[1] / xaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))
        cv2.line(frame, (cx, cy), p2, (0, 0, 255), 2)

        xp2 = (yaxis[0] / yaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (yaxis[1] / yaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))
        cv2.line(frame, (cx, cy), p2, (0, 255, 0), 2)

        xp1 = (zaxis1[0] / zaxis1[2] * camera_matrix[0][0]) + cx
        yp1 = (zaxis1[1] / zaxis1[2] * camera_matrix[1][1]) + cy
        p1 = (int(xp1), int(yp1))
        xp2 = (zaxis[0] / zaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (zaxis[1] / zaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))

        cv2.line(frame, p1, p2, (255, 0, 0), 2)
        cv2.circle(frame, p2, 3, (255, 0, 0), 2)

        return frame

    def estimate_headpose(self, frame, faces):
        # 4. Create Async Request
        scale = 50
        focal_length = 950.0
        frame_h, frame_w = frame.shape[:2]
        
        n, c, h, w = self.net.inputs[self.input_blob].shape
        
        if len(faces)>0:
            for face in faces[0][0]:
                box = face[3:7] * np.array([frame_w, frame_h, frame_w, frame_h])
                (xmin, ymin, xmax, ymax) = box.astype("int")
                face_frame = frame[ymin:ymax, xmin:xmax]
                
                if (face_frame.shape[0]==0) or (face_frame.shape[1]==0):
                    continue
                in_frame = self._in_frame(frame, n, c, h, w)                
                self.exec_net.start_async(request_id=0 ,inputs={self.input_blob: in_frame})
                if self.exec_net.requests[0].wait(-1) == 0:
                    yaw = .0  # Axis of rotation: y
                    pitch = .0  # Axis of rotation: x
                    roll = .0  # Axis of rotation: z
                    # Each output contains one float value that represents value in Tait-Bryan angles (yaw, pitсh or roll).
                    yaw = self.exec_net.requests[0].outputs['angle_y_fc'][0][0]
                    pitch = self.exec_net.requests[0].outputs['angle_p_fc'][0][0]
                    roll = self.exec_net.requests[0].outputs['angle_r_fc'][0][0]
                    # print("yaw:{:f}, pitch:{:f}, roll:{:f}".format(yaw, pitch, roll))
                    center_of_face = (xmin + face_frame.shape[1] / 2, ymin + face_frame.shape[0] / 2, 0)
                    self._draw_axes(frame, center_of_face, yaw, pitch, roll, scale, focal_length)
        else:
            pass
        
        return frame    

In [32]:
class Detector(object):
    def __init__(self, task='detect_face', rect=True):
        self.task = task
        self.rect = rect
        self._set_model()
        
    def _set_model(self):
        if self.task == 'detect_face':
            self.model_df = ModelDetectFace('detect_face')
        elif self.task == 'emotion_recognition':
            self.model_df = ModelDetectFace('detect_face')
            self.model_er = ModelEmotionRecognition('emotion_recognition')        
        elif self.task == 'estimate_headpose':
            self.model_df = ModelDetectFace('detect_face')
            self.model_eh = ModelEstimateHeadpose('estimate_headpose')
        else:
            raise NotImplementedError
        
    def compute(self, frame):
        if self.task == 'detect_face':
            frame = self.model_df.detect_face(frame)
        elif self.task == 'emotion_recognition':
            faces = self.model_df.get_face_pos(frame)
            frame = self.model_er.emotion_recognition(frame, faces, self.rect)
        elif self.task == 'estimate_headpose':
            faces = self.model_df.get_face_pos(frame)
            frame = self.model_eh.estimate_headpose(frame, faces)
        else:
            raise NotImplementedError
        
        return frame

# run

In [33]:
#task = 'detect_face'
#task = 'emotion_recognition'
task = 'estimate_headpose'

In [34]:
detector = Detector(task)

In [35]:
cap = cv2.VideoCapture(0)
cap.set(3, 640)
cap.set(4, 480)
while True:
    ret, frame = cap.read()
    
    frame = np.array(frame)

    #######
    
    frame = detector.compute(frame)
    
    #######
    cv2.imshow('demo', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()