diff --git a/.github/workflows/unit-tests-on-python.yml b/.github/workflows/unit-tests-on-python.yml index 59117ab7d5..c31fe71c31 100644 --- a/.github/workflows/unit-tests-on-python.yml +++ b/.github/workflows/unit-tests-on-python.yml @@ -25,7 +25,7 @@ jobs: sudo apt-get update && sudo apt-get install -y libjpeg-dev libpng-dev \ libtiff-dev libavformat-dev libpq-dev libfreeimage3 python -m pip install --upgrade pip - python -m pip --no-cache-dir install -r requirements.txt + python -m pip --no-cache-dir install -r requirements.txt -e srcext/mtcnn_tflite/ python -m src.services.facescan.plugins.setup - name: Test with pytest working-directory: ./embedding-calculator/ diff --git a/embedding-calculator/src/constants.py b/embedding-calculator/src/constants.py index ad4f5dfd38..d2ad7c6c03 100644 --- a/embedding-calculator/src/constants.py +++ b/embedding-calculator/src/constants.py @@ -25,7 +25,7 @@ class ENV(Constants): FACE_DETECTION_PLUGIN = get_env('FACE_DETECTION_PLUGIN', 'facenet.FaceDetector') CALCULATION_PLUGIN = get_env('CALCULATION_PLUGIN', 'facenet.Calculator') - EXTRA_PLUGINS = get_env_split('EXTRA_PLUGINS', 'facenet.LandmarksDetector,agegender.AgeDetector,agegender.GenderDetector,facenet.facemask.MaskDetector') + EXTRA_PLUGINS = get_env_split('EXTRA_PLUGINS', 'facenet.LandmarksDetector,agegender.AgeDetector,agegender.GenderDetector,facenet.facemask.MaskDetector,facenet.coralmtcnn.FaceDetector,facenet.coralmtcnn.Calculator') LOGGING_LEVEL_NAME = get_env('LOGGING_LEVEL_NAME', 'debug').upper() IS_DEV_ENV = get_env('FLASK_ENV', 'production') == 'development' diff --git a/embedding-calculator/src/services/facescan/plugins/facenet/coralmtcnn/__init__.py b/embedding-calculator/src/services/facescan/plugins/facenet/coralmtcnn/__init__.py new file mode 100644 index 0000000000..db89c3a1e9 --- /dev/null +++ b/embedding-calculator/src/services/facescan/plugins/facenet/coralmtcnn/__init__.py @@ -0,0 +1,15 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing +# permissions and limitations under the License. + +from src.services.facescan.plugins.dependencies import get_tensorflow + +requirements = get_tensorflow() diff --git a/embedding-calculator/src/services/facescan/plugins/facenet/coralmtcnn/coralmtcnn.py b/embedding-calculator/src/services/facescan/plugins/facenet/coralmtcnn/coralmtcnn.py new file mode 100644 index 0000000000..941507c213 --- /dev/null +++ b/embedding-calculator/src/services/facescan/plugins/facenet/coralmtcnn/coralmtcnn.py @@ -0,0 +1,170 @@ +# Copyright (c) 2020 the original author or authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. 
See the License for the specific language governing
+# permissions and limitations under the License.
+
+import logging
+import math
+import cv2
+from typing import List
+
+import tensorflow as tf
+import numpy as np
+from cached_property import cached_property
+from mtcnn_tflite.MTCNN import MTCNN
+
+from src.constants import ENV
+from src.services.dto.bounding_box import BoundingBoxDTO
+from src.services.facescan.plugins import mixins
+from src.services.facescan.imgscaler.imgscaler import ImgScaler
+from src.services.imgtools.proc_img import crop_img, squish_img
+from src.services.imgtools.types import Array3D
+from src.services.utils.pyutils import get_current_dir
+
+from src.services.facescan.plugins import base
+
+CURRENT_DIR = get_current_dir(__file__)
+logger = logging.getLogger(__name__)
+
+def prewhiten(img):
+    """ Normalize image."""
+    mean = np.mean(img)
+    std = np.std(img)
+    std_adj = np.maximum(std, 1.0 / np.sqrt(img.size))
+    y = np.multiply(np.subtract(img, mean), 1 / std_adj)
+    return y
+
+class FaceDetector(mixins.FaceDetectorMixin, base.BasePlugin):
+    FACE_MIN_SIZE = 20
+    SCALE_FACTOR = 0.709
+    BOX_MARGIN = 32
+    IMAGE_SIZE = 160
+    IMG_LENGTH_LIMIT = ENV.IMG_LENGTH_LIMIT
+
+    # detection settings
+    det_prob_threshold = 0.85
+    det_threshold_a = 0.9436513301
+    det_threshold_b = 0.7059968943
+    det_threshold_c = 0.5506904359
+
+    @cached_property
+    def _face_detection_net(self):
+        return MTCNN(
+            min_face_size=self.FACE_MIN_SIZE,
+            scale_factor=self.SCALE_FACTOR,
+            steps_threshold=[self.det_threshold_a, self.det_threshold_b, self.det_threshold_c]
+        )
+
+    def crop_face(self, img: Array3D, box: BoundingBoxDTO) -> Array3D:
+        return cv2.resize(crop_img(img, box), (self.IMAGE_SIZE, self.IMAGE_SIZE))
+
+    def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[BoundingBoxDTO]:
+        if det_prob_threshold is None:
+            det_prob_threshold = self.det_prob_threshold
+        assert 0 <= det_prob_threshold <= 1
+        scaler = ImgScaler(self.IMG_LENGTH_LIMIT)
+        img = scaler.downscale_img(img)
+
+        fdn = self._face_detection_net
+        detect_face_result = fdn.detect_faces(img)
+        img_size = np.asarray(img.shape)[0:2]
+        bounding_boxes = []
+
+        for face in detect_face_result:
+            x, y, w, h = face['box']
+            margin_x = w / 8
+            margin_y = h / 8
+            box = BoundingBoxDTO(
+                x_min=int(np.maximum(x - margin_x, 0)),
+                y_min=int(np.maximum(y - margin_y, 0)),
+                x_max=int(np.minimum(x + w + margin_x, img_size[1])),
+                y_max=int(np.minimum(y + h + margin_y, img_size[0])),
+                np_landmarks=np.array([list(value) for value in face['keypoints'].values()]),
+                probability=face['confidence']
+            )
+            logger.debug(f"Found: {box}")
+            bounding_boxes.append(box)
+
+        filtered_bounding_boxes = []
+        for box in bounding_boxes:
+            box = box.scaled(scaler.upscale_coefficient)
+            if box.probability <= det_prob_threshold:
+                logger.debug(f'Box filtered out because below threshold ({det_prob_threshold}): {box}')
+                continue
+            filtered_bounding_boxes.append(box)
+        return filtered_bounding_boxes
+
+
+class Calculator(mixins.CalculatorMixin, base.BasePlugin):
+    ml_models = (
+        # converted facenet .tflite model
+        ('20180402-114759-edgetpu', '1Uwv8w6Uj5M_xdJI9sjay_wkoFoI_zbjk', (1.1817961, 5.291995557), 0.4),
+    )
+    BATCH_SIZE = 25
+    DELEGATES = 'libedgetpu.so.1'
+
+    @property
+    def ml_model_file(self):
+        return str(self.ml_model.path / f'{self.ml_model.name}.tflite')
+
+    @cached_property
+    def _embedding_calculator_tpu(self):
+        delegate_list = tf.lite.experimental.load_delegate(self.DELEGATES)
+        model = tf.lite.Interpreter(
model_path=self.ml_model_file, + experimental_delegates=[delegate_list]) + return model + + @cached_property + def _embedding_calculator(self): + model = tf.lite.Interpreter(model_path=self.ml_model_file) + return model + + def calc_embedding(self, face_img: Array3D, mode='CPU') -> Array3D: + return self._calculate_embeddings([face_img], mode)[0] + + def _calculate_embeddings(self, cropped_images, mode='CPU'): + """Run forward pass to calculate embeddings""" + if mode == 'TPU': + calc_model = self._embedding_calculator_tpu + else: + calc_model = self._embedding_calculator + cropped_images = [prewhiten(img).astype(np.float32) for img in cropped_images] + + input_details = calc_model.get_input_details() + input_index = input_details[0]['index'] + input_shape = input_details[0]['shape'] + input_size = tuple(input_shape[1:4]) + + output_details = calc_model.get_output_details() + output_index = output_details[0]['index'] + embedding_size = output_details[0]['shape'][1] + + image_count = len(cropped_images) + batches_per_epoch = int(math.ceil(1.0 * image_count / self.BATCH_SIZE)) + embeddings = np.zeros((image_count, embedding_size)) + preprocessed_images = np.array([img for img in cropped_images]) + + for i in range(batches_per_epoch): + start_index = i * self.BATCH_SIZE + end_index = min((i + 1) * self.BATCH_SIZE, image_count) + calc_model.resize_tensor_input(input_index, (end_index-start_index, input_size[0], input_size[1], input_size[2])) + calc_model.resize_tensor_input(output_index, (end_index-start_index, embedding_size)) + calc_model.allocate_tensors() + calc_model.set_tensor(input_index, preprocessed_images[start_index:end_index]) + calc_model.invoke() + embeddings[start_index:end_index, :] = calc_model.get_tensor(output_index) + return embeddings + + +class LandmarksDetector(mixins.LandmarksDetectorMixin, base.BasePlugin): + """ Extract landmarks from FaceDetector results.""" \ No newline at end of file diff --git a/embedding-calculator/srcext/mtcnn_tflite/LICENSE b/embedding-calculator/srcext/mtcnn_tflite/LICENSE new file mode 100644 index 0000000000..8cf6a94bc7 --- /dev/null +++ b/embedding-calculator/srcext/mtcnn_tflite/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2019 Iván de Paz Centeno +Copyright (c) 2021 CDL Digidow + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
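The `Calculator` above keeps two lazily built interpreters and picks one via the `mode` argument of `calc_embedding` ('TPU' or 'CPU'). As a minimal sketch of the same Edge TPU delegate pattern (separate from the patch, assuming TF 2.x and the `libedgetpu` runtime installed by the Dockerfiles below; the CPU fallback and the model path are illustrative, not part of the plugin):

```
import numpy as np
import tensorflow as tf

def load_interpreter(model_path):
    """Prefer the Edge TPU delegate; fall back to plain CPU inference."""
    try:
        delegate = tf.lite.experimental.load_delegate('libedgetpu.so.1')
        return tf.lite.Interpreter(model_path=model_path,
                                   experimental_delegates=[delegate])
    except ValueError:  # runtime missing or no Edge TPU device attached
        return tf.lite.Interpreter(model_path=model_path)

interpreter = load_interpreter('20180402-114759-edgetpu.tflite')  # placeholder path
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]
faces = np.zeros(inp['shape'], dtype=np.float32)  # batch of prewhitened face crops
interpreter.set_tensor(inp['index'], faces)
interpreter.invoke()
embeddings = interpreter.get_tensor(out['index'])
```

`load_delegate` raises `ValueError` when the delegate library cannot be loaded, which is why the sketch catches it; the plugin itself leaves that decision to the caller through `mode`.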
diff --git a/embedding-calculator/srcext/mtcnn_tflite/MANIFEST.in b/embedding-calculator/srcext/mtcnn_tflite/MANIFEST.in
new file mode 100644
index 0000000000..eebfd18bb3
--- /dev/null
+++ b/embedding-calculator/srcext/mtcnn_tflite/MANIFEST.in
@@ -0,0 +1 @@
+include mtcnn_tflite/data/mtcnn_weights.npy
diff --git a/embedding-calculator/srcext/mtcnn_tflite/README.md b/embedding-calculator/srcext/mtcnn_tflite/README.md
new file mode 100644
index 0000000000..45ee929b4e
--- /dev/null
+++ b/embedding-calculator/srcext/mtcnn_tflite/README.md
@@ -0,0 +1,51 @@
+# MTCNN face detection
+
+Implementation of the [MTCNN face detection algorithm](https://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=7553523). This project ports the code from [ipazc/mtcnn](https://github.com/ipazc/mtcnn) to TF Lite.
+
+## Installation
+
+You can install the package through pip:
+
+```
+pip install mtcnn-tflite
+```
+
+## Quick start
+
+Similar to [the original implementation](https://github.com/ipazc/mtcnn), the following example illustrates the ease of use of this package:
+
+```
+>>> from mtcnn_tflite.MTCNN import MTCNN
+>>> import cv2
+>>>
+>>> img = cv2.cvtColor(cv2.imread("ivan.jpg"), cv2.COLOR_BGR2RGB)
+>>> detector = MTCNN()
+>>> detector.detect_faces(img)
+[
+    {
+        'box': [276, 88, 51, 68],
+        'confidence': 0.9989245533943176,
+        'keypoints': {
+            'left_eye': (291, 117),
+            'right_eye': (314, 114),
+            'nose': (303, 130),
+            'mouth_left': (296, 143),
+            'mouth_right': (314, 141)
+        }
+    }
+]
+```
+
+
+## Benchmark
+
+| Image size | TF version                            | Process time * |
+|------------|---------------------------------------|----------------|
+| 561x561    | [TF2](https://github.com/ipazc/mtcnn) | 698ms          |
+| 561x561    | **This repository** (TF Lite)         | 445ms          |
+
+\* executed on a CPU: Intel i7-10510U
+
+## License
+
+[MIT License](https://github.com/mobilesec/mtcnn-tflite/blob/master/LICENSE)
diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/MTCNN.py b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/MTCNN.py
new file mode 100644
index 0000000000..4300ada7ea
--- /dev/null
+++ b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/MTCNN.py
@@ -0,0 +1,528 @@
+# MIT License
+#
+# Copyright (c) 2019 Iván de Paz Centeno
+# Copyright (c) 2021 CDL Digidow
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+#
+# This code is derived from the MTCNN implementation of David Sandberg for Facenet
+# (https://github.com/davidsandberg/facenet/)
+# It has been rebuilt from scratch, taking David Sandberg's implementation as a reference.
+#
+
+import cv2
+import numpy as np
+import pkg_resources
+import os
+
+import mtcnn_tflite
+from mtcnn_tflite.exceptions import InvalidImage
+from mtcnn_tflite.ModelBuilder import ModelBuilder
+
+import tensorflow as tf
+
+class StageStatus(object):
+    """
+    Keeps status between MTCNN stages
+    """
+
+    def __init__(self, pad_result: tuple = None, width=0, height=0):
+        self.width = width
+        self.height = height
+        self.dy = self.edy = self.dx = self.edx = self.y = self.ey = self.x = self.ex = self.tmpw = self.tmph = []
+
+        if pad_result is not None:
+            self.update(pad_result)
+
+    def update(self, pad_result: tuple):
+        s = self
+        s.dy, s.edy, s.dx, s.edx, s.y, s.ey, s.x, s.ex, s.tmpw, s.tmph = pad_result
+
+
+class MTCNN(object):
+    """
+    Performs MTCNN detection:
+    a) detection of faces (with the confidence probability)
+    b) detection of keypoints (left eye, right eye, nose, mouth_left, mouth_right)
+    """
+
+    def __init__(self, min_face_size: int = 20, steps_threshold: list = None,
+                 scale_factor: float = 0.709):
+        """
+        Initializes the MTCNN.
+        :param min_face_size: minimum size of the face to detect
+        :param steps_threshold: threshold values for each of the three stages
+        :param scale_factor: scale factor
+        """
+        if steps_threshold is None:
+            steps_threshold = [0.6, 0.7, 0.7]
+
+        self.lastImgShape = None
+
+        self._min_face_size = min_face_size
+        self._steps_threshold = steps_threshold
+        self._scale_factor = scale_factor
+
+        self.builder = ModelBuilder()
+        self._rnetlite, self._onetlite = self.builder.get_r_o_networks()
+
+        self._rnetlite.allocate_tensors()
+        self._onetlite.allocate_tensors()
+
+    @property
+    def min_face_size(self):
+        return self._min_face_size
+
+    @min_face_size.setter
+    def min_face_size(self, mfc=20):
+        try:
+            self._min_face_size = int(mfc)
+        except ValueError:
+            self._min_face_size = 20
+
+    def __compute_scale_pyramid(self, m, min_layer):
+        scales = []
+        factor_count = 0
+
+        while min_layer >= 12:
+            scales += [m * np.power(self._scale_factor, factor_count)]
+            min_layer = min_layer * self._scale_factor
+            factor_count += 1
+
+        return scales
+
+    @staticmethod
+    def __scale_image(image, scale: float):
+        """
+        Scales the image to a given scale.
+ :param image: + :param scale: + :return: + """ + height, width, _ = image.shape + + width_scaled = int(np.ceil(width * scale)) + height_scaled = int(np.ceil(height * scale)) + + im_data = cv2.resize(image, (width_scaled, height_scaled), interpolation=cv2.INTER_AREA) + + # Normalize the image's pixels + im_data_normalized = (im_data - 127.5) * 0.0078125 + + return im_data_normalized + + @staticmethod + def __generate_bounding_box(imap, reg, scale, t): + + # use heatmap to generate bounding boxes + stride = 2 + cellsize = 12 + + imap = np.transpose(imap) + dx1 = np.transpose(reg[:, :, 0]) + dy1 = np.transpose(reg[:, :, 1]) + dx2 = np.transpose(reg[:, :, 2]) + dy2 = np.transpose(reg[:, :, 3]) + + y, x = np.where(imap >= t) + + if y.shape[0] == 1: + dx1 = np.flipud(dx1) + dy1 = np.flipud(dy1) + dx2 = np.flipud(dx2) + dy2 = np.flipud(dy2) + + score = imap[(y, x)] + reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]])) + + if reg.size == 0: + reg = np.empty(shape=(0, 3)) + + bb = np.transpose(np.vstack([y, x])) + + q1 = np.fix((stride * bb + 1) / scale) + q2 = np.fix((stride * bb + cellsize) / scale) + boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg]) + + return boundingbox, reg + + @staticmethod + def __nms(boxes, threshold, method): + """ + Non Maximum Suppression. + + :param boxes: np array with bounding boxes. + :param threshold: + :param method: NMS method to apply. Available values ('Min', 'Union') + :return: + """ + if boxes.size == 0: + return np.empty((0, 3)) + + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + s = boxes[:, 4] + + area = (x2 - x1 + 1) * (y2 - y1 + 1) + sorted_s = np.argsort(s) + + pick = np.zeros_like(s, dtype=np.int16) + counter = 0 + while sorted_s.size > 0: + i = sorted_s[-1] + pick[counter] = i + counter += 1 + idx = sorted_s[0:-1] + + xx1 = np.maximum(x1[i], x1[idx]) + yy1 = np.maximum(y1[i], y1[idx]) + xx2 = np.minimum(x2[i], x2[idx]) + yy2 = np.minimum(y2[i], y2[idx]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + + inter = w * h + + if method == 'Min': + o = inter / np.minimum(area[i], area[idx]) + else: + o = inter / (area[i] + area[idx] - inter) + + sorted_s = sorted_s[np.where(o <= threshold)] + + pick = pick[0:counter] + + return pick + + @staticmethod + def __pad(total_boxes, w, h): + # compute the padding coordinates (pad the bounding boxes to square) + tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32) + tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32) + numbox = total_boxes.shape[0] + + dx = np.ones(numbox, dtype=np.int32) + dy = np.ones(numbox, dtype=np.int32) + edx = tmpw.copy().astype(np.int32) + edy = tmph.copy().astype(np.int32) + + x = total_boxes[:, 0].copy().astype(np.int32) + y = total_boxes[:, 1].copy().astype(np.int32) + ex = total_boxes[:, 2].copy().astype(np.int32) + ey = total_boxes[:, 3].copy().astype(np.int32) + + tmp = np.where(ex > w) + edx.flat[tmp] = np.expand_dims(-ex[tmp] + w + tmpw[tmp], 1) + ex[tmp] = w + + tmp = np.where(ey > h) + edy.flat[tmp] = np.expand_dims(-ey[tmp] + h + tmph[tmp], 1) + ey[tmp] = h + + tmp = np.where(x < 1) + dx.flat[tmp] = np.expand_dims(2 - x[tmp], 1) + x[tmp] = 1 + + tmp = np.where(y < 1) + dy.flat[tmp] = np.expand_dims(2 - y[tmp], 1) + y[tmp] = 1 + + return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph + + @staticmethod + def __rerec(bbox): + # convert bbox to square + height = bbox[:, 3] - bbox[:, 1] + width = bbox[:, 2] - bbox[:, 0] + max_side_length = 
np.maximum(width, height) + bbox[:, 0] = bbox[:, 0] + width * 0.5 - max_side_length * 0.5 + bbox[:, 1] = bbox[:, 1] + height * 0.5 - max_side_length * 0.5 + bbox[:, 2:4] = bbox[:, 0:2] + np.transpose(np.tile(max_side_length, (2, 1))) + return bbox + + @staticmethod + def __bbreg(boundingbox, reg): + # calibrate bounding boxes + if reg.shape[1] == 1: + reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) + + w = boundingbox[:, 2] - boundingbox[:, 0] + 1 + h = boundingbox[:, 3] - boundingbox[:, 1] + 1 + b1 = boundingbox[:, 0] + reg[:, 0] * w + b2 = boundingbox[:, 1] + reg[:, 1] * h + b3 = boundingbox[:, 2] + reg[:, 2] * w + b4 = boundingbox[:, 3] + reg[:, 3] * h + boundingbox[:, 0:4] = np.transpose(np.vstack([b1, b2, b3, b4])) + return boundingbox + + def detect_faces(self, img) -> list: + """ + Detects bounding boxes from the specified image. + :param img: image to process + :return: list containing all the bounding boxes detected with their keypoints. + """ + if img is None or not hasattr(img, "shape"): + raise InvalidImage("Image not valid.") + + height, width, _ = img.shape + if self.lastImgShape != img.shape: + self._pnetlites = self.builder.create_pnet((height, width)) + + for i in range(len(self._pnetlites)): + self._pnetlites[i].allocate_tensors() + + self.lastImgShape = img.shape + + stage_status = StageStatus(width=width, height=height) + + m = 12 / self._min_face_size + min_layer = np.amin([height, width]) * m + + scales = self.__compute_scale_pyramid(m, min_layer) + + stages = [self.__stage1, self.__stage2, self.__stage3] + result = [scales, stage_status] + + # We pipe here each of the stages + for stage in stages: + result = stage(img, result[0], result[1]) + + [total_boxes, points] = result + + bounding_boxes = [] + + for bounding_box, keypoints in zip(total_boxes, points.T): + x = max(0, int(bounding_box[0])) + y = max(0, int(bounding_box[1])) + width = int(bounding_box[2] - x) + height = int(bounding_box[3] - y) + bounding_boxes.append({ + 'box': [x, y, width, height], + 'confidence': bounding_box[-1], + 'keypoints': { + 'left_eye': (int(keypoints[0]), int(keypoints[5])), + 'right_eye': (int(keypoints[1]), int(keypoints[6])), + 'nose': (int(keypoints[2]), int(keypoints[7])), + 'mouth_left': (int(keypoints[3]), int(keypoints[8])), + 'mouth_right': (int(keypoints[4]), int(keypoints[9])), + } + }) + + return bounding_boxes + + + def __stage1(self, image, scales: list, stage_status: StageStatus): + """ + First stage of the MTCNN. 
+ :param image: + :param scales: + :param stage_status: + :return: + """ + total_boxes = np.empty((0, 9)) + status = stage_status + + ctr = 0 + for scale in scales: + scaled_image = self.__scale_image(image, scale) + + img_x = np.expand_dims(scaled_image, 0) + img_y = np.transpose(img_x, (0, 2, 1, 3)) + img_y = np.float32(img_y) + + pnetlite = self._pnetlites[ctr] + ctr += 1 + + pnetlite.set_tensor(pnetlite.get_input_details()[0]['index'], img_y) + pnetlite.invoke() + + out0 = np.transpose(pnetlite.get_tensor(pnetlite.get_output_details()[0]['index']), (0, 2, 1, 3)) + out1 = np.transpose(pnetlite.get_tensor(pnetlite.get_output_details()[1]['index']), (0, 2, 1, 3)) + + boxes, _ = self.__generate_bounding_box(out1[0, :, :, 1].copy(), + out0[0, :, :, :].copy(), scale, self._steps_threshold[0]) + + # inter-scale nms + pick = self.__nms(boxes.copy(), 0.5, 'Union') + if boxes.size > 0 and pick.size > 0: + boxes = boxes[pick, :] + total_boxes = np.append(total_boxes, boxes, axis=0) + + numboxes = total_boxes.shape[0] + + if numboxes > 0: + pick = self.__nms(total_boxes.copy(), 0.7, 'Union') + total_boxes = total_boxes[pick, :] + + regw = total_boxes[:, 2] - total_boxes[:, 0] + regh = total_boxes[:, 3] - total_boxes[:, 1] + + qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw + qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh + qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw + qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh + + total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]])) + total_boxes = self.__rerec(total_boxes.copy()) + + total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32) + status = StageStatus(self.__pad(total_boxes.copy(), stage_status.width, stage_status.height), + width=stage_status.width, height=stage_status.height) + + return total_boxes, status + + def __stage2(self, img, total_boxes, stage_status: StageStatus): + """ + Second stage of the MTCNN. 
+ :param img: + :param total_boxes: + :param stage_status: + :return: + """ + num_boxes = total_boxes.shape[0] + if num_boxes == 0: + return total_boxes, stage_status + + # second stage + tempimg = np.zeros(shape=(24, 24, 3, num_boxes)) + + for k in range(0, num_boxes): + tmp = np.zeros((int(stage_status.tmph[k]), int(stage_status.tmpw[k]), 3)) + + tmp[stage_status.dy[k] - 1:stage_status.edy[k], stage_status.dx[k] - 1:stage_status.edx[k], :] = \ + img[stage_status.y[k] - 1:stage_status.ey[k], stage_status.x[k] - 1:stage_status.ex[k], :] + + if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: + tempimg[:, :, :, k] = cv2.resize(tmp, (24, 24), interpolation=cv2.INTER_AREA) + + else: + return np.empty(shape=(0,)), stage_status + + tempimg = (tempimg - 127.5) * 0.0078125 + tempimg1 = np.transpose(tempimg, (3, 1, 0, 2)) + tempimg1 = np.float32(tempimg1) + + out = [[],[]] + for dimension in tempimg1: + self._rnetlite.set_tensor(self._rnetlite.get_input_details()[0]['index'], np.expand_dims(dimension, 0)) + self._rnetlite.invoke() + + out[0].append(self._rnetlite.get_tensor(self._rnetlite.get_output_details()[0]['index'])[0]) + out[1].append(self._rnetlite.get_tensor(self._rnetlite.get_output_details()[1]['index'])[0]) + + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + + score = out1[1, :] + + ipass = np.where(score > self._steps_threshold[1]) + + total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)]) + + mv = out0[:, ipass[0]] + + if total_boxes.shape[0] > 0: + pick = self.__nms(total_boxes, 0.7, 'Union') + total_boxes = total_boxes[pick, :] + total_boxes = self.__bbreg(total_boxes.copy(), np.transpose(mv[:, pick])) + total_boxes = self.__rerec(total_boxes.copy()) + + return total_boxes, stage_status + + + def __stage3(self, img, total_boxes, stage_status: StageStatus): + """ + Third stage of the MTCNN. 
+ + :param img: + :param total_boxes: + :param stage_status: + :return: + """ + num_boxes = total_boxes.shape[0] + if num_boxes == 0: + return total_boxes, np.empty(shape=(0,)) + + total_boxes = np.fix(total_boxes).astype(np.int32) + + status = StageStatus(self.__pad(total_boxes.copy(), stage_status.width, stage_status.height), + width=stage_status.width, height=stage_status.height) + + tempimg = np.zeros((48, 48, 3, num_boxes)) + + for k in range(0, num_boxes): + + tmp = np.zeros((int(status.tmph[k]), int(status.tmpw[k]), 3)) + + tmp[status.dy[k] - 1:status.edy[k], status.dx[k] - 1:status.edx[k], :] = \ + img[status.y[k] - 1:status.ey[k], status.x[k] - 1:status.ex[k], :] + + if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: + tempimg[:, :, :, k] = cv2.resize(tmp, (48, 48), interpolation=cv2.INTER_AREA) + else: + return np.empty(shape=(0,)), np.empty(shape=(0,)) + + tempimg = (tempimg - 127.5) * 0.0078125 + tempimg1 = np.transpose(tempimg, (3, 1, 0, 2)) + + outlite = [] + tempimg1 = np.float32(tempimg1) + + out = [[],[],[]] + for dimension in tempimg1: + self._onetlite.set_tensor(self._onetlite.get_input_details()[0]['index'], np.expand_dims(dimension, 0)) + self._onetlite.invoke() + + out[0].append(self._onetlite.get_tensor(self._onetlite.get_output_details()[0]['index'])[0]) + out[1].append(self._onetlite.get_tensor(self._onetlite.get_output_details()[1]['index'])[0]) + out[2].append(self._onetlite.get_tensor(self._onetlite.get_output_details()[2]['index'])[0]) + + out0 = np.transpose(out[0]) + out1 = np.transpose(out[1]) + out2 = np.transpose(out[2]) + + score = out2[1, :] + + points = out1 + + ipass = np.where(score > self._steps_threshold[2]) + + points = points[:, ipass[0]] + + total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)]) + + mv = out0[:, ipass[0]] + + w = total_boxes[:, 2] - total_boxes[:, 0] + 1 + h = total_boxes[:, 3] - total_boxes[:, 1] + 1 + + points[0:5, :] = np.tile(w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1 + points[5:10, :] = np.tile(h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1 + + if total_boxes.shape[0] > 0: + total_boxes = self.__bbreg(total_boxes.copy(), np.transpose(mv)) + pick = self.__nms(total_boxes.copy(), 0.7, 'Min') + total_boxes = total_boxes[pick, :] + points = points[:, pick] + + return total_boxes, points diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/ModelBuilder.py b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/ModelBuilder.py new file mode 100644 index 0000000000..ee0713e278 --- /dev/null +++ b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/ModelBuilder.py @@ -0,0 +1,200 @@ +# MIT License +# +# Copyright (c) 2021 CDL Digidow +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + + +import numpy as np +import math + +from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, PReLU, Flatten, Softmax +from tensorflow.keras.models import Model +import tensorflow as tf +from fcache.cache import FileCache +import os +import mtcnn_tflite + +class ModelBuilder: + def __init__(self, min_face_size=20, scale_factor=0.709): + self.min_face_size = min_face_size + self.scale_factor = scale_factor + + self.cache = FileCache('mtcnn-tflite-models') + data_path = os.path.join(os.path.dirname(mtcnn_tflite.__file__), "data") + self.weights_file = os.path.join(data_path, "mtcnn_weights.npy") + #delegate_list = tf.lite.experimental.load_delegate('libedgetpu.so.1') + + if "r_net" not in self.cache: + r_net = self.build_rnet() + converter = tf.lite.TFLiteConverter.from_keras_model(r_net) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + + r_net = converter.convert() + self.cache["r_net"] = r_net + + self.r_net = tf.lite.Interpreter(model_content=self.cache["r_net"], experimental_delegates = None) #[delegate_list] + + if "o_net" not in self.cache: + o_net = self.build_onet() + converter = tf.lite.TFLiteConverter.from_keras_model(o_net) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + + o_net = converter.convert() + self.cache["o_net"] = o_net + + self.o_net = tf.lite.Interpreter(model_content=self.cache["o_net"], experimental_delegates = None )#[delegate_list] + + self.cache.sync() + + def get_networks(self): + return (self.p_nets, self.r_net, self.o_net) + + def get_r_o_networks(self): + return (self.r_net, self.o_net) + + def clear_cache(self): + self.cache.clear() + + def create_pnet(self, image_dimension): + img_width, img_height = image_dimension + scales = self.get_scales(self.min_face_size, img_width, img_height, self.scale_factor) + #delegate_list = tf.lite.experimental.load_delegate('libedgetpu.so.1') # + if str(image_dimension) not in self.cache: + ctr = 0 + p_nets = [] + for scale in scales: + p_net = self.build_pnet((math.ceil(img_height*scale), math.ceil(img_width*scale), 3)) + converter = tf.lite.TFLiteConverter.from_keras_model(p_net) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + tflite_model = converter.convert() + p_nets.append(tflite_model) + self.cache[str(image_dimension)] = p_nets + self.cache.sync() + + self.p_nets = [] + for p_net in self.cache[str(image_dimension)]: + self.p_nets.append(tf.lite.Interpreter(model_content=p_net, experimental_delegates = None))#[delegate_list] + + return self.p_nets + + + def build_pnet(self, input_shape): + p_inp = Input(input_shape) + + p_layer = Conv2D(10, kernel_size=(3, 3), strides=(1, 1), padding="valid")(p_inp) + p_layer = PReLU(shared_axes=[1, 2])(p_layer) + p_layer = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same")(p_layer) + + p_layer = Conv2D(16, kernel_size=(3, 3), strides=(1, 1), padding="valid")(p_layer) + p_layer = PReLU(shared_axes=[1, 2])(p_layer) + + p_layer = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding="valid")(p_layer) + p_layer = PReLU(shared_axes=[1, 2])(p_layer) + + 
p_layer_out1 = Conv2D(2, kernel_size=(1, 1), strides=(1, 1))(p_layer) + p_layer_out1 = Softmax(axis=3)(p_layer_out1) + + p_layer_out2 = Conv2D(4, kernel_size=(1, 1), strides=(1, 1))(p_layer) + + p_net = Model(p_inp, [p_layer_out2, p_layer_out1]) + + weights = np.load(self.weights_file, allow_pickle=True).tolist() + p_net.set_weights(weights['pnet']) + + return p_net + + def build_rnet(self): + input_shape = (24, 24, 3) + + r_inp = Input(input_shape) + + r_layer = Conv2D(28, kernel_size=(3, 3), strides=(1, 1), padding="valid")(r_inp) + r_layer = PReLU(shared_axes=[1, 2])(r_layer) + r_layer = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(r_layer) + + r_layer = Conv2D(48, kernel_size=(3, 3), strides=(1, 1), padding="valid")(r_layer) + r_layer = PReLU(shared_axes=[1, 2])(r_layer) + r_layer = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="valid")(r_layer) + + r_layer = Conv2D(64, kernel_size=(2, 2), strides=(1, 1), padding="valid")(r_layer) + r_layer = PReLU(shared_axes=[1, 2])(r_layer) + r_layer = Flatten()(r_layer) + r_layer = Dense(128)(r_layer) + r_layer = PReLU()(r_layer) + + r_layer_out1 = Dense(2)(r_layer) + r_layer_out1 = Softmax(axis=1)(r_layer_out1) + + r_layer_out2 = Dense(4)(r_layer) + + r_net = Model(r_inp, [r_layer_out2, r_layer_out1]) + + weights = np.load(self.weights_file, allow_pickle=True).tolist() + r_net.set_weights(weights['rnet']) + + return r_net + + def build_onet(self): + input_shape = (48, 48, 3) + o_inp = Input(input_shape) + o_layer = Conv2D(32, kernel_size=(3, 3), strides=(1, 1), padding="valid")(o_inp) + o_layer = PReLU(shared_axes=[1, 2])(o_layer) + o_layer = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(o_layer) + + o_layer = Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding="valid")(o_layer) + o_layer = PReLU(shared_axes=[1, 2])(o_layer) + o_layer = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="valid")(o_layer) + + o_layer = Conv2D(64, kernel_size=(3, 3), strides=(1, 1), padding="valid")(o_layer) + o_layer = PReLU(shared_axes=[1, 2])(o_layer) + o_layer = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same")(o_layer) + + o_layer = Conv2D(128, kernel_size=(2, 2), strides=(1, 1), padding="valid")(o_layer) + o_layer = PReLU(shared_axes=[1, 2])(o_layer) + + o_layer = Flatten()(o_layer) + o_layer = Dense(256)(o_layer) + o_layer = PReLU()(o_layer) + + o_layer_out1 = Dense(2)(o_layer) + o_layer_out1 = Softmax(axis=1)(o_layer_out1) + o_layer_out2 = Dense(4)(o_layer) + o_layer_out3 = Dense(10)(o_layer) + + o_net = Model(o_inp, [o_layer_out2, o_layer_out3, o_layer_out1]) + + weights = np.load(self.weights_file, allow_pickle=True).tolist() + o_net.set_weights(weights['onet']) + + return o_net + + def get_scales(self, min_face_size, img_width, img_height, scale_factor): + m = 12 / min_face_size + min_layer = np.amin([img_height, img_width]) * m + scales = [] + factor_count = 0 + + while min_layer >= 12: + scales += [m * np.power(scale_factor, factor_count)] + min_layer = min_layer * scale_factor + factor_count += 1 + + return scales diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/__init__.py b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/__init__.py new file mode 100644 index 0000000000..c6131e011e --- /dev/null +++ b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/__init__.py @@ -0,0 +1,4 @@ +from mtcnn_tflite import MTCNN + + +__all__ = ['MTCNN'] diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/ivan.jpg 
b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/ivan.jpg new file mode 100644 index 0000000000..401e3ba051 Binary files /dev/null and b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/ivan.jpg differ diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/mtcnn_weights.npy b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/mtcnn_weights.npy new file mode 100644 index 0000000000..adef02b6cc Binary files /dev/null and b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/mtcnn_weights.npy differ diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/no-faces.jpg b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/no-faces.jpg new file mode 100644 index 0000000000..0c51e1cc12 Binary files /dev/null and b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/data/no-faces.jpg differ diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/exceptions/__init__.py b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/exceptions/__init__.py new file mode 100644 index 0000000000..b39e13ebdc --- /dev/null +++ b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/exceptions/__init__.py @@ -0,0 +1,26 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +# MIT License +# +# Copyright (c) 2019 Iván de Paz Centeno +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from mtcnn_tflite.exceptions.invalid_image import InvalidImage diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/exceptions/invalid_image.py b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/exceptions/invalid_image.py new file mode 100755 index 0000000000..fbb558efd1 --- /dev/null +++ b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/exceptions/invalid_image.py @@ -0,0 +1,30 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +# MIT License +# +# Copyright (c) 2019 Iván de Paz Centeno +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +__author__ = "Iván de Paz Centeno" + +class InvalidImage(Exception): + pass diff --git a/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/requirements.txt b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/requirements.txt new file mode 100644 index 0000000000..50a294c226 --- /dev/null +++ b/embedding-calculator/srcext/mtcnn_tflite/mtcnn_tflite/requirements.txt @@ -0,0 +1,5 @@ +tensorflow +numpy +opencv-python +pytest +fcache \ No newline at end of file diff --git a/embedding-calculator/srcext/mtcnn_tflite/setup.cfg b/embedding-calculator/srcext/mtcnn_tflite/setup.cfg new file mode 100644 index 0000000000..b7e478982c --- /dev/null +++ b/embedding-calculator/srcext/mtcnn_tflite/setup.cfg @@ -0,0 +1,2 @@ +[aliases] +test=pytest diff --git a/embedding-calculator/srcext/mtcnn_tflite/setup.py b/embedding-calculator/srcext/mtcnn_tflite/setup.py new file mode 100644 index 0000000000..f5b589bc25 --- /dev/null +++ b/embedding-calculator/srcext/mtcnn_tflite/setup.py @@ -0,0 +1,52 @@ +# MIT License +# +# Copyright (c) 2021 CDL Digidow +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+
+import setuptools
+
+with open("README.md", "r") as fh:
+    long_description = fh.read()
+
+setuptools.setup(
+    name='mtcnn_tflite',
+    version='0.0.4',
+    author="Philipp Hofer",
+    author_email="philipp.hofer@ins.jku.at",
+    description="MTCNN face detection implementation in Tensorflow Lite.",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/mobilesec/mtcnn-tflite",
+    packages=setuptools.find_packages(exclude=["tests.*", "tests"]),
+    install_requires=[
+        "tensorflow>=2.0.0",
+        "numpy",
+        "opencv-python~=4.4.0",
+        "fcache>=0.4.0"
+    ],
+    tests_require=['pytest'],
+    license="MIT License",
+    classifiers=[
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3",
+        "Operating System :: OS Independent"
+    ],
+    include_package_data=True
+)
diff --git a/embedding-calculator/srcext/mtcnn_tflite/tests/test_mtcnn.py b/embedding-calculator/srcext/mtcnn_tflite/tests/test_mtcnn.py
new file mode 100644
index 0000000000..ece8487289
--- /dev/null
+++ b/embedding-calculator/srcext/mtcnn_tflite/tests/test_mtcnn.py
@@ -0,0 +1,92 @@
+import unittest
+import cv2
+
+from mtcnn_tflite.exceptions import InvalidImage
+from mtcnn_tflite.MTCNN import MTCNN
+
+mtcnn = None
+
+
+class TestMTCNN(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        global mtcnn
+        mtcnn = MTCNN()
+
+    def test_detect_faces(self):
+        """
+        MTCNN is able to detect faces and landmarks on an image
+        :return:
+        """
+        ivan = cv2.imread("mtcnn_tflite/data/ivan.jpg")
+
+        result = mtcnn.detect_faces(ivan)  # type: list
+
+        self.assertEqual(len(result), 1)
+
+        first = result[0]
+
+        self.assertIn('box', first)
+        self.assertIn('keypoints', first)
+        self.assertEqual(len(first['box']), 4)
+        self.assertEqual(len(first['keypoints']), 5)
+
+        keypoints = first['keypoints']  # type: dict
+        self.assertIn('nose', keypoints)
+        self.assertIn('mouth_left', keypoints)
+        self.assertIn('mouth_right', keypoints)
+        self.assertIn('left_eye', keypoints)
+        self.assertIn('right_eye', keypoints)
+
+        self.assertEqual(len(keypoints['nose']), 2)
+        self.assertEqual(len(keypoints['mouth_left']), 2)
+        self.assertEqual(len(keypoints['mouth_right']), 2)
+        self.assertEqual(len(keypoints['left_eye']), 2)
+        self.assertEqual(len(keypoints['right_eye']), 2)
+
+    def test_detect_faces_invalid_content(self):
+        """
+        MTCNN detects invalid images
+        :return:
+        """
+        ivan = cv2.imread("mtcnn_tflite/MTCNN.py")
+
+        with self.assertRaises(InvalidImage):
+            result = mtcnn.detect_faces(ivan)  # type: list
+
+    def test_detect_no_faces_on_no_faces_content(self):
+        """
+        MTCNN successfully reports an empty list when no faces are detected.
+        :return:
+        """
+        ivan = cv2.imread("mtcnn_tflite/data/no-faces.jpg")
+
+        result = mtcnn.detect_faces(ivan)  # type: list
+        self.assertEqual(len(result), 0)
+
+
+    def test_mtcnn_multiple_instances(self):
+        """
+        Multiple instances of MTCNN can be created in the same thread.
+        :return:
+        """
+        detector_1 = MTCNN(steps_threshold=[.2, .7, .7])
+        detector_2 = MTCNN(steps_threshold=[.1, .1, .1])
+
+        ivan = cv2.imread("mtcnn_tflite/data/ivan.jpg")
+
+        faces_1 = detector_1.detect_faces(ivan)
+        faces_2 = detector_2.detect_faces(ivan)
+
+        self.assertEqual(len(faces_1), 1)
+        self.assertGreater(len(faces_2), 1)
+
+    @classmethod
+    def tearDownClass(cls):
+        global mtcnn
+        del mtcnn
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/embedding-calculator/tpu.Dockerfile b/embedding-calculator/tpu.Dockerfile
new file mode 100644
index 0000000000..4d2f83f4b2
--- /dev/null
+++ b/embedding-calculator/tpu.Dockerfile
@@ -0,0 +1,63 @@
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE:-python:3.7-slim}
+
+RUN apt-get update && apt-get install -y build-essential cmake git wget unzip \
+    curl yasm pkg-config libswscale-dev libtbb2 libtbb-dev libjpeg-dev \
+    libpng-dev libtiff-dev libavformat-dev libpq-dev libfreeimage3 \
+    && rm -rf /var/lib/apt/lists/*
+
+# install drivers for coral tpu
+RUN echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list
+RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
+RUN apt-get update && apt-get install -y libedgetpu1-std
+
+# install common python packages
+SHELL ["/bin/bash", "-c"]
+WORKDIR /app/ml
+COPY requirements.txt .
+RUN pip --no-cache-dir install -r requirements.txt
+
+ARG BE_VERSION
+ARG APP_VERSION_STRING
+ENV BE_VERSION=$BE_VERSION
+ENV APP_VERSION_STRING=$APP_VERSION_STRING
+ENV HOME=/app/ml
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+ENV PYTHONUNBUFFERED=0
+ENV JOBLIB_MULTIPROCESSING=0
+
+# download ML models
+ARG INTEL_OPTIMIZATION=false
+ARG GPU_IDX=-1
+ENV GPU_IDX=$GPU_IDX INTEL_OPTIMIZATION=$INTEL_OPTIMIZATION
+ARG FACE_DETECTION_PLUGIN="facenet.coralmtcnn.FaceDetector"
+ARG CALCULATION_PLUGIN="facenet.coralmtcnn.Calculator"
+ARG EXTRA_PLUGINS="facenet.LandmarksDetector,agegender.AgeDetector,agegender.GenderDetector,facenet.facemask.MaskDetector"
+ENV FACE_DETECTION_PLUGIN=$FACE_DETECTION_PLUGIN CALCULATION_PLUGIN=$CALCULATION_PLUGIN \
+    EXTRA_PLUGINS=$EXTRA_PLUGINS
+COPY src src
+COPY srcext srcext
+RUN pip --no-cache-dir install srcext/mtcnn_tflite/
+RUN python -m src.services.facescan.plugins.setup
+
+# copy rest of the code
+COPY tools tools
+COPY sample_images sample_images
+
+# run tests
+ARG SKIP_TESTS
+COPY pytest.ini .
+RUN if [ -z $SKIP_TESTS ]; then pytest -m "not performance" /app/ml/src; fi
+
+# create folder for tflite model
+RUN mkdir -p /app/ml/.cache/mtcnn-tflite-models
+RUN chmod a+rwx -R /app/ml/.cache/mtcnn-tflite-models
+USER root
+
+EXPOSE 3000
+
+COPY uwsgi.ini .
+ENV UWSGI_PROCESSES=${UWSGI_PROCESSES:-2} +ENV UWSGI_THREADS=1 +CMD ["uwsgi", "--ini", "uwsgi.ini"] diff --git a/embedding-calculator/tpu.Dockerfile.full b/embedding-calculator/tpu.Dockerfile.full new file mode 100644 index 0000000000..17e1e78ae9 --- /dev/null +++ b/embedding-calculator/tpu.Dockerfile.full @@ -0,0 +1,82 @@ +FROM debian:buster-slim + +RUN apt-get update + +RUN apt-get install --no-install-recommends -y \ + gnupg \ + ca-certificates \ + curl \ + apt-utils \ + apt-transport-https + +# Python package management and basic dependencies +RUN apt-get install -y curl python3.7 python3.7-dev python3.7-distutils + +# Set python 3 as the default python +RUN update-alternatives --set python /usr/bin/python3.7 + +# Upgrade pip to latest version +RUN curl -s https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python get-pip.py --force-reinstall && \ + rm get-pip.py + +RUN apt-get update && apt-get install -y build-essential cmake git wget unzip \ + curl yasm pkg-config libswscale-dev libtbb2 libtbb-dev libjpeg-dev \ + libpng-dev libtiff-dev libavformat-dev libpq-dev libfreeimage3 \ + && rm -rf /var/lib/apt/lists/* + +# install drivers for coral tpu +RUN echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | tee /etc/apt/sources.list.d/coral-edgetpu.list +RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - +RUN apt-get update && apt-get install -y libedgetpu1-std + +# install common python packages +SHELL ["/bin/bash", "-c"] +WORKDIR /app/ml +COPY requirements.txt . +RUN pip --no-cache-dir install -r requirements.txt + +ARG BE_VERSION +ARG APP_VERSION_STRING +ENV BE_VERSION=$BE_VERSION +ENV APP_VERSION_STRING=$APP_VERSION_STRING +ENV HOME=/app/ml +ENV LC_ALL=C.UTF-8 +ENV LANG=C.UTF-8 +ENV PYTHONUNBUFFERED=0 +ENV JOBLIB_MULTIPROCESSING=0 + +# download ML models +ARG INTEL_OPTIMIZATION=false +ARG GPU_IDX=-1 +ENV GPU_IDX=$GPU_IDX INTEL_OPTIMIZATION=$INTEL_OPTIMIZATION +ARG FACE_DETECTION_PLUGIN="facenet.coralmtcnn.FaceDetector" +ARG CALCULATION_PLUGIN="facenet.coralmtcnn.Calculator" +ARG EXTRA_PLUGINS="facenet.LandmarksDetector,agegender.AgeDetector,agegender.GenderDetector,facenet.facemask.MaskDetector" +ENV FACE_DETECTION_PLUGIN=$FACE_DETECTION_PLUGIN CALCULATION_PLUGIN=$CALCULATION_PLUGIN \ + EXTRA_PLUGINS=$EXTRA_PLUGINS +COPY src src +COPY srcext srcext +RUN pip --no-cache-dir install srcext/mtcnn_tflite/ +RUN python -m src.services.facescan.plugins.setup + +# copy rest of the code +COPY tools tools +COPY sample_images sample_images + +# run tests +ARG SKIP_TESTS +COPY pytest.ini . +RUN if [ -z $SKIP_TESTS ]; then pytest -m "not performance" /app/ml/src; fi + +# create folder for tflite model +RUN mkdir -p /app/ml/.cache/mtcnn-tflite-models +RUN chmod a+rwx -R /app/ml/.cache/mtcnn-tflite-models +USER root + +EXPOSE 3000 + +COPY uwsgi.ini . +ENV UWSGI_PROCESSES=${UWSGI_PROCESSES:-2} +ENV UWSGI_THREADS=1 +CMD ["uwsgi", "--ini", "uwsgi.ini"]
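Both Dockerfiles pre-create `/app/ml/.cache/mtcnn-tflite-models` because `ModelBuilder` converts the Keras networks to TF Lite on first use and caches the results with `fcache` under `$HOME/.cache` (`HOME` is set to `/app/ml`). A small sketch, not part of the patch, of warming that cache ahead of time so the first request does not pay the conversion cost; the `(720, 1280)` input size is an arbitrary example:

```
from mtcnn_tflite.ModelBuilder import ModelBuilder

# R-Net and O-Net are converted in the constructor and stored in the
# 'mtcnn-tflite-models' fcache cache; defaults match the plugin's settings.
builder = ModelBuilder(min_face_size=20, scale_factor=0.709)

# P-Nets are built per input resolution and cached under str((height, width)),
# so a later detect_faces() on a 720x1280 frame hits this cache entry directly.
builder.create_pnet((720, 1280))
```

Because the cache is keyed by the exact `(height, width)` tuple, warming only helps when incoming frames share that resolution; other sizes still trigger a one-time P-Net conversion.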