In [1]:
import os
import cv2
from ultralytics import YOLO
from retinaface import RetinaFace
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import numpy as np
from dataclasses import dataclass
from typing import Any, List
import numpy as np

# Modelo    

In [2]:
@dataclass
class Emotions:
    name : str
    quantity : int

@dataclass
class Point:
    x : float
    y : float

@dataclass
class BoundingBox:
    origin : Point
    end: Point


class Genre:
    male = 'MALE'
    female = 'FEMALE'

    def __init__(self, genre : str):
        self.genre = genre

    @staticmethod
    def female():
        return Genre(Genre.female)
    
    @staticmethod
    def male():
        return Genre(Genre.male)
    
    def isMale(self):
        return self.genre == Genre.male

    def isFemale(self):
        return self.genre == Genre.female

@dataclass
class Person:
    age : str
    genre : Genre
    emotions : List[Emotions]
    bounding_box : BoundingBox
    image : np.ndarray

@dataclass
class FaceDetectorResult:
    image : np.ndarray 
    bounding_box : BoundingBox

@dataclass
class FaceComparatorResult:
    similarity : float

# Aplicación

In [3]:
from abc import ABC, abstractmethod
from typing import List

class FaceDetector(ABC):
    @abstractmethod
    def detect(self, image : np.ndarray) -> List[FaceDetectorResult]:
        pass

class FaceQualifier(ABC):
    @abstractmethod
    def qualify(self, face_detector_result : FaceDetectorResult) -> Person:
        pass

class FaceComparator(ABC):
    @abstractmethod
    def qualify(self, first_face : FaceDetectorResult, second_face: FaceDetectorResult) -> FaceComparatorResult:
        pass

# Infraestructura


In [4]:
class ImageUtils:
    @staticmethod
    def crop(image : np.array, bounding_box : BoundingBox) -> np.ndarray:
        return image[bounding_box.origin.x:bounding_box.end.x, bounding_box.origin.y:bounding_box.end.y]
    
    @staticmethod
    def overlay_icon(image: np.array, icon_path: str, color: tuple, icon_size : int, point : tuple) -> np.ndarray:
        icon = cv2.imread(icon_path, cv2.IMREAD_GRAYSCALE)
        icon = cv2.resize(icon, (icon_size, icon_size))
        mask = icon == 0
        color_layer = np.full((icon.shape[0], icon.shape[1], 3), color, dtype=np.uint8)
        np.copyto(image[point[1]-icon.shape[0]//2:point[1]+icon.shape[0]//2, point[0]-icon.shape[1]//2:point[0]+icon.shape[1]//2], color_layer, where=mask[:,:,None])
        return image

In [5]:

class ViolaJonesFaceDetector(FaceDetector):
    cascPathface = os.path.dirname(
        cv2.__file__) + "/data/haarcascade_frontalface_alt2.xml"

    def detect(self, image : np.ndarray) -> List[FaceDetectorResult]:
        faceCascade = cv2.CascadeClassifier(self.cascPathface)
        gray = self._convert_image_to_gray(image)
        faces = faceCascade.detectMultiScale(gray,
                                         scaleFactor=1.1,
                                         minNeighbors=5,
                                         minSize=(60, 60),
                                         flags=cv2.CASCADE_SCALE_IMAGE)
        return self._convert_to_face_detector_result(image, faces)
        
    
    def _convert_to_face_detector_result(self, image, faces) -> List[FaceDetectorResult]:
        bounding_boxes = [BoundingBox(Point(x, y), Point(x+w, y+h)) for (x, y, w, h) in faces]
        return [FaceDetectorResult(ImageUtils.crop(image, bounding_box), bounding_box) for bounding_box in bounding_boxes]
    
    def _convert_image_to_gray(self, image : np.ndarray) -> np.ndarray:
        import cv2
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

class RetinafaceFaceDetector(FaceDetector):
    def detect(self, image : np.ndarray) -> List[FaceDetectorResult]:
        faces = RetinaFace.detect_faces(image);
        return self._convert_to_face_detector_result(image, [faces[key]['facial_area'] for key in faces.keys()])
    
    def _convert_to_face_detector_result(self, image, faces) -> List[FaceDetectorResult]:
        bounding_boxes = [BoundingBox(Point(x, y), Point(w, h)) for (x, y, w, h) in faces]
        return [FaceDetectorResult(ImageUtils.crop(image, bounding_box), bounding_box) for bounding_box in bounding_boxes]

class MediaPipeFaceDetector(FaceDetector):
    def detect(self, image : np.ndarray) -> List[FaceDetectorResult]:
        BaseOptions = mp.tasks.BaseOptions
        FaceDetector = mp.tasks.vision.FaceDetector
        FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
        VisionRunningMode = mp.tasks.vision.RunningMode

        options = FaceDetectorOptions(
            base_options=BaseOptions(model_asset_path='./blaze_face_short_range.tflite'),
            running_mode=VisionRunningMode.IMAGE)

        with FaceDetector.create_from_options(options) as detector:
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
            face_detector_result = detector.detect(mp_image)
            return self._convert_to_face_detector_result(image, [(x.bounding_box.origin_x, x.bounding_box.origin_y, x.bounding_box.width, x.bounding_box.height) for x in face_detector_result.detections])
        
    def _convert_to_face_detector_result(self, image, faces) -> List[FaceDetectorResult]:
        bounding_boxes = [BoundingBox(Point(x, y), Point(x + w, y + h)) for (x, y, w, h) in faces]
        return [FaceDetectorResult(ImageUtils.crop(image, bounding_box), bounding_box) for bounding_box in bounding_boxes]
    

In [6]:
class MockFaceQualifier(FaceQualifier):
    def qualify(self, face_detector_result):
        return Person(
            age=43, 
            genre=Genre.male(), 
            emotions=[Emotions('happy', 0.8),
                      Emotions('sad', 0.5),
                      Emotions('angry', 0.3)
                      ], 
            bounding_box=face_detector_result.bounding_box, 
            image=face_detector_result.image)

In [7]:
class FaceQualificationDisplay:
    def show():
        pass

In [8]:
import cv2 as cv

class OpenCVFaceQualificationDisplay(FaceQualificationDisplay):
    def __init__(self, people : List[Person], frame : np.ndarray) -> None:
        self.people = people
        self.frame = frame
    
    def display_emotions_right_of_person(self, person: Person, padding_x: int, padding_y: int) -> None:
        width = 200
        border_width =2
        rectangle_origin = (person.bounding_box.end.x + border_width, person.bounding_box.origin.y - border_width)
        rectangle_end = (rectangle_origin[0] + width, person.bounding_box.end.y + border_width)
        if rectangle_end[0] > self.frame.shape[1]:
            rectangle_end = (self.frame.shape[1], rectangle_end[1])
        if rectangle_end[1] > self.frame.shape[0]:
            rectangle_end = (rectangle_end[0], self.frame.shape[0])
        #blur the rectangle background
        self.frame[rectangle_origin[1]:rectangle_end[1], rectangle_origin[0]:rectangle_end[0]] = cv.GaussianBlur(self.frame[rectangle_origin[1]:rectangle_end[1], rectangle_origin[0]:rectangle_end[0]], (75, 75), 0) / 1.5

        font_weight=1
        font_size=0.8
        text = "Emotions"
        text_size, _ = cv.getTextSize(text, cv.FONT_HERSHEY_DUPLEX, font_size, font_weight)
        text_width, text_height = text_size
        text_x = rectangle_origin[0] + (rectangle_end[0] - rectangle_origin[0]) // 2 - text_width // 2
        text_y = rectangle_origin[1] + text_height + padding_y // 2
        cv.putText(self.frame, text, (text_x, text_y), cv.FONT_HERSHEY_DUPLEX, font_size, (255, 255, 255), font_weight, lineType = cv2.LINE_AA)

        gap = 40

        for x in person.emotions:
            #draw text of emotion
            font_weight=1
            font_size=0.5
            text = x.name + ": " + str(x.quantity)
            text_size, _ = cv.getTextSize(text, cv.FONT_HERSHEY_DUPLEX, font_size, font_weight)
            text_width, text_height = text_size
            text_x = rectangle_origin[0] + (rectangle_end[0] - rectangle_origin[0]) // 2 - text_width // 2
            text_y = (rectangle_origin[1] + text_height + padding_y // 2) + gap
            cv.putText(self.frame, text, (text_x, text_y), cv.FONT_HERSHEY_DUPLEX, font_size, (255, 255, 255), font_weight, lineType = cv2.LINE_AA)
            gap+=30


    def display_age_on_top_of_person(self, person: Person, padding_x: int, padding_y: int) -> None:
        gap = 26
        font_thinkness = 2
        icon_size = 38
        triangle_size = 20

        text = str(person.age)
        text_size, _ = cv.getTextSize(text, cv.FONT_HERSHEY_DUPLEX, 1.5, font_thinkness)
        text_width, text_height = text_size

        rectangle_origin = ((person.bounding_box.origin.x + (person.bounding_box.end.x - person.bounding_box.origin.x) // 2 - (text_width + padding_x) // 2), person.bounding_box.origin.y - text_height - padding_y - 50)
        rectangle_end = (rectangle_origin[0] + text_width + padding_x, rectangle_origin[1] + text_height + padding_y)

        # Draw the rectangle with a pointer
        cv.rectangle(self.frame, rectangle_origin, rectangle_end, (67, 193, 246), cv.FILLED)

        # Center the text within the rectangle
        text_x = rectangle_origin[0] + (rectangle_end[0] - rectangle_origin[0]) // 2 - text_width // 2
        text_y = rectangle_origin[1] + text_height + padding_y // 2

        # Draw the text
        cv.putText(self.frame, text, (text_x - gap, text_y), cv.FONT_HERSHEY_DUPLEX, 1.5, (47, 123, 222), font_thinkness, lineType = cv2.LINE_AA)

        triangle = np.array([[rectangle_origin[0] + (rectangle_end[0] - rectangle_origin[0]) // 2 - triangle_size + 5, rectangle_end[1]],
                            [rectangle_origin[0] + (rectangle_end[0] - rectangle_origin[0]) // 2 + triangle_size - 5, rectangle_end[1]],
                            [rectangle_origin[0] + (rectangle_end[0] - rectangle_origin[0]) // 2, rectangle_end[1] + triangle_size] ], np.int32)
        
        cv.drawContours(self.frame, [triangle], 0, (67, 193, 246), cv.FILLED)

        male_icon = cv.imread("./assets/male.png" if person.genre.isMale() else "./assets/female.png")  # Correct the path to your male icon image
        if male_icon is not None:
            male_icon = cv.resize(male_icon, (icon_size, icon_size))
            rectangle_center = (rectangle_origin[0] + (rectangle_end[0] - rectangle_origin[0]) // 2, rectangle_origin[1] + icon_size // 2 + padding_y//2)
            self.overlay_icon(male_icon, (rectangle_center[0] + gap, rectangle_center[1]))
        else:
            print("Male icon not found or there's an error in reading the image.")
    
    def overlay_icon(self, icon, center):
        """Overlay an icon image at the specified top left corner position."""
        h, w, _ = icon.shape
        x, y = center
        # Check if the coordinates are within the frame bounds
        #draw only if the icon fits inside the frame
        if x-w//2 >= 0 and y-h//2 >= 0 and x+w//2 < self.frame.shape[1] and y+h//2 < self.frame.shape[0]:
            self.frame[y-h//2:y+h//2, x-w//2:x+w//2] = icon
    
    def display_box_around_person(self, person : Person) -> None:
        cv.rectangle(self.frame, (person.bounding_box.origin.x, 
                                  person.bounding_box.origin.y), 
                                  (person.bounding_box.end.x, 
                                   person.bounding_box.end.y), 
                                   (67, 193, 246), 2)
    
    def show(self):
        for person in self.people:
            self.display_box_around_person(person)
            self.display_age_on_top_of_person(person, 130, 38)
            self.display_emotions_right_of_person(person, 130, 38)


In [9]:

class MockFaceComparator(FaceComparator):
    def qualify(self, first_face, second_face):
        return FaceComparatorResult(0.8)

In [10]:
class DeepfaceFaceComparator(FaceComparator):
    def qualify(self, first_face, second_face):
        pass

# CV2 INTERFACE

In [11]:
class FaceQualificationDisplay(ABC):
    @abstractmethod
    def show(self) -> None:
        pass


In [88]:
import cv2 as cv

video = cv.VideoCapture(0)
face_detector = MediaPipeFaceDetector()
qualifier = MockFaceQualifier()

while True:
    ret, frame = video.read()
    faces = face_detector.detect(frame)
    qualifications = [qualifier.qualify(face) for face in faces]
    display = OpenCVFaceQualificationDisplay(qualifications, frame)
    display.show()

    cv.imshow('Video', frame)

    if cv.waitKey(1) & 0xFF == ord('q'):
        break

I0000 00:00:1701812151.727874       1 gl_context.cc:344] GL version: 2.1 (2.1 ATI-4.12.7), renderer: AMD Radeon Pro 5300M OpenGL Engine
I0000 00:00:1701812151.792130       1 gl_context.cc:344] GL version: 2.1 (2.1 ATI-4.12.7), renderer: AMD Radeon Pro 5300M OpenGL Engine
I0000 00:00:1701812151.857117       1 gl_context.cc:344] GL version: 2.1 (2.1 ATI-4.12.7), renderer: AMD Radeon Pro 5300M OpenGL Engine
I0000 00:00:1701812151.923571       1 gl_context.cc:344] GL version: 2.1 (2.1 ATI-4.12.7), renderer: AMD Radeon Pro 5300M OpenGL Engine
I0000 00:00:1701812151.990860       1 gl_context.cc:344] GL version: 2.1 (2.1 ATI-4.12.7), renderer: AMD Radeon Pro 5300M OpenGL Engine
I0000 00:00:1701812152.057504       1 gl_context.cc:344] GL version: 2.1 (2.1 ATI-4.12.7), renderer: AMD Radeon Pro 5300M OpenGL Engine
I0000 00:00:1701812152.123969       1 gl_context.cc:344] GL version: 2.1 (2.1 ATI-4.12.7), renderer: AMD Radeon Pro 5300M OpenGL Engine
I0000 00:00:1701812152.191019       1 gl_context

KeyboardInterrupt: 

# FACE AUTHENTICATOR

## MODEL

In [25]:
class AuthenticationScreen(ABC):
    @abstractmethod
    def display(self) -> None:
        pass

In [102]:
from typing import Callable
from functools import reduce


@dataclass
class Transition:
    next_state : str
    actions: List[Callable[[], bool]]

    @staticmethod
    def to(next_state : str) -> 'Transition':
        return Transition(next_state, [])

    def when(self, action : Callable[[], bool]) -> 'Transition':
        self.actions.append(action)
        return self

    def evaluate_transition(self) -> bool:
        for action in self.actions:
            if action():
                return True
        return False

@dataclass
class State:
    is_initial : bool
    name : str
    screen : Callable[[np.ndarray], AuthenticationScreen]
    transitions : List[Transition]
    on_enter : Callable[[], None] = lambda : None
    on_exit : Callable[[], None] = lambda : None

    @staticmethod
    def default(name : str, screen : Callable[[np.ndarray], AuthenticationScreen]) -> 'State':
        return State(True, name, screen, [])
    
    @staticmethod
    def of(name : str, screen : Callable[[np.ndarray], AuthenticationScreen]) -> 'State':
        return State(False, name, screen, [])

    def do(self, transition : Transition) -> 'State':
        self.transitions.append(transition)
        return self
    
    def do_on_enter(self, action : Callable[[], None]) -> 'State':
        self.on_enter = action
        return self

In [93]:
class StateMachine(ABC):
    def __init__(self, states: List[State] ) -> None:
        self.states = states
        self.current_state = list(filter(lambda x: x.is_initial, states))[0]
        self.current_state.on_enter()
    
    def execute(self, frame : np.array):
        self.current_state.screen(frame).display()
        self.evaluate_conditions()
    
    def evaluate_conditions(self) -> None:
        for transition in self.current_state.transitions:
            if transition.evaluate_transition():
                self.current_state.on_exit()
                self.current_state = list(filter(lambda x: x.name == transition.next_state, self.states))[0]
                self.current_state.on_enter()
                break
        

In [91]:

class IconAuthenticationScreen(AuthenticationScreen):
    def __init__(self, frame : np.ndarray, icon_path : str, color : tuple, text : str, bg_color : str = None, alpha : float = 0.15) -> None:
        self.frame = frame
        self.icon_path = icon_path
        self.color = color
        self.text = text
        self.bg_color = self.color if(bg_color == None) else bg_color
        self.alpha  = alpha

    def overlay_green_screen(self):
        # Create a green screen of the same size as the frame
        screen = np.full(self.frame.shape, self.bg_color, dtype=np.uint8)
        alpha = self.alpha
        self.frame[:,:] = cv2.addWeighted(self.frame, 1 - alpha, screen, alpha, 0)

    def display(self) -> None:
        self.overlay_green_screen()
        ImageUtils.overlay_icon(image=self.frame, 
                                icon_path=self.icon_path, 
                                color=self.color, 
                                icon_size=200, 
                                point=(self.frame.shape[1] // 2, self.frame.shape[0] // 2))
        #draw text that says Press any key to unlock
        font_weight=2
        font_size=1.2
        text = self.text
        text_size, _ = cv.getTextSize(text, cv.FONT_HERSHEY_DUPLEX, font_size, font_weight)
        text_width, text_height = text_size
        text_x = self.frame.shape[1] // 2 - text_width // 2
        text_y = self.frame.shape[0] // 2 + text_height + 150
        cv.putText(self.frame, text, (text_x, text_y), cv.FONT_HERSHEY_DUPLEX, font_size, self.color, font_weight, lineType = cv2.LINE_AA)

In [13]:
import numpy as np

class LockAuthenticationScreen(IconAuthenticationScreen):
    def __init__(self, frame: np.ndarray) -> None:
        super().__init__(frame, "./assets/lock.png", (255, 255, 255), "PRESS A KEY TO START FACE RECOGNITION", bg_color=(0, 0, 0), alpha=0.4)
    

In [30]:
class AccessGrantedAuthenticationScreen(IconAuthenticationScreen):
    def __init__(self, frame: np.ndarray) -> None:
        super().__init__(frame, "./assets/success.png", (0, 255, 0), "ACCESS GRANTED", bg_color=(0, 255, 0), alpha = 0.15)

In [51]:
class AccessDeniedAuthenticationScreen(IconAuthenticationScreen):
    def __init__(self, frame: np.ndarray) -> None:
        super().__init__(frame, "./assets/danger.png", (0, 0, 255), "ACCESS DENIED", bg_color=(0, 0, 255), alpha = 0.15)

In [79]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

class FaceRecognizerAuthenticationScreen(AuthenticationScreen):
    def __init__(self, frame: np.ndarray ) -> None:
        self.frame = frame

    def display(self) -> None:
        self.draw_landmarks()
        self.draw_processing_text()
    
    def draw_processing_text(self):
        font_weight=2
        font_size=1.2
        text = "PROCESSING FACE"
        text_size, _ = cv.getTextSize(text, cv.FONT_HERSHEY_DUPLEX, font_size, font_weight)
        text_width, text_height = text_size
        text_x = self.frame.shape[1] // 2 - text_width // 2
        text_y = 300 
        cv.putText(self.frame, text, (text_x, text_y), cv.FONT_HERSHEY_DUPLEX, font_size, (255, 255, 255), font_weight, lineType = cv2.LINE_AA)


    def draw_landmarks(self):
        base_options = python.BaseOptions(model_asset_path='./face_landmarker.task')
        options = vision.FaceLandmarkerOptions(base_options=base_options,
                                            output_face_blendshapes=True,
                                            output_facial_transformation_matrixes=True)
        detector = vision.FaceLandmarker.create_from_options(options)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=self.frame)
        detection_result = detector.detect(mp_image)
        self.frame[::] = self.draw_landmarks_on_image(self.frame, detection_result)
    
    def draw_landmarks_on_image(self, rgb_image, detection_result):
        face_landmarks_list = detection_result.face_landmarks
        annotated_image = np.copy(rgb_image)

        # Loop through the detected faces to visualize.
        for idx in range(len(face_landmarks_list)):
            face_landmarks = face_landmarks_list[idx]

            # Draw the face landmarks.
            face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
            face_landmarks_proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
            ])

            solutions.drawing_utils.draw_landmarks(
                image=annotated_image,
                landmark_list=face_landmarks_proto,
                connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=mp.solutions.drawing_styles
                .get_default_face_mesh_tesselation_style())
            solutions.drawing_utils.draw_landmarks(
                image=annotated_image,
                landmark_list=face_landmarks_proto,
                connections=mp.solutions.face_mesh.FACEMESH_CONTOURS,
                landmark_drawing_spec=None,
                connection_drawing_spec=mp.solutions.drawing_styles
                .get_default_face_mesh_contours_style())
            solutions.drawing_utils.draw_landmarks(
                image=annotated_image,
                landmark_list=face_landmarks_proto,
                connections=mp.solutions.face_mesh.FACEMESH_IRISES,
                landmark_drawing_spec=None,
                connection_drawing_spec=mp.solutions.drawing_styles
                .get_default_face_mesh_iris_connections_style())

        return annotated_image

In [95]:
class InputManager:
    @staticmethod
    def is_space_pressed() -> bool:
        return cv.waitKey(1) & 0xFF == ord(' ')

## STATE MACHINE CREATION

In [103]:
import cv2 as cv
import threading
import random

#exist a timer
is_authorizated = False
finished_recognition = False

def set_authorizated():
    global is_authorizated
    global finished_recognition
    is_authorizated = random.choice([True, False])
    finished_recognition=True


timer = threading.Timer(5.0, lambda: set_authorizated())

def is_access_granted() -> bool:
    global is_authorizated
    global finished_recognition
    return is_authorizated and finished_recognition

def is_access_denied() -> bool:
    global is_authorizated
    global finished_recognition
    return not is_authorizated and finished_recognition


video = cv.VideoCapture(0)

state_machine = StateMachine([
    State.default(
          name="LOCK", 
          screen=LockAuthenticationScreen)
                .do(Transition.to("RECOGNIZING").when(InputManager.is_space_pressed)),
    State.of( 
          name="RECOGNIZING", 
          screen=FaceRecognizerAuthenticationScreen)
            .do(Transition.to("GRANTED").when(is_access_granted))
            .do_on_enter(timer.start),
    State.of(
          name="GRANTED", 
          screen =AccessGrantedAuthenticationScreen)
                .do(Transition.to("LOCK").when(InputManager.is_space_pressed)),
    State.of(
          name="DENIED", 
          screen=AccessDeniedAuthenticationScreen)
          .do(Transition.to("LOCK").when(InputManager.is_space_pressed))
])

while True:
    ret, frame = video.read()
    state_machine.execute(frame)
    cv.imshow('Video', frame)

    if cv.waitKey(1) & 0xFF == ord('q'):
        break

TypeError: State.of() got an unexpected keyword argument 'on_enter'