In [14]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult

# 打印出全部信息

In [15]:
def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Result callback that prints the complete recognizer result.

    Args:
        result: Recognition result handed to the callback; printed verbatim.
        output_image: Image associated with the result (unused here).
        timestamp_ms: Timestamp associated with the result (unused here).
    """
    print(f'gesture recognition result: {result}')

# Recognizer configuration: live-stream mode, results delivered to print_result.
options = GestureRecognizerOptions(
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result,
    base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
)

# 打印出类别信息

In [2]:
def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Result callback that prints only the category name of the first gesture.

    Prints a fallback message when the result contains no gestures.

    Args:
        result: Recognition result handed to the callback.
        output_image: Image associated with the result (unused here).
        timestamp_ms: Timestamp associated with the result (unused here).
    """
    if not result.gestures:
        print('No gesture detected')
        return

    first_gesture = result.gestures[0][0]  # take the first gesture entry
    print(f'Gesture: {first_gesture.category_name}')
        
# Recognizer configuration: live-stream mode, results delivered to print_result.
options = GestureRecognizerOptions(
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result,
    base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
)

# 类别不显示在视频上

In [18]:
import time  # imported here so this cell also runs on its own

with GestureRecognizer.create_from_options(options) as recognizer:
    # Open the default webcam.
    cap = cv2.VideoCapture(0)

    # LIVE_STREAM mode expects monotonically increasing timestamps.
    # CAP_PROP_POS_MSEC is not guaranteed to advance for a live camera, so the
    # timestamp is derived from a monotonic clock and forced to increase.
    start_time = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("忽略空帧")
                continue

            # OpenCV delivers BGR frames; the MediaPipe image is built as SRGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            elapsed_ms = int((time.monotonic() - start_time) * 1000)
            timestamp_ms = max(elapsed_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms

            # Results are delivered through the callback configured in `options`.
            recognizer.recognize_async(mp_image, timestamp_ms)

            # Show the raw video stream (no overlay in this cell).
            cv2.imshow('Gesture Recognition', frame)

            if cv2.waitKey(5) & 0xFF == 27:  # ESC quits
                break
    finally:
        # Always free the camera and windows, even if a frame raised, so the
        # device is not left locked inside the running kernel.
        cap.release()
        cv2.destroyAllWindows()

gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture 

gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture 

gesture recognition result: GestureRecognizerResult(gestures=[[Category(index=-1, score=0.6187477111816406, display_name='', category_name='Thumb_Up')]], handedness=[[Category(index=1, score=0.9951965808868408, display_name='Left', category_name='Left')]], hand_landmarks=[[NormalizedLandmark(x=0.7802848815917969, y=0.7780007123947144, z=-3.7353035509113397e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7395079135894775, y=0.6268243789672852, z=0.008938446640968323, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6583031415939331, y=0.485958456993103, z=0.0007477550534531474, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6060088276863098, y=0.37408116459846497, z=-0.010828730650246143, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.610599935054779, y=0.27525794506073, z=-0.023867711424827576, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5154380798339844, y=0.4970196783542633, z=-0.01363775972276926, visibility=0.0, presence=0.0), NormalizedLandm

gesture recognition result: GestureRecognizerResult(gestures=[[Category(index=-1, score=0.6139744520187378, display_name='', category_name='Thumb_Up')]], handedness=[[Category(index=1, score=0.9938767552375793, display_name='Left', category_name='Left')]], hand_landmarks=[[NormalizedLandmark(x=0.7441705465316772, y=0.7460597157478333, z=-3.941097759252443e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7007117867469788, y=0.5917264819145203, z=0.016280917450785637, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.623970627784729, y=0.45475178956985474, z=0.007772710174322128, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5660973191261292, y=0.3444899916648865, z=-0.004446105100214481, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.56926029920578, y=0.24001073837280273, z=-0.020135290920734406, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.4689877927303314, y=0.47435712814331055, z=-0.015027990564703941, visibility=0.0, presence=0.0), NormalizedLan

gesture recognition result: GestureRecognizerResult(gestures=[[Category(index=-1, score=0.6197209358215332, display_name='', category_name='Thumb_Up')]], handedness=[[Category(index=1, score=0.9917376041412354, display_name='Left', category_name='Left')]], hand_landmarks=[[NormalizedLandmark(x=0.7541873455047607, y=0.7614823579788208, z=-3.7160259580559796e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7025511860847473, y=0.6034145951271057, z=0.011311895214021206, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6231710314750671, y=0.4689542353153229, z=0.0012986031360924244, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5696127414703369, y=0.3591955602169037, z=-0.012528742663562298, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5672565698623657, y=0.2579576075077057, z=-0.029812321066856384, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.46770474314689636, y=0.5056607127189636, z=-0.015048735775053501, visibility=0.0, presence=0.0), Normalized

gesture recognition result: GestureRecognizerResult(gestures=[[Category(index=-1, score=0.6355770826339722, display_name='', category_name='Thumb_Up')]], handedness=[[Category(index=1, score=0.9823005795478821, display_name='Left', category_name='Left')]], hand_landmarks=[[NormalizedLandmark(x=0.7544847726821899, y=0.7932568788528442, z=-3.1451904192181246e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7063174247741699, y=0.6347830891609192, z=0.010172831825911999, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6360759139060974, y=0.5031879544258118, z=-0.0009369673789478838, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5857702493667603, y=0.39420318603515625, z=-0.015560513362288475, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5813958644866943, y=0.29562532901763916, z=-0.034008659422397614, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.48015543818473816, y=0.5505830645561218, z=-0.022341782227158546, visibility=0.0, presence=0.0), Normali

gesture recognition result: GestureRecognizerResult(gestures=[[Category(index=-1, score=0.6308879256248474, display_name='', category_name='Thumb_Up')]], handedness=[[Category(index=1, score=0.9836350679397583, display_name='Left', category_name='Left')]], hand_landmarks=[[NormalizedLandmark(x=0.7587812542915344, y=0.8612143397331238, z=-1.835200293953676e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7010751962661743, y=0.7069220542907715, z=0.004214818589389324, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6259805560112, y=0.5722551941871643, z=-0.012306497432291508, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.574663519859314, y=0.4544830918312073, z=-0.031824637204408646, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5646127462387085, y=0.35016772150993347, z=-0.05361558496952057, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.4585363268852234, y=0.6255684494972229, z=-0.02392398938536644, visibility=0.0, presence=0.0), NormalizedLandmar

gesture recognition result: GestureRecognizerResult(gestures=[[Category(index=-1, score=0.632563591003418, display_name='', category_name='Thumb_Up')]], handedness=[[Category(index=1, score=0.9820231795310974, display_name='Left', category_name='Left')]], hand_landmarks=[[NormalizedLandmark(x=0.7601037621498108, y=0.8286809325218201, z=-3.3432326063120854e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7126744985580444, y=0.6765421628952026, z=0.009615602903068066, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6407380104064941, y=0.545913577079773, z=0.0004891346325166523, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5869614481925964, y=0.44061365723609924, z=-0.01233047153800726, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.58015376329422, y=0.3446001708507538, z=-0.027828743681311607, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.4883367717266083, y=0.5909218192100525, z=-0.008901034481823444, visibility=0.0, presence=0.0), NormalizedLandm

gesture recognition result: GestureRecognizerResult(gestures=[[Category(index=-1, score=0.5925805568695068, display_name='', category_name='Thumb_Up')]], handedness=[[Category(index=1, score=0.9953110814094543, display_name='Left', category_name='Left')]], hand_landmarks=[[NormalizedLandmark(x=0.8137644529342651, y=0.698869526386261, z=-4.579692927109136e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7774564027786255, y=0.5504677891731262, z=0.016478508710861206, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7119153738021851, y=0.4208667278289795, z=0.013727378100156784, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6695627570152283, y=0.31699997186660767, z=0.0054114689119160175, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6686840653419495, y=0.22968646883964539, z=-0.004394072107970715, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.5745491981506348, y=0.4255222678184509, z=0.005532534327358007, visibility=0.0, presence=0.0), NormalizedLan

gesture recognition result: GestureRecognizerResult(gestures=[[Category(index=-1, score=0.5963507890701294, display_name='', category_name='Thumb_Up')]], handedness=[[Category(index=1, score=0.9956811666488647, display_name='Left', category_name='Left')]], hand_landmarks=[[NormalizedLandmark(x=0.859218955039978, y=0.7775886654853821, z=-7.633933023498685e-07, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.8081589937210083, y=0.6304219961166382, z=0.016804682090878487, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.7138718366622925, y=0.4864193797111511, z=0.006356352940201759, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.665627121925354, y=0.3677372634410858, z=-0.010688919574022293, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6693432927131653, y=0.26158982515335083, z=-0.029754428192973137, visibility=0.0, presence=0.0), NormalizedLandmark(x=0.6492359638214111, y=0.46351131796836853, z=-0.048431579023599625, visibility=0.0, presence=0.0), NormalizedLan

gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture recognition result: GestureRecognizerResult(gestures=[], handedness=[], hand_landmarks=[], hand_world_landmarks=[])
gesture 

# 类别显示在视频上

In [4]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult

# Global variable holding the current gesture category.
# Written by the print_result callback and read by the display loop.
current_gesture = "No gesture detected"

def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Result callback that records the first gesture's category name.

    Writes the name into the module-level ``current_gesture``; when the result
    contains no gestures the fallback text is stored instead.

    Args:
        result: Recognition result handed to the callback.
        output_image: Image associated with the result (unused here).
        timestamp_ms: Timestamp associated with the result (unused here).
    """
    global current_gesture
    detected = result.gestures
    # detected[0][0] is the first gesture entry.
    current_gesture = detected[0][0].category_name if detected else "No gesture detected"

import time  # imported here so this cell also runs on its own

# Recognizer configuration: live-stream mode, results delivered to print_result.
options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)

with GestureRecognizer.create_from_options(options) as recognizer:
    # Open the default webcam.
    cap = cv2.VideoCapture(0)

    # LIVE_STREAM mode expects monotonically increasing timestamps.
    # CAP_PROP_POS_MSEC is not guaranteed to advance for a live camera, so the
    # timestamp is derived from a monotonic clock and forced to increase.
    start_time = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("忽略空帧")
                continue

            # OpenCV delivers BGR frames; the MediaPipe image is built as SRGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            elapsed_ms = int((time.monotonic() - start_time) * 1000)
            timestamp_ms = max(elapsed_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms

            # Results are delivered through the callback configured in `options`.
            recognizer.recognize_async(mp_image, timestamp_ms)

            # Draw the gesture category most recently stored by the callback.
            cv2.putText(frame, f'Gesture: {current_gesture}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Show the annotated video stream.
            cv2.imshow('Gesture Recognition', frame)

            if cv2.waitKey(5) & 0xFF == 27:  # ESC quits
                break
    finally:
        # Always free the camera and windows, even if a frame raised, so the
        # device is not left locked inside the running kernel.
        cap.release()
        cv2.destroyAllWindows()




In [None]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult

In [None]:
def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Result callback that prints the category name of the first gesture.

    Prints a fallback message when the result contains no gestures.

    The previous version also copied ``result.hand_landmarks[0]`` into a local
    variable that was never read; that dead assignment has been removed. To
    draw landmarks, store them in a module-level variable declared ``global``.

    Args:
        result: Recognition result handed to the callback.
        output_image: Image associated with the result (unused here).
        timestamp_ms: Timestamp associated with the result (unused here).
    """
    if result.gestures:
        gesture = result.gestures[0][0]  # take the first gesture entry
        print('Gesture: {}'.format(gesture.category_name))
    else:
        print('No gesture detected')
        
# Recognizer configuration: live-stream mode, results delivered to print_result.
options = GestureRecognizerOptions(
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result,
    base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
)

In [23]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult

# Global variables shared between the print_result callback (writer) and the
# display loop (reader): the current gesture category and the landmark list.
current_gesture = "No gesture detected"
hand_landmarks = []

def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Result callback that records the first gesture and first landmark list.

    Updates the module-level ``current_gesture`` and ``hand_landmarks``. Each
    falls back to its empty value ("No gesture detected" / ``[]``) when the
    corresponding field of the result is empty.

    Args:
        result: Recognition result handed to the callback.
        output_image: Image associated with the result (unused here).
        timestamp_ms: Timestamp associated with the result (unused here).
    """
    global current_gesture, hand_landmarks

    if not result.gestures:
        current_gesture = "No gesture detected"
    else:
        # First gesture entry of the result.
        current_gesture = result.gestures[0][0].category_name

    hand_landmarks = result.hand_landmarks[0] if result.hand_landmarks else []

import time  # imported here so this cell also runs on its own

# Recognizer configuration: live-stream mode, results delivered to print_result.
options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)

with GestureRecognizer.create_from_options(options) as recognizer:
    # Open the default webcam.
    cap = cv2.VideoCapture(0)

    # LIVE_STREAM mode expects monotonically increasing timestamps.
    # CAP_PROP_POS_MSEC is not guaranteed to advance for a live camera, so the
    # timestamp is derived from a monotonic clock and forced to increase.
    start_time = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("忽略空帧")
                continue

            # OpenCV delivers BGR frames; the MediaPipe image is built as SRGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            elapsed_ms = int((time.monotonic() - start_time) * 1000)
            timestamp_ms = max(elapsed_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms

            # Results are delivered through the callback configured in `options`.
            recognizer.recognize_async(mp_image, timestamp_ms)

            # Draw the gesture category most recently stored by the callback.
            cv2.putText(frame, f'Gesture: {current_gesture}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Landmark coordinates are scaled by the frame size to get pixels.
            # An empty list simply draws nothing.
            frame_height, frame_width = frame.shape[:2]
            for landmark in hand_landmarks:
                x = int(landmark.x * frame_width)
                y = int(landmark.y * frame_height)
                cv2.circle(frame, (x, y), 5, (255, 0, 0), -1)  # Blue dot

            cv2.imshow('Face and Gesture Recognition', frame)

            if cv2.waitKey(5) & 0xFF == 27:  # ESC quits
                break
    finally:
        # Always free the camera and windows, even if a frame raised, so the
        # device is not left locked inside the running kernel.
        cap.release()
        cv2.destroyAllWindows()


# 更换为剪刀石头布模型

In [36]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult

# Global variable holding the current gesture category.
# Written by the print_result callback and read by the display loop.
current_gesture = "No gesture detected"

def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Result callback that records the first gesture's category name.

    Stores the name in the module-level ``current_gesture``, or the fallback
    text when the result contains no gestures.

    Args:
        result: Recognition result handed to the callback.
        output_image: Image associated with the result (unused here).
        timestamp_ms: Timestamp associated with the result (unused here).
    """
    global current_gesture
    if not result.gestures:
        current_gesture = "No gesture detected"
        return

    top_gesture = result.gestures[0][0]  # take the first gesture entry
    current_gesture = top_gesture.category_name

import time  # imported here so this cell also runs on its own

# Recognizer configuration using the alternative model file of this section:
# live-stream mode, results delivered to print_result.
options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path='gesture_recognizer (1).task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)

with GestureRecognizer.create_from_options(options) as recognizer:
    # Open the default webcam.
    cap = cv2.VideoCapture(0)

    # LIVE_STREAM mode expects monotonically increasing timestamps.
    # CAP_PROP_POS_MSEC is not guaranteed to advance for a live camera, so the
    # timestamp is derived from a monotonic clock and forced to increase.
    start_time = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("忽略空帧")
                continue

            # OpenCV delivers BGR frames; the MediaPipe image is built as SRGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            elapsed_ms = int((time.monotonic() - start_time) * 1000)
            timestamp_ms = max(elapsed_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms

            # Results are delivered through the callback configured in `options`.
            recognizer.recognize_async(mp_image, timestamp_ms)

            # Draw the gesture category most recently stored by the callback.
            cv2.putText(frame, f'Gesture: {current_gesture}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Show the annotated video stream.
            cv2.imshow('Gesture Recognition', frame)

            if cv2.waitKey(5) & 0xFF == 27:  # ESC quits
                break
    finally:
        # Always free the camera and windows, even if a frame raised, so the
        # device is not left locked inside the running kernel.
        cap.release()
        cv2.destroyAllWindows()


# 人脸识别边框绘制

In [1]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult

In [2]:
import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
FaceDetectorResult = mp.tasks.vision.FaceDetectorResult
VisionRunningMode = mp.tasks.vision.RunningMode

In [6]:
import mediapipe as mp
import numpy as np

BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
FaceDetectorResult = mp.tasks.vision.FaceDetectorResult
VisionRunningMode = mp.tasks.vision.RunningMode

# Shared state written by the detection callback and read by the display loop.
# Every name is initialised here so the loop can never hit an undefined
# variable. A module-level `global` statement declares nothing, so it was dropped.
origin_x = 0
origin_y = 0
width = 0
height = 0
score = 0.0
flag = False  # True while the latest callback reported at least one detection
# Create a face detector instance with the live stream mode:
def print_result(result: FaceDetectorResult, output_image: mp.Image, timestamp_ms: int):
    """Result callback that publishes the first detection's bounding box.

    Writes ``origin_x``, ``origin_y``, ``width``, ``height`` and ``score`` of
    the first detection into module-level variables and prints them. ``flag``
    tells the display loop whether those values may be drawn.

    Args:
        result: Detection result handed to the callback.
        output_image: Image associated with the result (unused here).
        timestamp_ms: Timestamp associated with the result (unused here).
    """
    global origin_x, origin_y, width, height, score, flag
    if not result.detections:
        print('No detections found.')
        flag = False
        return

    detection = result.detections[0]
    bounding_box = detection.bounding_box
    origin_x = bounding_box.origin_x
    origin_y = bounding_box.origin_y
    width = bounding_box.width
    height = bounding_box.height
    score = detection.categories[0].score
    # Raise the flag only after every field is written: results arrive
    # asynchronously, so a reader that sees flag == True must never find the
    # box variables still undefined.
    flag = True
    print(f'origin_x={origin_x}, origin_y={origin_y}, width={width}, height={height}, score={score}')

# Face detector configuration: live-stream mode, results delivered to
# print_result. Uses the BaseOptions alias defined in this cell, like the other
# option blocks, instead of reaching for the `python` module imported elsewhere.
options = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path='blaze_face_short_range.tflite'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)
    

In [7]:
import time  # imported here so this cell also runs on its own

with FaceDetector.create_from_options(options) as detector:
    # Open the default webcam.
    cap = cv2.VideoCapture(0)

    # LIVE_STREAM mode expects monotonically increasing timestamps.
    # CAP_PROP_POS_MSEC is not guaranteed to advance for a live camera, so the
    # timestamp is derived from a monotonic clock and forced to increase.
    start_time = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("忽略空帧")
                continue

            # OpenCV delivers BGR frames; the MediaPipe image is built as SRGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            elapsed_ms = int((time.monotonic() - start_time) * 1000)
            timestamp_ms = max(elapsed_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms

            # Send the frame for asynchronous face detection. Results are
            # delivered through the `result_callback` set in `options`.
            detector.detect_async(mp_image, timestamp_ms)

            # Draw the most recently published box and score, if any.
            if flag:
                top_left = (origin_x, origin_y)
                bottom_right = (origin_x + width, origin_y + height)
                color = (0, 255, 0)  # green
                thickness = 2
                cv2.rectangle(frame, top_left, bottom_right, color, thickness)
                text = f'Score: {score:.2f}'
                cv2.putText(frame, text, (origin_x, origin_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
            cv2.imshow('Face Detection', frame)

            if cv2.waitKey(5) & 0xFF == 27:  # ESC quits
                break
    finally:
        # Always free the camera and windows, even if a frame raised, so the
        # device is not left locked inside the running kernel.
        cap.release()
        cv2.destroyAllWindows()

origin_x=218, origin_y=152, width=283, height=283, score=0.8946087956428528
origin_x=216, origin_y=152, width=285, height=285, score=0.8985354900360107
origin_x=216, origin_y=155, width=282, height=282, score=0.89390629529953
origin_x=215, origin_y=156, width=282, height=282, score=0.8986147046089172
origin_x=214, origin_y=156, width=283, height=283, score=0.8968557119369507
origin_x=214, origin_y=157, width=282, height=282, score=0.8919982314109802
origin_x=214, origin_y=159, width=281, height=281, score=0.8887177109718323
origin_x=212, origin_y=158, width=281, height=281, score=0.89225172996521
origin_x=213, origin_y=158, width=281, height=281, score=0.9046250581741333
origin_x=214, origin_y=159, width=277, height=277, score=0.9146954417228699
origin_x=213, origin_y=159, width=279, height=279, score=0.9086132049560547
origin_x=214, origin_y=160, width=277, height=277, score=0.9206318855285645
origin_x=213, origin_y=160, width=278, height=278, score=0.9197012782096863
origin_x=212, or

origin_x=199, origin_y=162, width=250, height=249, score=0.9316728115081787
origin_x=200, origin_y=162, width=249, height=249, score=0.936305820941925
origin_x=200, origin_y=162, width=249, height=249, score=0.936305820941925
origin_x=199, origin_y=161, width=249, height=249, score=0.9329270720481873
origin_x=200, origin_y=161, width=249, height=249, score=0.9382615685462952
origin_x=199, origin_y=162, width=249, height=249, score=0.9333956837654114
origin_x=199, origin_y=162, width=248, height=248, score=0.927605390548706
origin_x=199, origin_y=160, width=250, height=250, score=0.9132966995239258
origin_x=200, origin_y=162, width=248, height=248, score=0.9209833145141602
origin_x=201, origin_y=162, width=247, height=247, score=0.9175390601158142
origin_x=200, origin_y=161, width=247, height=247, score=0.9258106350898743
origin_x=202, origin_y=162, width=248, height=248, score=0.91909259557724
origin_x=201, origin_y=160, width=248, height=248, score=0.9196701049804688
origin_x=202, ori

origin_x=241, origin_y=141, width=236, height=236, score=0.9120858907699585
origin_x=239, origin_y=138, width=244, height=244, score=0.9162151217460632
origin_x=243, origin_y=143, width=238, height=238, score=0.9098720550537109
origin_x=243, origin_y=142, width=241, height=241, score=0.9116430282592773
origin_x=243, origin_y=141, width=244, height=244, score=0.9122574925422668
origin_x=243, origin_y=141, width=244, height=244, score=0.9122574925422668
origin_x=246, origin_y=142, width=243, height=243, score=0.9153271317481995
origin_x=246, origin_y=144, width=243, height=243, score=0.9256587028503418
origin_x=249, origin_y=148, width=241, height=241, score=0.9255079627037048
origin_x=249, origin_y=149, width=241, height=241, score=0.93382728099823
origin_x=248, origin_y=148, width=242, height=242, score=0.9324577450752258
origin_x=249, origin_y=148, width=244, height=244, score=0.9317730069160461
origin_x=247, origin_y=149, width=246, height=246, score=0.9370030760765076
origin_x=246, 

origin_x=175, origin_y=150, width=226, height=226, score=0.9363552331924438
origin_x=175, origin_y=150, width=226, height=226, score=0.9363552331924438
origin_x=179, origin_y=151, width=227, height=227, score=0.9429682493209839
origin_x=182, origin_y=153, width=225, height=225, score=0.936177670955658
origin_x=184, origin_y=152, width=225, height=225, score=0.9343851208686829
origin_x=186, origin_y=151, width=228, height=228, score=0.9232128262519836
origin_x=186, origin_y=151, width=230, height=230, score=0.9265576004981995
origin_x=193, origin_y=156, width=225, height=225, score=0.9152153134346008
origin_x=195, origin_y=156, width=226, height=226, score=0.9018930196762085
origin_x=198, origin_y=158, width=229, height=229, score=0.8911685347557068
origin_x=202, origin_y=160, width=228, height=228, score=0.8887996077537537
origin_x=201, origin_y=159, width=233, height=233, score=0.8973329663276672
origin_x=204, origin_y=162, width=231, height=231, score=0.8913145065307617
origin_x=204,

origin_x=199, origin_y=162, width=243, height=243, score=0.8947654962539673
origin_x=198, origin_y=162, width=245, height=245, score=0.8906097412109375
origin_x=197, origin_y=162, width=245, height=245, score=0.8884457349777222
origin_x=197, origin_y=161, width=247, height=247, score=0.8989303708076477
origin_x=197, origin_y=161, width=248, height=248, score=0.8924869298934937
origin_x=198, origin_y=161, width=247, height=247, score=0.8992127180099487
origin_x=198, origin_y=163, width=247, height=246, score=0.8950058817863464
origin_x=198, origin_y=161, width=249, height=249, score=0.9115655422210693
origin_x=201, origin_y=162, width=247, height=247, score=0.9002944827079773
origin_x=202, origin_y=162, width=247, height=247, score=0.9072839617729187
origin_x=204, origin_y=162, width=247, height=247, score=0.9182611107826233


In [None]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

BaseOptions = mp.tasks.BaseOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
VisionRunningMode = mp.tasks.vision.RunningMode
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult

In [None]:
import mediapipe as mp

BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
FaceDetectorResult = mp.tasks.vision.FaceDetectorResult
VisionRunningMode = mp.tasks.vision.RunningMode

In [None]:
import mediapipe as mp
import numpy as np

BaseOptions = mp.tasks.BaseOptions
FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
FaceDetectorResult = mp.tasks.vision.FaceDetectorResult
VisionRunningMode = mp.tasks.vision.RunningMode

# Shared state written by the detection callback and read by the display loop.
# Every name is initialised here so the loop can never hit an undefined
# variable. A module-level `global` statement declares nothing, so it was dropped.
origin_x = 0
origin_y = 0
width = 0
height = 0
score = 0.0
flag = False  # True while the latest callback reported at least one detection
# Create a face detector instance with the live stream mode:
def print_result(result: FaceDetectorResult, output_image: mp.Image, timestamp_ms: int):
    """Result callback that publishes the first detection's bounding box.

    Writes ``origin_x``, ``origin_y``, ``width``, ``height`` and ``score`` of
    the first detection into module-level variables and prints them. ``flag``
    tells the display loop whether those values may be drawn.

    Args:
        result: Detection result handed to the callback.
        output_image: Image associated with the result (unused here).
        timestamp_ms: Timestamp associated with the result (unused here).
    """
    global origin_x, origin_y, width, height, score, flag
    if not result.detections:
        print('No detections found.')
        flag = False
        return

    detection = result.detections[0]
    bounding_box = detection.bounding_box
    origin_x = bounding_box.origin_x
    origin_y = bounding_box.origin_y
    width = bounding_box.width
    height = bounding_box.height
    score = detection.categories[0].score
    # Raise the flag only after every field is written: results arrive
    # asynchronously, so a reader that sees flag == True must never find the
    # box variables still undefined.
    flag = True
    print(f'origin_x={origin_x}, origin_y={origin_y}, width={width}, height={height}, score={score}')

# Face detector configuration: live-stream mode, results delivered to
# print_result. Uses the BaseOptions alias defined in this cell, like the other
# option blocks, instead of reaching for the `python` module imported elsewhere.
options = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path='blaze_face_short_range.tflite'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)
    

In [None]:
import time  # imported here so this cell also runs on its own

with FaceDetector.create_from_options(options) as detector:
    # Open the default webcam.
    cap = cv2.VideoCapture(0)

    # LIVE_STREAM mode expects monotonically increasing timestamps.
    # CAP_PROP_POS_MSEC is not guaranteed to advance for a live camera, so the
    # timestamp is derived from a monotonic clock and forced to increase.
    start_time = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("忽略空帧")
                continue

            # OpenCV delivers BGR frames; the MediaPipe image is built as SRGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            elapsed_ms = int((time.monotonic() - start_time) * 1000)
            timestamp_ms = max(elapsed_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms

            # Send the frame for asynchronous face detection. Results are
            # delivered through the `result_callback` set in `options`.
            detector.detect_async(mp_image, timestamp_ms)

            # Draw the most recently published box and score, if any.
            if flag:
                top_left = (origin_x, origin_y)
                bottom_right = (origin_x + width, origin_y + height)
                color = (0, 255, 0)  # green
                thickness = 2
                cv2.rectangle(frame, top_left, bottom_right, color, thickness)
                text = f'Score: {score:.2f}'
                cv2.putText(frame, text, (origin_x, origin_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
            cv2.imshow('Face Detection', frame)

            if cv2.waitKey(5) & 0xFF == 27:  # ESC quits
                break
    finally:
        # Always free the camera and windows, even if a frame raised, so the
        # device is not left locked inside the running kernel.
        cap.release()
        cv2.destroyAllWindows()

# 人脸识别+手势检测

In [33]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Short aliases for the MediaPipe Tasks classes used by this cell.
VisionRunningMode = mp.tasks.vision.RunningMode
BaseOptions = mp.tasks.BaseOptions

# Gesture recognition classes
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer

# Face detection classes
FaceDetectorResult = mp.tasks.vision.FaceDetectorResult
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
FaceDetector = mp.tasks.vision.FaceDetector

# Module-level state written by the result callbacks and read by the render loop.
# `flag` says whether the face box fields currently describe a detection.
flag = False
origin_x = origin_y = width = height = score = 0
hand_landmarks = []
current_gesture = "No gesture detected"

# Callback function for face detection results
def print_face_result(result: FaceDetectorResult, output_image: mp.Image, timestamp_ms: int):
    """Record the first detected face in module globals and log it.

    Passed as ``result_callback`` to ``FaceDetectorOptions``. Only
    ``result.detections[0]`` is used; any further detections are ignored.

    Args:
        result: Detection result; ``result.detections`` may be empty.
        output_image: Unused.
        timestamp_ms: Unused.

    Side effects:
        Sets ``flag`` to False when nothing was detected. Otherwise writes
        ``origin_x``, ``origin_y``, ``width``, ``height`` and ``score`` and
        then sets ``flag`` to True. Prints one line per call.
    """
    global origin_x, origin_y, width, height, score, flag
    if not result.detections:
        print('No face detections found.')
        flag = False
        return

    detection = result.detections[0]
    bounding_box = detection.bounding_box

    # Write the geometry first and raise `flag` last. The render loop reads
    # these globals while results arrive asynchronously (presumably from
    # another thread -- TODO confirm); with `flag` set first it could draw the
    # previous or initial all-zero box for a face that has just appeared.
    origin_x, origin_y = bounding_box.origin_x, bounding_box.origin_y
    width, height = bounding_box.width, bounding_box.height
    score = detection.categories[0].score
    flag = True
    print(f'origin_x={origin_x}, origin_y={origin_y}, width={width}, height={height}, score={score}')

# Callback function for gesture recognition results
def print_gesture_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Store the latest gesture label and hand landmarks in module globals.

    Passed as ``result_callback`` to ``GestureRecognizerOptions``.

    Args:
        result: Recognition result; ``gestures`` and ``hand_landmarks`` may
            each be empty.
        output_image: Unused.
        timestamp_ms: Unused.

    Side effects:
        ``current_gesture`` becomes the ``category_name`` of
        ``result.gestures[0][0]`` (presumably the top gesture of the first
        hand), or "No gesture detected" when there are no gestures.
        ``hand_landmarks`` becomes ``result.hand_landmarks[0]`` or ``[]``.
    """
    global current_gesture, hand_landmarks
    current_gesture = (
        result.gestures[0][0].category_name
        if result.gestures
        else "No gesture detected"
    )
    hand_landmarks = result.hand_landmarks[0] if result.hand_landmarks else []

import time  # cell-local import: monotonic clock used to timestamp frames

# Options for face detection (results are delivered to print_face_result)
face_options = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path='blaze_face_short_range.tflite'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_face_result
)

# Options for gesture recognition (results are delivered to print_gesture_result)
gesture_options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_gesture_result
)

# Run both tasks on every webcam frame and overlay the latest results that
# the callbacks stored in the module globals.
with FaceDetector.create_from_options(face_options) as face_detector, \
     GestureRecognizer.create_from_options(gesture_options) as gesture_recognizer:

    # Open the camera
    cap = cv2.VideoCapture(0)

    # The *_async calls need increasing timestamps. CAP_PROP_POS_MSEC is
    # documented as a video-file position and is not guaranteed to advance
    # for a live camera, so frames are timed with a monotonic clock instead.
    stream_start = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Ignoring empty frame")
                continue

            # Convert the BGR frame delivered by OpenCV to RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Create MediaPipe image object
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Milliseconds since the stream started, forced to be strictly
            # greater than the previous frame's timestamp.
            elapsed_ms = int((time.monotonic() - stream_start) * 1000)
            timestamp_ms = max(elapsed_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms

            # Process image for face detection
            face_detector.detect_async(mp_image, timestamp_ms)

            # Process image for gesture recognition
            gesture_recognizer.recognize_async(mp_image, timestamp_ms)

            # Draw face bounding box if a face is detected
            if flag:
                top_left = (origin_x, origin_y)
                bottom_right = (origin_x + width, origin_y + height)
                color = (0, 255, 0)  # Green
                thickness = 2
                cv2.rectangle(frame, top_left, bottom_right, color, thickness)
                text = f'Score: {score:.2f}'
                cv2.putText(frame, text, (origin_x, origin_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

            # Draw gesture recognition result
            cv2.putText(frame, f'Gesture: {current_gesture}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Read the global once so the emptiness check and the loop below
            # see the same list even if the callback rebinds it meanwhile.
            landmarks = hand_landmarks
            if landmarks:
                frame_height, frame_width = frame.shape[:2]
                for landmark in landmarks:
                    # landmark.x / landmark.y are scaled by the frame size here,
                    # i.e. treated as normalized image coordinates.
                    x = int(landmark.x * frame_width)
                    y = int(landmark.y * frame_height)
                    cv2.circle(frame, (x, y), 5, (255, 0, 0), -1)  # Blue dot

            # Display video stream
            cv2.imshow('Face and Gesture Recognition', frame)

            # ESC quits
            if cv2.waitKey(5) & 0xFF == 27:
                break
    finally:
        # Always free the camera and close the window, including when the
        # loop is left by an exception or a kernel interrupt.
        cap.release()
        cv2.destroyAllWindows()


origin_x=210, origin_y=207, width=210, height=210, score=0.9101529121398926
origin_x=218, origin_y=204, width=212, height=212, score=0.9181281328201294
origin_x=220, origin_y=206, width=211, height=211, score=0.9252174496650696
origin_x=218, origin_y=203, width=211, height=211, score=0.9125062823295593
origin_x=221, origin_y=207, width=208, height=208, score=0.9143196940422058
origin_x=222, origin_y=206, width=206, height=206, score=0.9161774516105652
origin_x=220, origin_y=206, width=206, height=206, score=0.9329074025154114
origin_x=221, origin_y=203, width=207, height=207, score=0.9239675402641296
origin_x=222, origin_y=206, width=205, height=205, score=0.9288376569747925
origin_x=220, origin_y=205, width=205, height=205, score=0.930687427520752
origin_x=217, origin_y=203, width=207, height=207, score=0.9334518909454346
origin_x=215, origin_y=202, width=207, height=207, score=0.9249897599220276
origin_x=213, origin_y=204, width=206, height=206, score=0.923653781414032
origin_x=211, 

origin_x=0, origin_y=215, width=194, height=194, score=0.9159181118011475
origin_x=0, origin_y=214, width=195, height=195, score=0.9144871234893799
origin_x=0, origin_y=216, width=194, height=194, score=0.9141486883163452
origin_x=0, origin_y=214, width=196, height=196, score=0.9160768985748291
origin_x=0, origin_y=211, width=199, height=199, score=0.9191262722015381
origin_x=0, origin_y=211, width=198, height=198, score=0.918317973613739
origin_x=0, origin_y=214, width=194, height=194, score=0.896176278591156
origin_x=442, origin_y=110, width=147, height=147, score=0.9033154249191284
origin_x=440, origin_y=106, width=152, height=152, score=0.9022320508956909
origin_x=0, origin_y=211, width=197, height=197, score=0.8897972702980042
origin_x=0, origin_y=212, width=196, height=196, score=0.8891796469688416
origin_x=0, origin_y=215, width=196, height=196, score=0.8961207866668701
origin_x=0, origin_y=214, width=196, height=196, score=0.917748212814331
origin_x=439, origin_y=107, width=151

origin_x=433, origin_y=113, width=142, height=142, score=0.9328863024711609
origin_x=432, origin_y=111, width=145, height=145, score=0.9309161305427551
origin_x=433, origin_y=114, width=142, height=142, score=0.9332882165908813
origin_x=433, origin_y=111, width=146, height=146, score=0.9293271899223328
origin_x=433, origin_y=112, width=144, height=144, score=0.9343585968017578
origin_x=431, origin_y=110, width=147, height=147, score=0.9318278431892395
origin_x=431, origin_y=109, width=148, height=148, score=0.938520073890686
origin_x=431, origin_y=109, width=148, height=148, score=0.9442806839942932
origin_x=431, origin_y=108, width=147, height=147, score=0.946657121181488
origin_x=431, origin_y=109, width=147, height=147, score=0.9386526346206665
origin_x=432, origin_y=108, width=147, height=147, score=0.9284559488296509
origin_x=433, origin_y=106, width=146, height=146, score=0.9443798661231995
origin_x=431, origin_y=108, width=147, height=147, score=0.9414024949073792
origin_x=432, 

origin_x=432, origin_y=101, width=150, height=150, score=0.894721269607544
origin_x=436, origin_y=99, width=144, height=144, score=0.9325741529464722
origin_x=434, origin_y=99, width=145, height=145, score=0.93495774269104
origin_x=434, origin_y=99, width=148, height=148, score=0.9218548536300659
origin_x=433, origin_y=98, width=148, height=148, score=0.917546272277832
origin_x=435, origin_y=98, width=144, height=144, score=0.9166786670684814
origin_x=434, origin_y=98, width=147, height=147, score=0.9150501489639282
origin_x=436, origin_y=97, width=143, height=143, score=0.9152743220329285
origin_x=434, origin_y=96, width=145, height=145, score=0.9154338240623474
origin_x=434, origin_y=94, width=145, height=145, score=0.9069142937660217
origin_x=430, origin_y=97, width=143, height=143, score=0.9087429642677307
origin_x=430, origin_y=96, width=142, height=142, score=0.9286259412765503
origin_x=429, origin_y=97, width=142, height=142, score=0.9285703301429749
origin_x=430, origin_y=97, w

origin_x=434, origin_y=100, width=138, height=138, score=0.902370810508728
origin_x=435, origin_y=100, width=134, height=134, score=0.9378243088722229
origin_x=435, origin_y=100, width=134, height=134, score=0.9378243088722229
origin_x=433, origin_y=98, width=138, height=138, score=0.9181397557258606
origin_x=438, origin_y=101, width=138, height=138, score=0.9081335067749023
origin_x=442, origin_y=102, width=133, height=133, score=0.8891074657440186
origin_x=439, origin_y=101, width=139, height=139, score=0.9034172892570496
origin_x=442, origin_y=98, width=136, height=136, score=0.9206894636154175
origin_x=439, origin_y=95, width=137, height=137, score=0.9125206470489502
origin_x=437, origin_y=94, width=142, height=142, score=0.9204518795013428
origin_x=439, origin_y=99, width=140, height=140, score=0.906585156917572
origin_x=442, origin_y=99, width=138, height=138, score=0.9208816885948181
origin_x=441, origin_y=96, width=143, height=143, score=0.9041326642036438
origin_x=440, origin_

No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
origin_x=427, origin_y=52, width=184, height=184, score=0.5703005194664001
origin_x=433, origin_y=67, width=173, height=173, score=0.5352263450622559
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
No face detections found.
origin_x=436, origin_y=76, width=160, height=160, score=0.5166000723838806
origin_x=419, origin_y=89, width=206, height=206, score=0.5076792240142822
origin_x=456, origin_y=76, width=164, height=164, 

origin_x=262, origin_y=159, width=293, height=293, score=0.5969769358634949
origin_x=317, origin_y=146, width=305, height=305, score=0.5015551447868347
origin_x=357, origin_y=183, width=281, height=281, score=0.6013743281364441
origin_x=394, origin_y=178, width=266, height=266, score=0.5657192468643188
No face detections found.
No face detections found.
No face detections found.
No face detections found.
origin_x=474, origin_y=98, width=158, height=158, score=0.5777639746665955
origin_x=478, origin_y=90, width=151, height=151, score=0.7354536652565002
origin_x=474, origin_y=95, width=159, height=159, score=0.6399800181388855
origin_x=477, origin_y=94, width=154, height=154, score=0.660780668258667
origin_x=475, origin_y=95, width=151, height=151, score=0.7430195212364197
origin_x=478, origin_y=91, width=149, height=149, score=0.7979031801223755
origin_x=479, origin_y=88, width=153, height=153, score=0.7700915932655334
origin_x=476, origin_y=90, width=150, height=150, score=0.7034370303

No face detections found.
origin_x=0, origin_y=217, width=196, height=196, score=0.5552668571472168
origin_x=0, origin_y=219, width=196, height=196, score=0.5588127970695496
origin_x=0, origin_y=219, width=197, height=197, score=0.5394513010978699
origin_x=0, origin_y=215, width=202, height=202, score=0.5421926379203796
origin_x=0, origin_y=215, width=202, height=202, score=0.5421926379203796
origin_x=0, origin_y=218, width=200, height=200, score=0.5254113078117371
origin_x=0, origin_y=219, width=201, height=201, score=0.5325732827186584
origin_x=0, origin_y=217, width=199, height=199, score=0.5022442936897278
No face detections found.
origin_x=0, origin_y=222, width=193, height=193, score=0.5042617321014404
origin_x=0, origin_y=221, width=195, height=195, score=0.5248641967773438
origin_x=0, origin_y=222, width=195, height=195, score=0.532082200050354
origin_x=0, origin_y=219, width=200, height=200, score=0.5246579647064209
origin_x=0, origin_y=218, width=202, height=202, score=0.5004

origin_x=0, origin_y=207, width=203, height=203, score=0.8418576717376709
origin_x=0, origin_y=209, width=201, height=201, score=0.8418425917625427
origin_x=0, origin_y=210, width=198, height=198, score=0.8642944097518921
origin_x=0, origin_y=207, width=199, height=199, score=0.9053069949150085
origin_x=0, origin_y=205, width=202, height=202, score=0.9193167090415955
origin_x=0, origin_y=206, width=200, height=200, score=0.9386427402496338
origin_x=0, origin_y=216, width=187, height=187, score=0.9425899982452393
origin_x=0, origin_y=213, width=188, height=188, score=0.9483611583709717
origin_x=0, origin_y=214, width=189, height=189, score=0.948340892791748
origin_x=0, origin_y=214, width=187, height=187, score=0.9444888234138489
origin_x=0, origin_y=210, width=189, height=189, score=0.943577229976654
origin_x=0, origin_y=211, width=187, height=187, score=0.9291393160820007
origin_x=0, origin_y=213, width=186, height=186, score=0.9396741986274719
origin_x=0, origin_y=213, width=184, hei

origin_x=0, origin_y=216, width=203, height=203, score=0.8532009124755859
origin_x=0, origin_y=214, width=201, height=201, score=0.866804838180542
origin_x=0, origin_y=216, width=198, height=198, score=0.9027010798454285
origin_x=0, origin_y=214, width=198, height=198, score=0.8959956765174866
origin_x=0, origin_y=214, width=200, height=200, score=0.8859909772872925
origin_x=0, origin_y=215, width=195, height=195, score=0.9062216877937317
origin_x=0, origin_y=213, width=197, height=197, score=0.8853511214256287
origin_x=0, origin_y=214, width=195, height=195, score=0.9121196269989014
origin_x=0, origin_y=211, width=193, height=193, score=0.9106817841529846
origin_x=0, origin_y=213, width=194, height=194, score=0.9058340787887573
origin_x=0, origin_y=215, width=194, height=194, score=0.9040898680686951
origin_x=0, origin_y=214, width=195, height=195, score=0.9047528505325317
origin_x=0, origin_y=214, width=197, height=197, score=0.9103726744651794
origin_x=0, origin_y=214, width=198, he

origin_x=3, origin_y=205, width=199, height=199, score=0.9707127213478088
origin_x=2, origin_y=206, width=199, height=199, score=0.9686807990074158
origin_x=2, origin_y=209, width=198, height=198, score=0.9655389189720154
origin_x=3, origin_y=209, width=197, height=197, score=0.9646963477134705
origin_x=3, origin_y=209, width=197, height=197, score=0.9613003730773926
origin_x=2, origin_y=209, width=197, height=197, score=0.9587266445159912
origin_x=3, origin_y=210, width=197, height=197, score=0.958747386932373
origin_x=3, origin_y=209, width=198, height=198, score=0.9580601453781128
origin_x=4, origin_y=209, width=198, height=198, score=0.9578551650047302
origin_x=3, origin_y=210, width=197, height=197, score=0.9615970253944397
origin_x=2, origin_y=208, width=199, height=199, score=0.958248496055603
origin_x=4, origin_y=207, width=197, height=197, score=0.9639664888381958
origin_x=5, origin_y=207, width=196, height=196, score=0.9644262790679932
origin_x=3, origin_y=209, width=196, hei

origin_x=53, origin_y=210, width=196, height=196, score=0.9460629820823669
origin_x=58, origin_y=211, width=195, height=195, score=0.9482037425041199
origin_x=59, origin_y=209, width=199, height=199, score=0.9484374523162842
origin_x=60, origin_y=210, width=199, height=199, score=0.9473071098327637
origin_x=63, origin_y=209, width=200, height=200, score=0.9373833537101746
origin_x=64, origin_y=208, width=205, height=205, score=0.9348527789115906
origin_x=66, origin_y=207, width=205, height=205, score=0.9451133608818054
origin_x=69, origin_y=204, width=207, height=207, score=0.936740517616272
origin_x=72, origin_y=206, width=208, height=208, score=0.9377521276473999
origin_x=74, origin_y=202, width=212, height=212, score=0.932711124420166
origin_x=76, origin_y=205, width=208, height=208, score=0.9263936281204224
origin_x=76, origin_y=201, width=213, height=213, score=0.9428471326828003
origin_x=78, origin_y=201, width=214, height=214, score=0.9429599046707153
origin_x=80, origin_y=202, 

origin_x=0, origin_y=216, width=199, height=199, score=0.8672754168510437
origin_x=0, origin_y=217, width=197, height=197, score=0.8287491202354431
origin_x=0, origin_y=216, width=203, height=203, score=0.8132284879684448
origin_x=0, origin_y=217, width=200, height=200, score=0.8039112687110901
origin_x=0, origin_y=215, width=204, height=204, score=0.7764854431152344
origin_x=0, origin_y=215, width=198, height=198, score=0.8346271514892578
origin_x=0, origin_y=213, width=201, height=201, score=0.8462614417076111
origin_x=0, origin_y=216, width=200, height=200, score=0.8017711639404297
origin_x=0, origin_y=215, width=203, height=203, score=0.8351528644561768
origin_x=0, origin_y=211, width=202, height=202, score=0.8509887456893921
origin_x=0, origin_y=218, width=197, height=197, score=0.8621832728385925
origin_x=0, origin_y=213, width=200, height=200, score=0.8399567008018494
origin_x=0, origin_y=213, width=203, height=203, score=0.8338906168937683
origin_x=0, origin_y=210, width=206, h

No face detections found.
No face detections found.
origin_x=0, origin_y=211, width=185, height=185, score=0.5787357687950134
origin_x=430, origin_y=81, width=150, height=150, score=0.6075335144996643
origin_x=435, origin_y=78, width=153, height=153, score=0.6125492453575134
origin_x=438, origin_y=97, width=129, height=129, score=0.8073045015335083
origin_x=443, origin_y=90, width=138, height=138, score=0.7187249660491943
origin_x=440, origin_y=99, width=174, height=174, score=0.5141412019729614
origin_x=444, origin_y=91, width=178, height=178, score=0.6305363774299622
origin_x=444, origin_y=84, width=166, height=166, score=0.5833196043968201
origin_x=449, origin_y=92, width=145, height=145, score=0.7808199524879456
origin_x=448, origin_y=84, width=146, height=146, score=0.8092489242553711
origin_x=454, origin_y=93, width=151, height=151, score=0.781224250793457
origin_x=453, origin_y=85, width=146, height=146, score=0.8245108723640442
origin_x=455, origin_y=91, width=145, height=145, 

origin_x=474, origin_y=90, width=152, height=152, score=0.861348569393158
origin_x=474, origin_y=92, width=150, height=150, score=0.8691631555557251
origin_x=475, origin_y=94, width=147, height=147, score=0.8620088696479797
origin_x=475, origin_y=93, width=147, height=147, score=0.8787343502044678
origin_x=478, origin_y=91, width=146, height=146, score=0.8594614267349243
origin_x=478, origin_y=93, width=146, height=146, score=0.8486199378967285
origin_x=478, origin_y=91, width=150, height=150, score=0.8200281262397766
origin_x=479, origin_y=94, width=150, height=150, score=0.7849557399749756
origin_x=479, origin_y=92, width=149, height=149, score=0.8155346512794495
origin_x=479, origin_y=91, width=149, height=149, score=0.8155944347381592
origin_x=480, origin_y=91, width=147, height=147, score=0.8176576495170593
origin_x=479, origin_y=92, width=147, height=147, score=0.8325887322425842
origin_x=481, origin_y=94, width=146, height=146, score=0.7995976805686951
origin_x=480, origin_y=92,

origin_x=486, origin_y=99, width=144, height=144, score=0.7593975067138672
origin_x=484, origin_y=102, width=147, height=147, score=0.7810691595077515
origin_x=483, origin_y=101, width=147, height=147, score=0.7755131721496582
origin_x=483, origin_y=100, width=149, height=149, score=0.809488832950592
origin_x=483, origin_y=103, width=149, height=149, score=0.7821105718612671
origin_x=483, origin_y=103, width=145, height=145, score=0.8060455322265625
origin_x=483, origin_y=96, width=147, height=147, score=0.8149200677871704
origin_x=483, origin_y=96, width=145, height=145, score=0.8300637602806091
origin_x=482, origin_y=93, width=147, height=147, score=0.8308063745498657
origin_x=482, origin_y=95, width=145, height=145, score=0.8418691158294678
origin_x=483, origin_y=96, width=146, height=146, score=0.8161588311195374
origin_x=481, origin_y=96, width=147, height=147, score=0.8314598202705383
origin_x=483, origin_y=97, width=146, height=146, score=0.8055431842803955
origin_x=482, origin_

origin_x=479, origin_y=92, width=147, height=147, score=0.832362949848175
origin_x=477, origin_y=92, width=149, height=149, score=0.8378314971923828
origin_x=478, origin_y=100, width=145, height=145, score=0.8288132548332214
origin_x=479, origin_y=98, width=145, height=145, score=0.8375198245048523
origin_x=480, origin_y=97, width=143, height=143, score=0.8495782017707825
origin_x=480, origin_y=102, width=140, height=140, score=0.8727708458900452
origin_x=475, origin_y=98, width=146, height=146, score=0.8349844813346863
origin_x=478, origin_y=98, width=143, height=143, score=0.8662387728691101
origin_x=478, origin_y=96, width=143, height=143, score=0.849378228187561
origin_x=476, origin_y=98, width=145, height=145, score=0.7994861006736755
origin_x=479, origin_y=98, width=144, height=144, score=0.8135591149330139
origin_x=483, origin_y=96, width=144, height=144, score=0.7665013670921326
origin_x=483, origin_y=94, width=148, height=148, score=0.7654958963394165
origin_x=476, origin_y=96

origin_x=151, origin_y=192, width=232, height=232, score=0.9605771899223328
origin_x=152, origin_y=194, width=231, height=231, score=0.9622389674186707
origin_x=152, origin_y=194, width=231, height=231, score=0.9622389674186707
origin_x=151, origin_y=193, width=230, height=230, score=0.9631391763687134
origin_x=151, origin_y=193, width=230, height=230, score=0.9631391763687134
origin_x=152, origin_y=194, width=229, height=229, score=0.9583649039268494
origin_x=155, origin_y=196, width=225, height=225, score=0.9531140327453613
origin_x=152, origin_y=192, width=230, height=230, score=0.9510937929153442
origin_x=152, origin_y=195, width=228, height=228, score=0.9460464715957642
origin_x=152, origin_y=194, width=230, height=230, score=0.9488714337348938
origin_x=150, origin_y=193, width=233, height=233, score=0.9521312713623047
origin_x=152, origin_y=193, width=230, height=230, score=0.9493827223777771
origin_x=151, origin_y=194, width=229, height=229, score=0.9488529562950134
origin_x=153

origin_x=140, origin_y=200, width=218, height=218, score=0.8808630108833313
origin_x=138, origin_y=198, width=221, height=221, score=0.8821083903312683
origin_x=139, origin_y=199, width=220, height=220, score=0.8806076645851135
origin_x=139, origin_y=198, width=222, height=222, score=0.8865352869033813
origin_x=138, origin_y=197, width=224, height=224, score=0.9033485054969788
origin_x=138, origin_y=197, width=224, height=224, score=0.9033143520355225
origin_x=137, origin_y=196, width=226, height=226, score=0.9026585221290588
origin_x=139, origin_y=197, width=226, height=226, score=0.8992294073104858
origin_x=139, origin_y=197, width=226, height=226, score=0.9075702428817749
origin_x=140, origin_y=197, width=226, height=226, score=0.9109748601913452
origin_x=140, origin_y=197, width=227, height=227, score=0.9123350381851196
origin_x=139, origin_y=198, width=227, height=227, score=0.9104173183441162
origin_x=138, origin_y=198, width=228, height=228, score=0.9056184887886047
origin_x=139

origin_x=159, origin_y=197, width=229, height=229, score=0.9373396635055542
origin_x=157, origin_y=197, width=229, height=229, score=0.9285579919815063
origin_x=158, origin_y=197, width=230, height=230, score=0.9314557909965515
origin_x=159, origin_y=197, width=230, height=230, score=0.9356473684310913
origin_x=160, origin_y=197, width=229, height=229, score=0.9356780052185059
origin_x=160, origin_y=196, width=230, height=230, score=0.9323408603668213
origin_x=160, origin_y=197, width=229, height=229, score=0.9325222373008728
origin_x=160, origin_y=196, width=229, height=229, score=0.941502571105957
origin_x=159, origin_y=196, width=230, height=230, score=0.9366519451141357
origin_x=158, origin_y=197, width=230, height=230, score=0.9368079900741577
origin_x=158, origin_y=196, width=231, height=231, score=0.9391330480575562
origin_x=158, origin_y=197, width=229, height=229, score=0.9371449947357178
origin_x=157, origin_y=197, width=231, height=231, score=0.9385119080543518
origin_x=156,

In [35]:
import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Short aliases for the MediaPipe Tasks classes used by this cell.
VisionRunningMode = mp.tasks.vision.RunningMode
BaseOptions = mp.tasks.BaseOptions

# Gesture recognition classes
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
GestureRecognizer = mp.tasks.vision.GestureRecognizer

# Face detection classes
FaceDetectorResult = mp.tasks.vision.FaceDetectorResult
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
FaceDetector = mp.tasks.vision.FaceDetector

# Module-level state written by the result callbacks and read by the render loop.
# `flag` says whether the face box fields currently describe a detection.
flag = False
origin_x = origin_y = width = height = score = 0
hand_landmarks = []
current_gesture = "No gesture detected"

# Callback function for face detection results
def print_face_result(result: FaceDetectorResult, output_image: mp.Image, timestamp_ms: int):
    """Record the first detected face in module globals and log it.

    Passed as ``result_callback`` to ``FaceDetectorOptions``. Only
    ``result.detections[0]`` is used; any further detections are ignored.

    Args:
        result: Detection result; ``result.detections`` may be empty.
        output_image: Unused.
        timestamp_ms: Unused.

    Side effects:
        Sets ``flag`` to False when nothing was detected. Otherwise writes
        ``origin_x``, ``origin_y``, ``width``, ``height`` and ``score`` and
        then sets ``flag`` to True. Prints one line per call.
    """
    global origin_x, origin_y, width, height, score, flag
    if not result.detections:
        print('No face detections found.')
        flag = False
        return

    detection = result.detections[0]
    bounding_box = detection.bounding_box

    # Write the geometry first and raise `flag` last. The render loop reads
    # these globals while results arrive asynchronously (presumably from
    # another thread -- TODO confirm); with `flag` set first it could draw the
    # previous or initial all-zero box for a face that has just appeared.
    origin_x, origin_y = bounding_box.origin_x, bounding_box.origin_y
    width, height = bounding_box.width, bounding_box.height
    score = detection.categories[0].score
    flag = True
    print(f'origin_x={origin_x}, origin_y={origin_y}, width={width}, height={height}, score={score}')

# Callback function for gesture recognition results
def print_gesture_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    """Store the latest gesture label and hand landmarks in module globals.

    Passed as ``result_callback`` to ``GestureRecognizerOptions``.

    Args:
        result: Recognition result; ``gestures`` and ``hand_landmarks`` may
            each be empty.
        output_image: Unused.
        timestamp_ms: Unused.

    Side effects:
        ``current_gesture`` becomes the ``category_name`` of
        ``result.gestures[0][0]`` (presumably the top gesture of the first
        hand), or "No gesture detected" when there are no gestures.
        ``hand_landmarks`` becomes ``result.hand_landmarks[0]`` or ``[]``.
    """
    global current_gesture, hand_landmarks
    current_gesture = (
        result.gestures[0][0].category_name
        if result.gestures
        else "No gesture detected"
    )
    hand_landmarks = result.hand_landmarks[0] if result.hand_landmarks else []

import time  # cell-local import: monotonic clock used to timestamp frames

# Options for face detection (results are delivered to print_face_result)
face_options = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path='blaze_face_short_range.tflite'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_face_result
)

# Options for gesture recognition (results are delivered to print_gesture_result)
gesture_options = GestureRecognizerOptions(
    base_options=BaseOptions(model_asset_path='gesture_recognizer.task'),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_gesture_result
)

# Radius range (pixels) for landmark dots; the radius is derived from landmark.z.
min_point_size = 1
max_point_size = 15

# Run both tasks on every webcam frame and overlay the latest results that
# the callbacks stored in the module globals.
with FaceDetector.create_from_options(face_options) as face_detector, \
     GestureRecognizer.create_from_options(gesture_options) as gesture_recognizer:

    # Open the camera
    cap = cv2.VideoCapture(0)

    # The *_async calls need increasing timestamps. CAP_PROP_POS_MSEC is
    # documented as a video-file position and is not guaranteed to advance
    # for a live camera, so frames are timed with a monotonic clock instead.
    stream_start = time.monotonic()
    last_timestamp_ms = -1

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                print("Ignoring empty frame")
                continue

            # Convert the BGR frame delivered by OpenCV to RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Create MediaPipe image object
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # Milliseconds since the stream started, forced to be strictly
            # greater than the previous frame's timestamp.
            elapsed_ms = int((time.monotonic() - stream_start) * 1000)
            timestamp_ms = max(elapsed_ms, last_timestamp_ms + 1)
            last_timestamp_ms = timestamp_ms

            # Process image for face detection
            face_detector.detect_async(mp_image, timestamp_ms)

            # Process image for gesture recognition
            gesture_recognizer.recognize_async(mp_image, timestamp_ms)

            # Draw face bounding box if a face is detected
            if flag:
                top_left = (origin_x, origin_y)
                bottom_right = (origin_x + width, origin_y + height)
                color = (0, 255, 0)  # Green
                thickness = 2
                cv2.rectangle(frame, top_left, bottom_right, color, thickness)
                text = f'Score: {score:.2f}'
                cv2.putText(frame, text, (origin_x, origin_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

            # Draw gesture recognition result
            cv2.putText(frame, f'Gesture: {current_gesture}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Draw hand landmarks if detected. Read the global once so the
            # emptiness check and the loop below see the same list even if the
            # callback rebinds it meanwhile.
            landmarks = hand_landmarks
            if landmarks:
                frame_height, frame_width = frame.shape[:2]
                for landmark in landmarks:
                    # landmark.x / landmark.y are scaled by the frame size here,
                    # i.e. treated as normalized image coordinates.
                    x = int(landmark.x * frame_width)
                    y = int(landmark.y * frame_height)

                    # Map z to a dot radius: smaller z -> larger dot.
                    # z is assumed to lie roughly in [-0.1, 0.1]; values outside
                    # that range are clamped so the radius always stays within
                    # [min_point_size, max_point_size].
                    z = landmark.z
                    normalized_z = min(1.0, max(0.0, (z + 0.1) / 0.2))
                    point_size = max(min_point_size, max_point_size - int(normalized_z * (max_point_size - min_point_size)))

                    cv2.circle(frame, (x, y), point_size, (255, 0, 0), -1)  # Blue dot

            # Display video stream
            cv2.imshow('Face and Gesture Recognition', frame)

            # ESC quits
            if cv2.waitKey(5) & 0xFF == 27:
                break
    finally:
        # Always free the camera and close the window, including when the
        # loop is left by an exception or a kernel interrupt.
        cap.release()
        cv2.destroyAllWindows()


origin_x=165, origin_y=212, width=194, height=194, score=0.960536003112793
origin_x=166, origin_y=210, width=195, height=195, score=0.9603395462036133
origin_x=165, origin_y=212, width=196, height=196, score=0.9604800343513489
origin_x=165, origin_y=212, width=195, height=195, score=0.9628810286521912
origin_x=166, origin_y=212, width=194, height=194, score=0.9584035277366638
origin_x=165, origin_y=211, width=195, height=195, score=0.9604224562644958
origin_x=164, origin_y=209, width=196, height=196, score=0.9606904983520508
origin_x=166, origin_y=210, width=195, height=195, score=0.9592450857162476
origin_x=167, origin_y=216, width=191, height=191, score=0.9632335305213928
origin_x=164, origin_y=212, width=196, height=196, score=0.9623339176177979
origin_x=164, origin_y=213, width=195, height=195, score=0.9656818509101868
origin_x=164, origin_y=211, width=196, height=196, score=0.9727169871330261
origin_x=166, origin_y=209, width=196, height=196, score=0.9726331830024719
origin_x=165,

origin_x=89, origin_y=206, width=201, height=201, score=0.6369137763977051
origin_x=94, origin_y=178, width=220, height=220, score=0.6114633679389954
origin_x=87, origin_y=210, width=196, height=196, score=0.7308747172355652
origin_x=87, origin_y=223, width=184, height=184, score=0.6900644302368164
origin_x=86, origin_y=221, width=194, height=194, score=0.677983820438385
origin_x=82, origin_y=221, width=217, height=217, score=0.6770198941230774
origin_x=90, origin_y=218, width=234, height=234, score=0.5826742053031921
origin_x=87, origin_y=210, width=198, height=198, score=0.5890628695487976
origin_x=86, origin_y=208, width=200, height=200, score=0.6381925940513611
origin_x=90, origin_y=214, width=198, height=198, score=0.5651969909667969
origin_x=87, origin_y=210, width=191, height=191, score=0.6377801895141602
origin_x=87, origin_y=210, width=191, height=191, score=0.6377801895141602
origin_x=84, origin_y=203, width=191, height=191, score=0.6741469502449036
origin_x=81, origin_y=198,

origin_x=83, origin_y=207, width=183, height=183, score=0.9773961305618286
origin_x=86, origin_y=207, width=182, height=182, score=0.971193253993988
origin_x=86, origin_y=208, width=183, height=183, score=0.9725757837295532
origin_x=86, origin_y=208, width=184, height=184, score=0.9674103856086731
origin_x=86, origin_y=206, width=187, height=187, score=0.9720864295959473
origin_x=86, origin_y=207, width=185, height=185, score=0.9727808237075806
origin_x=86, origin_y=207, width=185, height=185, score=0.9727808237075806
origin_x=85, origin_y=209, width=184, height=184, score=0.9674660563468933
origin_x=85, origin_y=207, width=186, height=186, score=0.9736559987068176
origin_x=85, origin_y=207, width=187, height=187, score=0.9753417372703552
origin_x=84, origin_y=208, width=186, height=186, score=0.9742498397827148
origin_x=86, origin_y=206, width=187, height=187, score=0.9696085453033447
origin_x=87, origin_y=208, width=186, height=186, score=0.9613437652587891
origin_x=87, origin_y=209,

origin_x=86, origin_y=214, width=186, height=186, score=0.9539798498153687
origin_x=86, origin_y=211, width=189, height=189, score=0.9404117465019226
origin_x=87, origin_y=210, width=188, height=188, score=0.9473589658737183
origin_x=87, origin_y=212, width=184, height=184, score=0.9460365176200867
origin_x=88, origin_y=213, width=184, height=184, score=0.9503194689750671
origin_x=88, origin_y=213, width=186, height=186, score=0.9385446906089783
origin_x=87, origin_y=210, width=189, height=189, score=0.9478708505630493
origin_x=87, origin_y=210, width=189, height=189, score=0.9478708505630493
origin_x=88, origin_y=210, width=187, height=187, score=0.9496419429779053
origin_x=87, origin_y=211, width=185, height=185, score=0.9549192190170288
origin_x=87, origin_y=213, width=185, height=185, score=0.9545804858207703
origin_x=88, origin_y=214, width=183, height=183, score=0.9536429643630981
origin_x=87, origin_y=214, width=184, height=184, score=0.9539468288421631
origin_x=87, origin_y=215

origin_x=90, origin_y=208, width=179, height=179, score=0.9747359752655029
origin_x=89, origin_y=208, width=180, height=180, score=0.9779890179634094
origin_x=92, origin_y=210, width=178, height=178, score=0.9778375029563904
origin_x=92, origin_y=211, width=178, height=178, score=0.9764053821563721
origin_x=90, origin_y=209, width=182, height=182, score=0.9755330681800842
origin_x=90, origin_y=208, width=183, height=183, score=0.9750808477401733
origin_x=91, origin_y=211, width=180, height=180, score=0.9752821922302246
origin_x=93, origin_y=212, width=179, height=179, score=0.9730027318000793
origin_x=93, origin_y=212, width=179, height=179, score=0.9700108170509338
origin_x=94, origin_y=211, width=179, height=179, score=0.9709058403968811
origin_x=92, origin_y=210, width=182, height=182, score=0.9636122584342957
origin_x=90, origin_y=209, width=185, height=185, score=0.9624062180519104
origin_x=89, origin_y=208, width=186, height=186, score=0.9668420553207397
origin_x=88, origin_y=209

origin_x=71, origin_y=204, width=187, height=187, score=0.9667800068855286
origin_x=77, origin_y=201, width=189, height=189, score=0.9712432026863098
origin_x=80, origin_y=199, width=191, height=191, score=0.9724011421203613
origin_x=84, origin_y=196, width=193, height=193, score=0.9732655882835388
origin_x=88, origin_y=198, width=193, height=193, score=0.9745122194290161
origin_x=91, origin_y=197, width=194, height=194, score=0.9710772633552551
origin_x=95, origin_y=197, width=195, height=195, score=0.9701903462409973
origin_x=103, origin_y=200, width=193, height=193, score=0.9702789783477783
origin_x=110, origin_y=199, width=192, height=192, score=0.9708657264709473
origin_x=114, origin_y=200, width=191, height=191, score=0.9704591631889343
origin_x=117, origin_y=200, width=193, height=193, score=0.9687865972518921
origin_x=117, origin_y=199, width=195, height=195, score=0.975858211517334
origin_x=122, origin_y=202, width=194, height=194, score=0.9694327116012573
origin_x=120, origin

origin_x=132, origin_y=206, width=201, height=201, score=0.9642153382301331
origin_x=134, origin_y=206, width=200, height=200, score=0.9634960293769836
origin_x=135, origin_y=208, width=199, height=199, score=0.962472140789032
origin_x=136, origin_y=209, width=197, height=197, score=0.9657835960388184
origin_x=138, origin_y=209, width=197, height=197, score=0.9591155648231506
origin_x=140, origin_y=208, width=198, height=198, score=0.9580809473991394
origin_x=141, origin_y=208, width=198, height=198, score=0.9568791389465332
origin_x=143, origin_y=212, width=196, height=196, score=0.9548782110214233
origin_x=142, origin_y=210, width=199, height=199, score=0.9480738043785095
origin_x=142, origin_y=210, width=200, height=200, score=0.9448502063751221
origin_x=142, origin_y=211, width=199, height=199, score=0.9459753036499023
origin_x=143, origin_y=212, width=198, height=198, score=0.940773069858551
origin_x=144, origin_y=212, width=198, height=198, score=0.9373526573181152
origin_x=144, 