In [None]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m29.2 MB/s[0m eta [36m0:00:00[0m
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.4.6-py3-none-any.whl (31 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->mediapipe)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->mediapipe)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->mediapipe)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->mediapipe)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1

In [None]:
import tensorflow as tf
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [None]:
from google.colab.patches import cv2_imshow

In [None]:
# Location of the trained keypoint classifier (HDF5 file) under Google Drive.
# NOTE(review): presumably requires Drive to be mounted at /content/drive first;
# no mounting step is visible in this part of the notebook.
model_path = '/content/drive/MyDrive/hand_gesture_detection/model/keypoint_classifier.hdf5'
# Keras model used later to classify the flattened hand keypoints of each frame.
model = tf.keras.models.load_model(model_path)

In [None]:
def mediapipe_detection(image, model):
    """Run a detection model on one BGR frame.

    Args:
        image: Frame in BGR channel order (as produced by OpenCV).
        model: Object exposing ``process(rgb_image)``; the caller in this
            notebook passes a MediaPipe Holistic instance.

    Returns:
        A ``(bgr_image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned for it.
    """
    # The model is fed RGB, so convert from OpenCV's BGR first
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Mark the buffer read-only while the model runs (the original author
    # notes this speeds up processing), then make it writable again
    rgb_frame.flags.writeable = False
    detection = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    # Hand the frame back in BGR so the OpenCV drawing calls can use it
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detection

In [None]:
def draw_landmarks(image, results):
    """Draw the left- and right-hand landmarks from ``results`` onto ``image``.

    Drawing is delegated to ``mp_drawing.draw_landmarks`` with
    ``mp_holistic.HAND_CONNECTIONS``; nothing is returned.

    Args:
        image: Frame to draw on.
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.
    """
    # (landmarks, colour of the first drawing spec, colour of the second), left hand first.
    # The two specs are passed positionally; in MediaPipe's drawing_utils these are
    # the landmark spec followed by the connection spec.
    per_hand_style = (
        (results.left_hand_landmarks, (121, 22, 76), (121, 44, 250)),
        (results.right_hand_landmarks, (245, 117, 66), (245, 66, 230)),
    )

    for hand_landmarks, point_color, line_color in per_hand_style:
        point_spec = mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4)
        line_spec = mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2)
        mp_drawing.draw_landmarks(
            image, hand_landmarks, mp_holistic.HAND_CONNECTIONS, point_spec, line_spec
        )

In [None]:
def extract_keypoints(results):
    """Flatten the (x, y) coordinates of each hand into a list.

    Args:
        results: Object exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy (hand not found)
            or has a ``landmark`` iterable of points with ``x`` and ``y``.

    Returns:
        A ``(left, right)`` tuple of lists laid out as ``[x0, y0, x1, y1, ...]``.
        A hand that is missing is represented by 42 zeros (21 * 2).
    """
    def _flatten_hand(hand):
        # Missing hand: fixed-length zero placeholder
        if not hand:
            return list(np.zeros(21 * 2))
        xy_pairs = np.array([[point.x, point.y] for point in hand.landmark])
        return list(xy_pairs.flatten())

    left_keypoints = _flatten_hand(results.left_hand_landmarks)
    right_keypoints = _flatten_hand(results.right_hand_landmarks)
    return left_keypoints, right_keypoints

In [None]:
def calc_bounding_rect(image, results):
    """Compute a padded bounding box around a set of landmarks.

    Args:
        image: Array whose ``shape`` starts with ``(height, width)``.
        results: Iterable of points with ``x`` and ``y`` attributes; they are
            scaled by the image width and height, so they are presumably
            normalized coordinates.

    Returns:
        ``[x1, y1, x2, y2]``: the rectangle reported by ``cv2.boundingRect``
        for the pixel points, grown by 10 pixels on every side. The padded
        values are not clamped to the image.
    """
    frame_height, frame_width = image.shape[0], image.shape[1]

    # Scale each landmark to pixels; only the upper bound is capped (at the last row/column)
    pixel_points = np.array(
        [
            (
                min(int(point.x * frame_width), frame_width - 1),
                min(int(point.y * frame_height), frame_height - 1),
            )
            for point in results
        ],
        dtype=int,
    ).reshape(-1, 2)

    left, top, box_width, box_height = cv2.boundingRect(pixel_points)

    margin = 10
    return [left - margin, top - margin, left + box_width + margin, top + box_height + margin]

In [None]:
def draw_bounding_rect(image, results, index, hand):
    """Draw a labelled bounding box around one hand.

    Args:
        image: Frame to draw on.
        results: Landmark points, forwarded to ``calc_bounding_rect``.
        index: Position of the gesture name in the local label list
            (``IndexError`` if it is outside that three-entry list).
        hand: Sequence whose first element is the text prefix for the label.

    Returns:
        The same ``image`` object that was passed in.
    """
    gesture_names = ['iloveyou','hello','unknow']
    label = hand[0] + gesture_names[index]

    left, top, right, bottom = calc_bounding_rect(image, results)
    top_left = (left, top)

    # Black outline for the box, white label anchored at its top-left corner
    cv2.rectangle(image, top_left, (right, bottom), (0, 0, 0), 2)
    cv2.putText(
        image, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA
    )
    return image

In [None]:
mp_holistic = mp.solutions.holistic    # MediaPipe Holistic solution module (the detection model used here)
mp_drawing = mp.solutions.drawing_utils # drawing helpers used to render the results onto frames

In [22]:
# Read the input video frame by frame, classify the gesture of each detected
# hand, draw the annotations, and write the result to 'video_output.avi'.
cap = cv2.VideoCapture('/content/drive/MyDrive/hand_gesture_detection/2024-05-01-015619.webm')

fourcc = cv2.VideoWriter_fourcc(*'XVID')
# The output uses the input's frame size (named constants instead of the raw ids 3 and 4)
frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
video_out = cv2.VideoWriter('video_output.avi', fourcc, 20.0, frame_size)

try:
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or the read failed): stop processing
                break

            image, results = mediapipe_detection(frame, holistic)

            lh_results_list, rh_results_list = extract_keypoints(results)
            # extract_keypoints returns an all-zero list for a hand that was not detected
            lh_detected = lh_results_list.count(0) != len(lh_results_list)
            rh_detected = rh_results_list.count(0) != len(rh_results_list)

            if lh_detected:
                text_left_hand = ['left_hand:']
                # The classifier takes a batch; reshape the keypoints into one row
                lh_results_array = np.array(lh_results_list).reshape(1, -1)
                # verbose=0: avoid printing a progress bar for every frame
                lh_action = model.predict(lh_results_array, verbose=0)
                lh_action_index = np.argmax(np.squeeze(lh_action))
                draw_bounding_rect(image, results.left_hand_landmarks.landmark, lh_action_index, text_left_hand)

            if rh_detected:
                text_right_hand = ['right_hand:']
                rh_results_array = np.array(rh_results_list).reshape(1, -1)
                rh_action = model.predict(rh_results_array, verbose=0)
                rh_action_index = np.argmax(np.squeeze(rh_action))
                draw_bounding_rect(image, results.right_hand_landmarks.landmark, rh_action_index, text_right_hand)

            # draw_landmarks renders both hands, so one call per frame is enough
            if lh_detected or rh_detected:
                draw_landmarks(image, results)

            video_out.write(image)
            cv2_imshow(image)

            # Pressing 'q' stops early (only effective where OpenCV can receive key events)
            if cv2.waitKey(40) & 0xFF == ord('q'):
                break
finally:
    # Always release the reader and writer, even if a frame fails mid-way,
    # so the output file is finalised and the handles are not leaked
    cap.release()
    video_out.release()
    cv2.destroyAllWindows()

Output hidden; open in https://colab.research.google.com to view.