##### Copyright 2023 The MediaPipe Authors. All Rights Reserved.

In [27]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [28]:
import mediapipe as mp
import cv2
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np

## Draw annotations on the frame

In [29]:
# Layout and styling constants for the handedness label drawn by
# draw_landmarks_on_image via cv2.putText.

# Vertical gap between the topmost landmark of a hand and its label.
MARGIN = 10  # pixels
# Passed to cv2.putText as the font scale.
FONT_SIZE = 1
# Passed to cv2.putText as the stroke thickness.
FONT_THICKNESS = 1
# Label colour; presumably in RGB channel order since the annotated image is
# named rgb_image -- confirm against the caller's colour space.
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of `rgb_image` annotated with hand landmarks and labels.

  Args:
    rgb_image: Image array of shape (height, width, channels). It is copied,
      never modified in place.
    detection_result: Object exposing `hand_landmarks` (one sequence of
      landmarks with `x`, `y`, `z` attributes per detected hand) and
      `handedness` (one sequence of categories per detected hand, whose
      first entry provides `category_name`).

  Returns:
    The annotated copy of the input image. When no hands were detected the
    copy is returned unchanged.
  """
  annotated_image = np.copy(rgb_image)
  # The image size does not change while drawing, so read it once.
  height, width, _ = annotated_image.shape

  # Loop through the detected hands to visualize.
  for hand_landmarks, handedness in zip(detection_result.hand_landmarks,
                                        detection_result.handedness):
    # Draw the hand landmarks. drawing_utils expects a protobuf landmark
    # list, so the detected landmarks are repacked into one first.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
      for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Get the top left corner of the detected hand's bounding box
    # (landmark coordinates are scaled by the image size to get pixels).
    left = min(landmark.x for landmark in hand_landmarks)
    top = min(landmark.y for landmark in hand_landmarks)

    # cv2.putText anchors text at its bottom-left corner. Clamp the anchor so
    # the label stays visible when the hand touches the top or left edge
    # instead of being drawn partly or fully outside the image.
    label = f"{handedness[0].category_name}"
    (_, label_height), _ = cv2.getTextSize(
        label, cv2.FONT_HERSHEY_DUPLEX, FONT_SIZE, FONT_THICKNESS)
    text_x = max(int(left * width), 0)
    text_y = max(int(top * height) - MARGIN, label_height)

    # Draw handedness (left or right hand) on the image.
    cv2.putText(annotated_image, label,
                (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return annotated_image

## Detect landmarks in a video

In [30]:
# def detect_landmark_video(vid_path,out_path,start_frame,num_frames):
#     # Use OpenCV’s VideoCapture to load the input video:
#     video_capture = cv2.VideoCapture(vid_path)

#     # Load the frame rate of the video using OpenCV’s CV_CAP_PROP_FPS:
#     frame_rate = video_capture.get(cv2.CAP_PROP_FPS)

#     # calculate the timestamp for each frame in ms:
#     timestamp = 1 / frame_rate * 1000
#     frame_cnt = 0

#     BaseOptions = mp.tasks.BaseOptions
#     HandLandmarker = mp.tasks.vision.HandLandmarker
#     HandLandmarkerOptions = mp.tasks.vision.HandLandmarkerOptions
#     VisionRunningMode = mp.tasks.vision.RunningMode

#     model_path = './hand_landmarker.task'
    
#     annotated_results = []
#     # Create a hand landmarker instance with the video mode:
#     options = HandLandmarkerOptions(
#         base_options=BaseOptions(model_asset_path=model_path),
#         running_mode=VisionRunningMode.VIDEO,
#         min_hand_detection_confidence=0.05,
#         min_hand_presence_confidence=0.05,
#         min_tracking_confidence=0.05)
#     with HandLandmarker.create_from_options(options) as landmarker:
#         frame_timestamp = 0
#         # Loop through each frame in the video using VideoCapture.read():
#         while video_capture.isOpened():
#             ret, frame = video_capture.read()
#             if not ret:
#                 break
#             if frame_cnt == start_frame+num_frames:
#                 break
#             frame_cnt += 1
            
#             if frame_cnt >= start_frame:
#                 # Convert the frame received from OpenCV to a MediaPipe’s Image object.
#                 mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

#                 # calculate the frame timestamp in ms:
#                 frame_timestamp += timestamp

#                 # Perform hand landmarks detection on the provided single image:
#                 hand_landmarker_result = landmarker.detect_for_video(mp_image, int(frame_timestamp))

#                 annotated_image = draw_landmarks_on_image(mp_image.numpy_view(), hand_landmarker_result)
#                 # cv2.imshow('Frame',annotated_image)
#                 # cv2.waitKey(0)
#                 # cv2.destroyAllWindows() # It destroys the image showing the window.

#                 # append the annotated image to the list:
#                 annotated_results.append(annotated_image)

#         # Release the VideoCapture object and close the OpenCV window:
#         video_capture.release()

#         # Save the annotated results as a video:
#         height, width, _ = annotated_results[0].shape
#         fourcc = cv2.VideoWriter_fourcc(*'XVID')
#         out = cv2.VideoWriter(out_path, fourcc, frame_rate, (width, height))
#         for frame in annotated_results:
#             out.write(frame)

#         out.release()

## 2D prediction

In [2]:
import os
import time

# Root folder that contains one sub-folder per recording.
data_path = '/media/pruszynski/Data'
recording_name = 'Recording_2023-05-15T154223' #Jon's no board
# recording_name = 'Recording_2023-05-05T100719' #Tomo's messy situation
# recording_name = 'Recording_2024-01-31T114025' # Ali's normal recording
# Annotated videos are written to <pred_path>/<recording_name>/.
pred_path = './Predictions2D'

# Frame window forwarded to detect_landmark_video for every video.
start_frame = 0
num_frames = 2000

recording_dir = os.path.join(data_path, recording_name)
output_dir = os.path.join(pred_path, recording_name)

# Only .avi files are processed; sorting makes the processing order (and the
# order of the timing lines below) reproducible across runs.
video_files = sorted(
    name for name in os.listdir(recording_dir) if name.endswith('.avi'))

# Ensure the prediction directory exists (only when there is something to
# write). exist_ok avoids the separate, racy os.path.exists check.
if video_files:
    os.makedirs(output_dir, exist_ok=True)

for filename in video_files:
    # Time only real video processing, so that non-video directory entries
    # no longer produce misleading "Duration" lines.
    start_time = time.perf_counter()

    # Construct the full path to the .avi file and output prediction:
    vid_path = os.path.join(recording_dir, filename)
    out_path = os.path.join(output_dir, filename)

    # make 2D predictions:
    detect_landmark_video(vid_path,out_path,start_frame,num_frames)

    iteration_time = time.perf_counter() - start_time
    print(f"Duration: {iteration_time:.2f} seconds")


I0000 00:00:1715133084.559674  852888 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715133084.627690  852954 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


Duration: 0.23 seconds


I0000 00:00:1715133084.780227  852888 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715133084.888300  852987 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 0.22 seconds


I0000 00:00:1715133085.000142  852888 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715133085.127886  853005 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 0.26 seconds


I0000 00:00:1715133085.259736  852888 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715133085.368188  853031 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2
I0000 00:00:1715133085.498451  852888 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5


Duration: 0.24 seconds


I0000 00:00:1715133085.628598  853048 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 0.26 seconds


I0000 00:00:1715133085.760630  852888 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715133085.888053  853065 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2
I0000 00:00:1715133086.017882  852888 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715133086.147814  853082 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 0.26 seconds
Duration: 0.26 seconds
Duration: 0.28 seconds


I0000 00:00:1715133086.279829  852888 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715133086.408025  853099 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


In [1]:
import cv2
import mediapipe as mp
# Short aliases for the MediaPipe solution modules used by
# detect_landmark_video below.
mp_drawing = mp.solutions.drawing_utils  # provides draw_landmarks
mp_drawing_styles = mp.solutions.drawing_styles  # default landmark/connection styles
mp_hands = mp.solutions.hands  # Hands detector and HAND_CONNECTIONS

def detect_landmark_video(vid_path,out_path,start_frame,num_frames):
    """Annotate hand landmarks on a window of video frames and save the result.

    Frames are read from `vid_path`, run through MediaPipe Hands, annotated
    with the detected landmarks and written to `out_path` one by one, so
    memory use does not grow with the number of frames.

    Args:
        vid_path: Path of the input video.
        out_path: Path of the annotated output video (XVID fourcc, same frame
            rate and frame size as the input).
        start_frame: Number of leading frames to skip before processing.
        num_frames: Maximum number of frames to process after the skipped
            ones; `None` processes until the end of the video.

    Raises:
        IOError: If the input video or the output writer cannot be opened.
        ValueError: If no frame is available in the requested window.
    """
    # Use OpenCV's VideoCapture to load the input video:
    video_capture = cv2.VideoCapture(vid_path)
    writer = None
    frames_written = 0
    try:
        if not video_capture.isOpened():
            raise IOError(f"Could not open input video: {vid_path}")

        # Load the frame rate of the video so the output plays at the same speed:
        frame_rate = video_capture.get(cv2.CAP_PROP_FPS)

        # Skip the frames before the requested window. grab() advances the
        # stream without decoding the frame; stop early if the video ends.
        for _ in range(start_frame):
            if not video_capture.grab():
                break

        with mp_hands.Hands(
                model_complexity=0,
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5) as hands:
            while num_frames is None or frames_written < num_frames:
                ret, image = video_capture.read()
                if not ret:
                    # End of the video (or a read failure): stop processing.
                    break

                # MediaPipe expects RGB input while OpenCV decodes to BGR.
                # Marking the converted array read-only lets MediaPipe take
                # it by reference instead of copying it.
                rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                rgb_image.flags.writeable = False
                results = hands.process(rgb_image)

                # Draw the hand annotations on the original BGR frame, which
                # is what the video writer expects.
                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        mp_drawing.draw_landmarks(
                            image,
                            hand_landmarks,
                            mp_hands.HAND_CONNECTIONS,
                            mp_drawing_styles.get_default_hand_landmarks_style(),
                            mp_drawing_styles.get_default_hand_connections_style())

                # The writer needs the frame size, which is only known once
                # the first frame has been decoded, so create it lazily.
                if writer is None:
                    height, width = image.shape[:2]
                    fourcc = cv2.VideoWriter_fourcc(*'XVID')
                    writer = cv2.VideoWriter(
                        out_path, fourcc, frame_rate, (width, height))
                    if not writer.isOpened():
                        raise IOError(f"Could not open output video: {out_path}")

                writer.write(image)
                frames_written += 1
    finally:
        # Always free the capture and finalize the output file, even when
        # processing fails part-way through.
        video_capture.release()
        if writer is not None:
            writer.release()

    if frames_written == 0:
        raise ValueError(
            f"No frames to process in {vid_path} "
            f"(start_frame={start_frame}, num_frames={num_frames})")