##### Copyright 2023 The MediaPipe Authors. All Rights Reserved.

In [182]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

### 2D Detection of landmarks in a vid and return dataframe:

In [183]:
import cv2
import numpy as np
import mediapipe as mp
import pandas as pd
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands


# Define the number of columns:
num_landmarks = 21

def detect_landmark_video(vid_path,start_frame,num_frames):
    # Use OpenCV’s VideoCapture to load the input video:
    video_capture = cv2.VideoCapture(vid_path)

    # Check if the video is opened successfully
    if not video_capture.isOpened():
        print(f"Error: Could not open video {vid_path}.")
        return
    
    # Set the starting frame
    video_capture.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    # counter of frames:
    frame_count = 0

    annotated_results = []
    # Create a hand landmarker instance with the video mode:
    with mp_hands.Hands(
    model_complexity=1,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
      while video_capture.isOpened():
        ret, image = video_capture.read()
        if not ret:
          # When vid is done:
          break
        
        if frame_count < num_frames:
          frame_count += 1
          # To improve performance, optionally mark the image as not writeable to
          # pass by reference.
          image.flags.writeable = False
          image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
          results = hands.process(image)

          # Draw the hand annotations on the image.
          image.flags.writeable = True
          image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
          if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
              mp_drawing.draw_landmarks(
                  image,
                  hand_landmarks,
                  mp_hands.HAND_CONNECTIONS,
                  mp_drawing_styles.get_default_hand_landmarks_style(),
                  mp_drawing_styles.get_default_hand_connections_style())
          
          image_height, image_width, _ = image.shape
          # append the annotattions to the list:
          annotated_results.append(results)
        else:
           # if reached the ending frame:
           break

      # going through frames:
      annotations = []
      for result in annotated_results:
        x_points = []
        y_points = []

        # if nothing was detected in the frame:
        if result.multi_hand_landmarks is None:
          x_points = [-1] * num_landmarks
          y_points = [-1] * num_landmarks
        # if something is detected in the frame:
        else:  
          # going through landmarks of the frame:
          for cnt, hand_landmark in enumerate(result.multi_hand_landmarks[0].landmark):
            x_points.append(hand_landmark.x * image_width)
            y_points.append(hand_landmark.y * image_height)
        
        # make the annotations data appropriate for the dataframe:
        x_points.extend(y_points)
        annotations.append(x_points)
      
      # Define dataframe to hold the dannotations ata:
      # Create column names:
      columns = [f'x{i}' for i in range(0, num_landmarks)] + [f'y{i}' for i in range(0, num_landmarks)]

      # Create the DataFrame
      df = pd.DataFrame(annotations,columns=columns)
    
    # Release the VideoCapture object:
    video_capture.release()
    
    return df
          
        

### Create annotated videos from the 2D predictions

In [184]:
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands

def make_2D_reconstruction(vid_path,out_path,start_frame,num_frames):
    # Use OpenCV’s VideoCapture to load the input video:
    video_capture = cv2.VideoCapture(vid_path)

    # Load the frame rate of the video using OpenCV’s CV_CAP_PROP_FPS:
    frame_rate = video_capture.get(cv2.CAP_PROP_FPS)

    # Create video writer instance to make an annotated vid:
    ret, image = video_capture.read()
    height, width, _ = image.shape
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(out_path, fourcc, frame_rate, (width, height))

    annotated_results = []
    # Create a hand landmarker instance with the video mode:
    with mp_hands.Hands(
    model_complexity=1,
    max_num_hands=1,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as hands:
      while video_capture.isOpened():
        ret, image = video_capture.read()
        if not ret:
          print("Ignoring empty camera frame.")
          # If loading a video, use 'break' instead of 'continue'.
          break
        
        # To improve performance, optionally mark the image as not writeable to
        # pass by reference.
        image.flags.writeable = False
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = hands.process(image)

        # Draw the hand annotations on the image.
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_hand_landmarks:
          for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                image,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing_styles.get_default_hand_landmarks_style(),
                mp_drawing_styles.get_default_hand_connections_style())
        
        # append the annotattions to the list:
        annotated_results.append(results)

        # write the annotated vid:
        out.write(image)
      
      # Release the VideoCapture object:
      video_capture.release()
      out.release()
          

      

## MAIN Control of the Program

In [185]:
import os
import time
import pandas as pd

data_path = '/media/pruszynski/Data'
recording_name = 'Recording_2023-05-15T154223' #Jon's no board
# recording_name = 'Recording_2023-05-05T100719' #Tomo's messy situation
# recording_name = 'Recording_2024-01-31T114025' # Ali's normal recording
pred_path = './Predictions2D'

start_frame = 0
num_frames = 100
result_df = pd.DataFrame()
# Loop through all .avi vids in the folder
for filename in os.listdir(os.path.join(data_path, recording_name)):
    if filename.endswith('.avi'):
        print(f"Detecting Video: {filename} , frame {start_frame} to {start_frame+num_frames-1}...")
        start_time = time.time()  # Record the start time of the iteration
        # Construct the full path to the .avi file and output prediction:
        vid_path = os.path.join(data_path, recording_name, filename)

        # Ensure the prediction directory exists, create it if it doesn't
        if not os.path.exists(os.path.join(pred_path, recording_name)):
            os.makedirs(os.path.join(pred_path, recording_name))
        
        # make 2D predictions:
        df = detect_landmark_video(vid_path, start_frame, num_frames)

        # add frame number to the df:
        df.insert(0, 'frame', [i for i in range(start_frame,start_frame+num_frames)])

        # add camera name to the df:
        df.insert(0, 'camera', [filename[:-4]]*len(df))

        # Concatenate dataframe of different vids to one:
        result_df = pd.concat([result_df, df], ignore_index=True)
    
        end_time = time.time()  # Record the end time of the iteration
        iteration_time = end_time - start_time  # Calculate the time taken by the iteration
        print(f"Duration: {iteration_time:.2f} seconds")

# Save the resulting dataframe to a CSV file:
df_path = os.path.join(pred_path, recording_name, recording_name+'.csv')
result_df.to_csv(df_path, index=False)


Detecting Video: Camera_LF.avi , frame 0 to 99...


I0000 00:00:1715557573.672319  926072 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715557573.759431  982249 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 3.37 seconds
Detecting Video: Camera_RM.avi , frame 0 to 99...


I0000 00:00:1715557577.047572  926072 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715557577.193880  982322 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 3.33 seconds
Detecting Video: Camera_LM.avi , frame 0 to 99...


I0000 00:00:1715557580.378184  926072 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715557580.509883  982354 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 4.08 seconds
Detecting Video: Camera_RB.avi , frame 0 to 99...


I0000 00:00:1715557584.460550  926072 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715557584.576879  982371 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 3.38 seconds
Detecting Video: Camera_RF.avi , frame 0 to 99...


I0000 00:00:1715557587.844752  926072 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715557587.960619  982388 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 3.43 seconds
Detecting Video: Camera_LB.avi , frame 0 to 99...


I0000 00:00:1715557591.282348  926072 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715557591.377525  982410 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 3.28 seconds
Detecting Video: Camera_LU.avi , frame 0 to 99...


I0000 00:00:1715557594.562314  926072 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715557594.660616  982428 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 3.27 seconds
Detecting Video: Camera_RU.avi , frame 0 to 99...


I0000 00:00:1715557597.828571  926072 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1715557597.944567  982446 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 NVIDIA 525.105.17), renderer: NVIDIA GeForce GTX 1080 Ti/PCIe/SSE2


Duration: 3.37 seconds
