In [5]:
import copy
import argparse
import os
import uuid

import cv2
import mediapipe as mp
from utils.helper_func import HelperFunc

In [7]:
# Gesture source categories, keyed by a 1-based numeric id.
gesture_type = dict(enumerate(('hand', 'body'), start=1))

# Gesture class names keyed by a 1-based numeric id. The preprocessing cells
# look a class name up in this mapping (via `get_key_from_value`) to obtain
# the id that is passed to `write_csv`.
gestures = dict(enumerate((
    'up',
    'down',
    'right',
    'left',
    'further',
    'closer',
    'land',
    'take_off',
    'photo',
    'video',
    'video_pause',
    'emergency',
    'follow',
    'palm',
    'no_class',
), start=1))

# Hand Gesture Preprocessing

In [8]:
# Confidence thresholds handed to the MediaPipe Hands solution below.
min_detection_confidence = min_tracking_confidence = 0.5

In [9]:
# image_num     -> number of frames sampled from the hand videos
# pro_image_num -> number of sampled frames that were actually processed
#                  (landmarks extracted and written out)
image_num = pro_image_num = 0

In [None]:
# Hand-gesture data extraction.
# For every video that is not under the 'body' folder: sample one frame every
# `frame_skip` frames (about one per second of video), run MediaPipe Hands on
# it, and for each hand labelled 'Left' pass the pre-processed landmarks to
# `functions.write_csv` and save the untouched frame as a JPEG under `photos`.
functions = HelperFunc()
working_dir = os.path.dirname(os.getcwd())
videos_path = os.path.join(working_dir, 'videos')
photos_path = os.path.join(working_dir, 'photos')

# Settings that do not change between videos (hoisted out of the loops)
mp_drawing = mp.solutions.drawing_utils  # only needed by the disabled annotation code
mp_hands = mp.solutions.hands

# Whether the pose library is used (forwarded to the helper functions)
use_pose = False

# Set True to save the original frames
save_frames = True

# The loop variable is deliberately not called `gesture_type`, so the
# `gesture_type` lookup dict is not replaced by a folder name.
for type_name in os.listdir(videos_path):
    type_dir = os.path.join(videos_path, type_name)

    # Body videos are not handled here; stray files (e.g. .DS_Store) are not folders
    if type_name == 'body' or not os.path.isdir(type_dir):
        continue

    for gesture_class in os.listdir(type_dir):
        gesture_path = os.path.join(type_dir, gesture_class)
        if not os.path.isdir(gesture_path):
            continue

        # Directory that receives the output images; cv2.imwrite does not
        # create missing folders and would silently save nothing.
        output_dir = os.path.join(photos_path, type_name, gesture_class)
        if save_frames:
            os.makedirs(output_dir, exist_ok=True)

        for video in os.listdir(gesture_path):
            # Path to the specific video
            video_path = os.path.join(gesture_path, video)

            # Capturing video
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                print('Could not open video {}'.format(video_path))
                cap.release()
                continue

            # Frame rate of the video
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Sample one frame per second; never below 1, because a missing or
            # sub-1 FPS value would otherwise cause a modulo-by-zero below.
            frame_skip = max(1, int(fps))
            # Frames read so far from this video
            frame_count = 0

            try:
                with mp_hands.Hands(
                        max_num_hands=1,
                        min_detection_confidence=min_detection_confidence,
                        min_tracking_confidence=min_tracking_confidence) as hands:

                    while cap.isOpened():
                        # Capture the frame
                        ret, frame = cap.read()

                        if not ret:
                            print("Ignoring empty camera frame.")
                            break

                        # Convert the frame to RGB format
                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                        # To improve performance, mark the image handed to
                        # MediaPipe as not writeable (it is passed by reference)
                        rgb_frame.flags.writeable = False

                        # Every frame is processed (not only the sampled ones)
                        results = hands.process(rgb_frame)

                        if frame_count % frame_skip == 0:
                            # Number of sampled frames
                            image_num += 1

                            # Check if any hands were detected
                            if results.multi_hand_landmarks:
                                # Pair each hand with its own handedness entry
                                for hand_landmarks, handedness in zip(
                                        results.multi_hand_landmarks,
                                        results.multi_handedness):

                                    # Keep only hands labelled 'Left'.
                                    # NOTE(review): MediaPipe assigns handedness assuming a
                                    # mirrored (selfie) image - confirm this matches the videos.
                                    if handedness.classification[0].label != 'Left':
                                        continue

                                    # Number of processed frames
                                    pro_image_num += 1

                                    # Landmark list calculation
                                    landmark_list = functions.calc_landmark_list(
                                        rgb_frame, hand_landmarks, use_pose)
                                    # Preprocessing landmark list
                                    pre_processed_landmark_list = functions.pre_process_landmark(
                                        landmark_list)
                                    # Writing preprocessed list to the csv file
                                    functions.write_csv(
                                        functions.get_key_from_value(gestures, gesture_class),
                                        pre_processed_landmark_list,
                                        use_pose)

                                    # Disabled annotation code. If re-enabled, draw on a copy
                                    # of `frame` so the image saved below stays unannotated.
                                    # # Bounding box calculation
                                    # brect = functions.calc_bounding_rect(rgb_frame, hand_landmarks)
                                    # # Drawing the bounding box on the frame
                                    # frame = functions.rect_corners(frame, brect)
                                    # # Drawing the hand landmarks on the frame
                                    # mp_drawing.draw_landmarks(
                                    #     frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                    #     mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=3),
                                    #     mp_drawing.DrawingSpec(color=(121,44,250), thickness=2))

                                    if save_frames:
                                        # Unique id for every saved frame
                                        unique_id = str(uuid.uuid4())
                                        # Output file path for the original image
                                        output_orj = f"{output_dir}/{unique_id}_org.jpg"
                                        # `frame` is never drawn on, so it is saved as-is
                                        if not cv2.imwrite(output_orj, frame):
                                            print('Could not save frame to {}'.format(output_orj))

                        frame_count += 1
                        # Display the frame
                        cv2.imshow('Hand Detection', frame)

                        # Stop this video if the 'q' key is pressed
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
            finally:
                # Release the capture and close the preview even if processing fails
                cap.release()
                cv2.destroyAllWindows()

            # Reported once per video (not once per frame)
            print('Video {} processed successfully'.format(video_path))

In [25]:
# Summary of the hand run: sampled frames, processed frames, and the difference.
print(
    'Total Frames in Videos= {}'.format(image_num),
    'Total Processed Frames = {}'.format(pro_image_num),
    'Number of Useless Frames = {}'.format(image_num - pro_image_num),
    sep='\n',
)

Total Frames in Videos= 789
Total Processed Frames = 786
Number of Useless Frames = 3


# Body Gesture Preprocessing

In [26]:
# Confidence thresholds handed to the MediaPipe Pose solution below.
min_detection_confidence, min_tracking_confidence = 0.5, 0.5

In [27]:
# image_num_pose     -> number of frames sampled from the body videos
# pro_image_num_pose -> number of sampled frames in which pose landmarks
#                       were found and processed
image_num_pose, pro_image_num_pose = 0, 0

In [None]:
# Body-gesture data extraction.
# For every video that is not under the 'hand' folder: sample one frame every
# `frame_skip` frames (about one per second of video), run MediaPipe Pose on
# it, and when pose landmarks are found pass the pre-processed landmarks to
# `functions.write_csv` and save the untouched frame as a JPEG under `photos`.
functions = HelperFunc()
working_dir = os.path.dirname(os.getcwd())
videos_path = os.path.join(working_dir, 'videos')
photos_path = os.path.join(working_dir, 'photos')

# Settings that do not change between videos (hoisted out of the loops)
mp_drawing = mp.solutions.drawing_utils  # only needed by the disabled annotation code
mp_pose = mp.solutions.pose

# Whether the pose library is used (forwarded to the helper functions)
use_pose = True

# Set True to save the original frames
save_frames = True

# The loop variable is deliberately not called `gesture_type`, so the
# `gesture_type` lookup dict is not replaced by a folder name.
for type_name in os.listdir(videos_path):
    type_dir = os.path.join(videos_path, type_name)

    # Hand videos are not handled here; stray files (e.g. .DS_Store) are not folders
    if type_name == 'hand' or not os.path.isdir(type_dir):
        continue

    for gesture_class in os.listdir(type_dir):
        gesture_path = os.path.join(type_dir, gesture_class)
        if not os.path.isdir(gesture_path):
            continue

        # Directory that receives the output images; cv2.imwrite does not
        # create missing folders and would silently save nothing.
        output_dir = os.path.join(photos_path, type_name, gesture_class)
        if save_frames:
            os.makedirs(output_dir, exist_ok=True)

        for video in os.listdir(gesture_path):
            # Path to the specific video
            video_path = os.path.join(gesture_path, video)

            # Capturing video
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                print('Could not open video {}'.format(video_path))
                cap.release()
                continue

            # Frame rate of the video
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Sample one frame per second; never below 1, because a missing or
            # sub-1 FPS value would otherwise cause a modulo-by-zero below.
            frame_skip = max(1, int(fps))
            # Frames read so far from this video
            frame_count = 0

            try:
                with mp_pose.Pose(
                        min_detection_confidence=min_detection_confidence,
                        min_tracking_confidence=min_tracking_confidence) as pose:

                    while cap.isOpened():
                        # Capture the frame
                        ret, frame = cap.read()

                        if not ret:
                            print("Ignoring empty camera frame.")
                            break

                        # Convert the frame to RGB format
                        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                        # To improve performance, mark the image handed to
                        # MediaPipe as not writeable (it is passed by reference)
                        rgb_frame.flags.writeable = False

                        # Every frame is processed (not only the sampled ones)
                        results = pose.process(rgb_frame)

                        if frame_count % frame_skip == 0:
                            # Number of sampled frames
                            image_num_pose += 1

                            if results.pose_landmarks:
                                # Number of processed frames
                                pro_image_num_pose += 1

                                # Landmark list calculation
                                landmark_list = functions.calc_landmark_list(
                                    rgb_frame, results.pose_landmarks, use_pose)
                                # Preprocessing landmark list
                                pre_processed_landmark_list = functions.pre_process_landmark(
                                    landmark_list)
                                # Writing preprocessed list to the csv file
                                functions.write_csv(
                                    functions.get_key_from_value(gestures, gesture_class),
                                    pre_processed_landmark_list,
                                    use_pose)

                                # Disabled annotation code. If re-enabled, draw on a copy
                                # of `frame` so the image saved below stays unannotated.
                                # # Bounding box calculation
                                # brect = functions.calc_bounding_rect(rgb_frame, results.pose_landmarks)
                                # # Drawing the bounding box on the frame
                                # frame = functions.rect_corners(frame, brect)
                                # # Draw the pose annotation on the image.
                                # frame.flags.writeable = True
                                # mp_drawing.draw_landmarks(
                                #     frame,
                                #     results.pose_landmarks,
                                #     mp_pose.POSE_CONNECTIONS,
                                #     connection_drawing_spec = mp_drawing.DrawingSpec(color=(121,44,250), thickness=2),
                                #     landmark_drawing_spec=mp_drawing.DrawingSpec(color=(121,22,76), thickness=2,circle_radius=2))

                                if save_frames:
                                    # Unique id for every saved frame
                                    unique_id = str(uuid.uuid4())
                                    # Output file path for the original image
                                    output_orj = f"{output_dir}/{unique_id}_org.jpg"
                                    # `frame` is never drawn on, so it is saved as-is
                                    if not cv2.imwrite(output_orj, frame):
                                        print('Could not save frame to {}'.format(output_orj))

                        frame_count += 1
                        # Display the frame
                        cv2.imshow('Pose Detection', frame)

                        # Stop this video if the 'q' key is pressed
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break
            finally:
                # Release the capture and close the preview even if processing fails
                cap.release()
                cv2.destroyAllWindows()

            print('Video {} processed successfully'.format(video_path))

In [31]:
# Summary of the body run: sampled frames, processed frames, and the difference.
print(
    'Total Frames in Videos = {}'.format(image_num_pose),
    'Total Processed Frames = {}'.format(pro_image_num_pose),
    'Number of Useless Frames = {}'.format(image_num_pose - pro_image_num_pose),
    sep='\n',
)

Total Frames in Videos = 1849
Total Processed Frames = 1844
Number of Useless Frames = 5
