In [1]:
!pip install tensorflow  opencv-python mediapipe sklearn matplotlib pillow



In [26]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from PIL import Image, ImageDraw, ImageFont

In [27]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
mp_drawing_styles = mp.solutions.drawing_styles

In [28]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    print( "left",results.left_hand_landmarks)
    print( "right",results.right_hand_landmarks)
    return image, results

In [29]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

In [30]:
def draw_styled_landmarks(image, results):
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [31]:
def extract_keypoints(results):
    print(results)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([lh, rh])

In [None]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        print(np.array(results).shape)
        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Show to screen
        cv2.imshow('OpenCV Feed', cv2.flip(image, 1))

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 
current_directory = os.getcwd()
# Actions that we try to detect
actions = np.array(['අ', 'ආ', 'ඇ','ඈ','ඉ','ඊ','උ','ඌ','එ','ඒ','ඔ','ඕ','ක්','ග්','ජ්','ට්','ද්','ණ්','ත්','ඩ්','න්','ප්','බ්','ම්','ය්','ර්','ල්','ව්','ස්','හ්'])
# actions = np.array(['අ', 'ආ', 'ඇ','ඈ','ඉ'])	
# actions = np.array(['ඊ','උ','ඌ','එ','ඒ'])
# Thirty videos worth of data
no_sequences = 30

# Each One Video  going to be 30 frames in length
sequence_length = 30

# Folder start
start_folder = 0

In [None]:
for action in actions: 
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

In [None]:
npy_path = os.path.join(current_directory,DATA_PATH, action, '0', 'kk')
np.save(npy_path,[])

print("Current Directory:", npy_path)

In [None]:
cap = cv2.VideoCapture(0)
font = ImageFont.truetype("D:\Research\sinahlaFont.ttf", 15)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    # NEW LOOP
    # Loop through actions
    for action in actions:
        # Loop through sequences aka videos
        for sequence in range(start_folder, start_folder+no_sequences):
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):

                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame, holistic)

                # Draw landmarks
                draw_styled_landmarks(image, results)
                
                # NEW Apply wait logic
                if frame_num == 0: 
                    cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    # Convert  Numpy array and switch  from BGR to RGB
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    pil_image = Image.fromarray(image)
                    draw = ImageDraw.Draw(pil_image)
                    dynamic_text = "Collecting frames for "+action+" Video Number " +  str(sequence);
                    draw.text((10, 10),dynamic_text, font=font)

                    
                    # Convert back to Numpy array and switch back from RGB to BGR
                    converted_image = np.asarray(pil_image)
                    cv2image = cv2.cvtColor(converted_image, cv2.COLOR_RGB2BGR)
                    
                    # Show to screen
                    cv2.imshow('OpenCV Feed', cv2image)
                    cv2.waitKey(750)
                else:          
                     # Convert  Numpy array and switch  from BGR to RGB
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    pil_image = Image.fromarray(image)
                    draw = ImageDraw.Draw(pil_image)
                    dynamic_text = "Collecting frames for "+action+" Video Number " +  str(sequence);
                    draw.text((10, 10),dynamic_text, font=font)
                    
                    # Convert back to Numpy array and switch back from RGB to BGR
                    converted_image = np.asarray(pil_image)
                    cv2image = cv2.cvtColor(converted_image, cv2.COLOR_RGB2BGR)

                    
                    # Show to screen
                    cv2.imshow('OpenCV Feed',cv2image)
                
                # NEW Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
                    
    cap.release()
    cv2.destroyAllWindows()

left None
right None
<class 'mediapipe.python.solution_base.SolutionOutputs'>
left None
right None
<class 'mediapipe.python.solution_base.SolutionOutputs'>
left None
right None
<class 'mediapipe.python.solution_base.SolutionOutputs'>
left None
right None
<class 'mediapipe.python.solution_base.SolutionOutputs'>
left landmark {
  x: 0.9662972
  y: 0.87671304
  z: -8.7430465e-07
}
landmark {
  x: 0.898052
  y: 0.7608906
  z: 0.028841758
}
landmark {
  x: 0.8906899
  y: 0.65354466
  z: 0.02817474
}
landmark {
  x: 0.8819992
  y: 0.5770262
  z: 0.017917247
}
landmark {
  x: 0.87600493
  y: 0.5298727
  z: 0.010584885
}
landmark {
  x: 1.0000129
  y: 0.5503332
  z: 0.016143333
}
landmark {
  x: 0.8783037
  y: 0.4687199
  z: -0.015163444
}
landmark {
  x: 0.8612462
  y: 0.5399089
  z: -0.031193178
}
landmark {
  x: 0.885876
  y: 0.5941681
  z: -0.037699856
}
landmark {
  x: 1.0033797
  y: 0.5600378
  z: -0.011379374
}
landmark {
  x: 0.856178
  y: 0.47832274
  z: -0.03913432
}
landmark {
  x: 

In [23]:
cap.release()
cv2.destroyAllWindows()