Import the Libraries

In [None]:
import os                               # operating system
import tensorflow as tf                 # tensorFlow
import cv2                              # openCV-Computer Vision
import numpy as np                      # numpyArray
from matplotlib import pyplot as plt    # plotting library
import time                             # time stamps
import mediapipe as mp                  # mediapipe

# Alias for MediaPipe's holistic solution module; the model itself is only
# instantiated later, through mp_holistic.Holistic(...)
mp_holistic = mp.solutions.holistic
# Alias for MediaPipe's drawing utilities (draw_landmarks, DrawingSpec)
mp_drawing = mp.solutions.drawing_utils


def mediapipe_detection(image, model):
    """Run a MediaPipe model on a BGR image.

    Args:
        image: Image in OpenCV's BGR channel order.
        model: Object exposing ``process(image)`` (here a Holistic instance).

    Returns:
        A ``(image, results)`` tuple: the image converted back to BGR and
        whatever ``model.process`` returned for its RGB version.
    """
    # The model is fed RGB, while OpenCV works in BGR.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Read-only while processing; MediaPipe's examples do this so the frame
    # can be passed by reference instead of being copied.
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    # Back to BGR so the caller can draw on it and display it with OpenCV.
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results


def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets on ``image`` in place.

    Uses MediaPipe's default drawing style for every landmark group.
    """
    # (landmark list, connection set) pairs, drawn in this order
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections)
    

def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks on ``image`` in place with custom styles.

    For each group the first ``DrawingSpec`` is passed as the landmark
    (point) style and the second as the connection (line) style.

    Args:
        image: Image array that is drawn on.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # Face contours
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                             # Channel values are 8-bit, so the maximum is 255 (was 256)
                             mp_drawing.DrawingSpec(color=(80,255,121), thickness=1, circle_radius=1)
                             )

    # Body pose
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=1),
                             mp_drawing.DrawingSpec(color=(255,255,255), thickness=2, circle_radius=1)
                             )

    # Left hand
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(255,255,0), thickness=2, circle_radius=2),
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=1)
                             )

    # Right hand
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=1)
                             )

def extract_keypoints(results):
    """Flatten all landmark groups of ``results`` into one 1-D feature vector.

    Layout: pose (33 landmarks x [x, y, z, visibility]), face (468 x [x, y, z]),
    left hand (21 x [x, y, z]), right hand (21 x [x, y, z]). A group that is
    missing (falsy) contributes zeros of the corresponding length.

    Returns:
        1-D NumPy array: pose, face, left hand, right hand concatenated.
    """
    def _flatten(group, fields, count):
        # Missing group -> zero placeholder so the vector layout stays fixed
        if not group:
            return np.zeros(count * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in group.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flatten(results.pose_landmarks, xyz + ('visibility',), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

Selection of Words and Configuration of Folders

In [None]:
# Root folder that receives the recorded samples
DATA_PATH = 'OrnekFrame1'

# Words recorded in this session
actions = np.array(['sinifta'])
# Vocabulary notes (the number ending a line is the running word count):
# 'gulegule' 'gunaydin' 'hosbulduk' 'hosgeldiniz' 'merhaba' 5
# 'ozurdilerim' 'neden' 'tesekkurler' 'yapmak' 9
# 'siyah' 'kopek' 'neseli' 'beyaz' 'degil' 'hangi' 'adam' 'heyecanli' 'tembel' 18
# 'saygi' 'var' 'efendi' 'dede' 'tanimiyor' 'yer' 'kiz' 'kedi' 'bir' 'sinifta' 28
# 'bes' 'ogrenci' 'akilli' 'sinirli' 'yaramaz' 'hediye' 'kutuda'
# 'burda' 'yapiyor' 'dondurmaci' 'engelli'
# 'sinif' 'sakin' 'kim' 'gordu' 'ne' 'yardim' 'paylaşmak'

# Videos recorded per word
no_sequences = 10
# Frames captured per video
sequence_length = 30

In [None]:
# Create one folder per (word, video index): DATA_PATH/<action>/<sequence>
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True keeps re-running this cell harmless when the folders
        # already exist, while real failures (e.g. permission errors) are
        # raised instead of being silently swallowed.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)

Start Creating the Dataset

In [None]:
cap = cv2.VideoCapture(0)
# Size (in pixels) of the window that is cropped around the face
window_width, window_height = 480, 360

# Set to True when 'q' is pressed or the camera stops delivering frames;
# checked after each inner loop so that recording really stops.
stop_requested = False

try:
    # Set up the MediaPipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Loop over the words
        for action in actions:
            # Loop over the videos of this word.
            # Sequence ranges per contributor: emir kaan (no_sequences), berkant (10,20),
            # mehmet emin (20,30), eren (30,40), kayra (40,50)
            for sequence in range(no_sequences):
                # Loop over the frames of this video
                for frame_num in range(sequence_length):
                    # Read the camera; stop cleanly if no frame is delivered
                    ret, frame = cap.read()
                    if not ret or frame is None:
                        print('Camera frame could not be read; stopping capture.')
                        stop_requested = True
                        break

                    # First pass on the full frame (RGB, read-only) to locate the face
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    rgb_frame.flags.writeable = False
                    results = holistic.process(rgb_frame)

                    # Fallback when no face is found: annotate and show the
                    # original BGR frame (cv2.imshow expects BGR channel order)
                    image = frame

                    frame_height, frame_width, _ = frame.shape

                    # If a face is detected, crop a window centred on it
                    if results.face_landmarks:
                        # Face centre = mean of the normalised face landmarks, in pixels
                        landmarks = results.face_landmarks.landmark
                        x_center = int(np.mean([lm.x for lm in landmarks]) * frame_width)
                        y_center = int(np.mean([lm.y for lm in landmarks]) * frame_height)

                        # Clamp the window so it stays inside the frame, also
                        # when the frame is smaller than the window
                        start_x = max(min(x_center - window_width // 2, frame_width - window_width), 0)
                        end_x = min(start_x + window_width, frame_width)
                        start_y = max(min(y_center - window_height // 2, frame_height - window_height), 0)
                        end_y = min(start_y + window_height, frame_height)

                        # Crop and mirror the window
                        window = cv2.flip(frame[start_y:end_y, start_x:end_x], 1)

                        # Second pass: detection on the cropped window
                        # NOTE(review): the same Holistic instance sees the full
                        # frame and the crop alternately — confirm this does not
                        # degrade landmark tracking.
                        image, results = mediapipe_detection(window, holistic)

                        # Draw the keypoints
                        draw_styled_landmarks(image, results)

                    # First frame of a video: announce the capture and pause briefly
                    if frame_num == 0:
                        cv2.putText(image, 'CAPTURE STARTING', (120,200),
                                   cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                        cv2.putText(image, ' {} Kelimesi icin Video Sayisi {}'.format(action, sequence), (15,12),
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                        cv2.imshow('OpenCV Beslemesi', image)
                        # Pause for 1 second (1000 ms)
                        cv2.waitKey(1000)
                    else:
                        cv2.putText(image, '{} Number of Videos for the Word {}'.format(action, sequence), (15,12),
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                        cv2.imshow('OpenCV Beslemesi', image)

                    # Detect, flatten, and concatenate the keypoints
                    keypoints = extract_keypoints(results)
                    # Save location: DATA_PATH/<action>/<sequence>/<frame_num>
                    npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                    # Save the flattened keypoint data for this frame
                    np.save(npy_path, keypoints)

                    # Also store the landmarks drawn on a black canvas as a PNG
                    blank_image = np.zeros(image.shape, dtype=np.uint8)
                    draw_styled_landmarks(blank_image, results)
                    cv2.imwrite(f'{npy_path}.png', blank_image)

                    # Exit on 'q'
                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break

                if stop_requested:
                    break
            if stop_requested:
                break
finally:
    # Always free the camera and close the windows, even after an error
    cap.release()
    cv2.destroyAllWindows()