### Importing Necessary Packages


In [7]:
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import os
import math
import mediapipe as mp

import csv


##### MediaPipe setup: drawing helpers and the Holistic solution

In [8]:
# MediaPipe module aliases used by the cells below
mp_holistic = mp.solutions.holistic  # Holistic solution; pose and hand landmarks are read from its results
mp_drawing = mp.solutions.drawing_utils  # helpers for drawing landmarks onto frames


##### working with the data

In [9]:
def _landmark_row(landmark_list, count):
    """Flatten a MediaPipe landmark list into [x, y, z, visibility, ...].

    When ``landmark_list`` is falsy (nothing detected in the frame), returns
    ``4 * count`` zeros so every CSV row keeps the same number of columns.
    """
    if landmark_list:
        points = [[lm.x, lm.y, lm.z, lm.visibility] for lm in landmark_list.landmark]
    else:
        points = [[0, 0, 0, 0] for _ in range(count)]
    return list(np.array(points).flatten())


def extract_coordinates(video_path, video_url, class_name,start_frame, end_frame):
    """Run MediaPipe Holistic over one clip of a video and save the landmarks as CSV.

    The output file is ``data_four_labels/<class_name>/<video_url><start_frame>.csv``.
    The start frame is part of the name so that several clips of the same sign
    taken from one video end up in separate files.

    Args:
        video_path: Folder containing the video; ``<video_url>.mp4`` is opened from it.
        video_url: File stem of the video (also used in the CSV file name).
        class_name: Label written in the first column of every row and used as
            the output sub-folder name.
        start_frame: First frame to process. Frames are counted from 1.
        end_frame: Last frame to process (inclusive).

    Returns:
        The result of the last ``holistic.process`` call, or ``None`` when the
        CSV already exists, the video cannot be opened, or no frame in the
        requested range was processed.

    Each CSV row is: class, 33 pose landmarks, 21 right-hand landmarks,
    21 left-hand landmarks, with four values (x, y, z, visibility) per landmark.
    Undetected parts are written as zeros. Pressing ``q`` in the preview window
    stops processing early.
    """
    os.makedirs("data_four_labels/{}".format(class_name), exist_ok=True)

    csv_file = f"data_four_labels/{class_name}/{video_url}{start_frame}.csv"
    if os.path.exists(csv_file):
        # This clip was already exported.
        return

    # Open the video BEFORE creating the CSV: otherwise a missing/corrupt video
    # leaves a header-only file behind and the check above skips it forever.
    cap = cv2.VideoCapture(os.path.join(video_path, video_url)+".mp4")
    if not cap.isOpened():
        print("Error opening video stream or file")
        cap.release()
        return None

    # Header: 'class' followed by x/y/z/v columns for 33 pose + 21 right-hand
    # + 21 left-hand landmarks (the order in which rows are assembled below).
    num_coords = 21 + 21 + 33
    landmarks = ['class']
    for val in range(1, num_coords+1):
        landmarks += ['x{}'.format(val), 'y{}'.format(val), 'z{}'.format(val), 'v{}'.format(val)]
    print("Initialized an empty landmarks of size:", len(landmarks))
    print("Currently working with video: ",  video_url, " ", class_name)

    # Stays None if no frame ever reaches holistic.process (e.g. start_frame is
    # beyond the end of the video); previously this raised UnboundLocalError.
    results = None
    try:
        # The CSV is opened once for the whole clip instead of once per frame.
        with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic, \
                open(csv_file, mode='w', newline='') as f:
            csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerow(landmarks)

            frameNr = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Count only frames that were actually read, so numbering is 1-based.
                frameNr += 1
                if frameNr < start_frame:
                    continue
                if frameNr > end_frame:
                    break

                # MediaPipe expects RGB input; the array is marked read-only
                # before being handed to holistic.process.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image.flags.writeable = False
                results = holistic.process(image)

                # Draw on the original BGR frame: it is writeable and is the
                # channel order cv2.imshow expects (the RGB copy is neither).
                # Right hand
                mp_drawing.draw_landmarks(frame, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                        mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                                        mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                                        )
                # Left hand
                mp_drawing.draw_landmarks(frame, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                                        mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                                        mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                                        )
                # Pose
                mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                                        mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                                        mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                        )
                cv2.imshow('Frame', frame)
                # Press Q on keyboard to exit
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break

                # Export coordinates; a failure on one frame is reported and
                # the remaining frames are still processed.
                try:
                    row = (
                        [class_name]
                        + _landmark_row(results.pose_landmarks, 33)
                        + _landmark_row(results.right_hand_landmarks, 21)
                        + _landmark_row(results.left_hand_landmarks, 21)
                    )
                    csv_writer.writerow(row)
                except Exception as e:
                    print(str(e) +"for ", class_name, " ", video_url )
                    continue
    finally:
        # Always free the capture and the preview window, even on errors.
        cap.release()
        cv2.destroyAllWindows()

    return results


In [10]:
# Single-clip smoke test.
# Arguments: video_path, video_url, class_name, start_frame, end_frame
# exist_ok=True creates the output root when missing without a separate
# check-then-create step.
os.makedirs("data_four_labels", exist_ok=True)
# extract_coordinates("combined-dataset/", "CxTSVyM-ij0", "morning", 1050,1140)
# extract_coordinates("../combined-dataset/", "CxTSVyM-ij0", "morning", 1050,1140)


Initialized an empty landmarks of size: 301
Currently working with video:  CxTSVyM-ij0   morning


mediapipe.python.solution_base.SolutionOutputs

#### Working with the MS-ASL JSON file

In [11]:
# Load the combined MS-ASL annotations into a DataFrame and preview the first rows.
test_df = pd.read_json(path_or_buf="MSASL-combined.json")
test_df.head(5)

Unnamed: 0,org_text,clean_text,start_time,signer_id,signer,start,end,file,label,height,fps,end_time,url,text,box,width,review,available
0,absent,absent,0.0,114,-1,0,37,ASL ABSENT,837,360,28.971,1.277,https://www.youtube.com/watch?v=ri3NrdgfAtE,absent,"[0.21896389130000002, 0.008568197500000001, 0....",202,,False
1,help,help,0.0,76,42,0,110,help 2,50,360,29.97,3.67,www.youtube.com/watch?v=l31UXgChCS4,help,"[0.0503727198, 0.2994125783, 1, 0.6968145967]",640,,True
2,come on,come on,0.0,114,-1,0,41,asl come on,889,360,25.0,1.64,https://www.youtube.com/watch?v=pt9bV_EvcaU,come on,"[0.08946925400000001, 0.1794851124, 0.99899017...",480,,False
3,LANGUAGE,language,0.0,3,-1,0,56,LANGUAGE(3),513,360,15.0,3.733,https://www.youtube.com/watch?v=-j1wozf6o9w,language,"[0.1770857871, 0.0036684573, 1, 0.9955700636]",480,,True
4,confused,confused,0.0,53,-1,0,95,ASL Confused,272,360,29.969,3.17,https://www.youtube.com/watch?v=y8tHmOQcCwU,confused,"[0.0626253188, 0.209987849, 1, 0.7668771744]",640,,False


In [12]:
 #creating empty folder with data
os.makedirs("data_four_labels", exist_ok=True)

# Only clips whose cleaned label is one of these are processed.
four_labels = ["coffee", "milk", "door", "dog"]

# range(len(test_df)) visits every row; the previous `len(test_df)-1` bound
# silently skipped the last annotation.
for idx in range(len(test_df)):
    if test_df.loc[idx, "available"] == False:
        continue

    # The last 11 characters of the URL are used as the .mp4 file stem.
    video_url = test_df.loc[idx, "url"][-11:]
    start_frame = test_df.loc[idx, "start"]
    end_frame = test_df.loc[idx, "end"]
    fps = test_df.loc[idx, "fps"]  # read but not used by this cell

    # Normalise the label: force it to str, replace "/" with a space and drop
    # trailing whitespace. The earlier `str(label)` and `label[:-1]` statements
    # discarded their results, and `label[-1]` failed on an empty label.
    label = str(test_df.loc[idx, "clean_text"]).replace("/", " ").rstrip()
    if label not in four_labels:
        continue

    # Progress print: fires only when a selected row's index is a multiple of 50.
    if idx%50==0:
        print(idx)
    extract_coordinates(video_path = "combined-dataset/", video_url = video_url, class_name = label, start_frame = start_frame, end_frame = end_frame)
    

Initialized an empty landmarks of size: 301
Currently working with video:  g59_3vr_NT0   dog
Initialized an empty landmarks of size: 301
Currently working with video:  eeHS78JyN70   coffee
Initialized an empty landmarks of size: 301
Currently working with video:  eeHS78JyN70   coffee
Initialized an empty landmarks of size: 301
Currently working with video:  n0PdKty8WRA   coffee
Initialized an empty landmarks of size: 301
Currently working with video:  n0PdKty8WRA   milk
Initialized an empty landmarks of size: 301
Currently working with video:  CmKYKBQuHf4   milk
Initialized an empty landmarks of size: 301
Currently working with video:  UjiCWH8PSvM   door
Initialized an empty landmarks of size: 301
Currently working with video:  _wijo648v0g   milk
Initialized an empty landmarks of size: 301
Currently working with video:  _wijo648v0g   coffee
Initialized an empty landmarks of size: 301
Currently working with video:  2VB3WN8adyM   door
Initialized an empty landmarks of size: 301
Currently