<a href="https://colab.research.google.com/github/abdokamel2001/ASL-Translation-Project/blob/main/2023-10-Sprint2-Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Microsoft Dataset Sign Language Model

In [1]:
# Copy the MS-ASL dataset folder from Google Drive into the Colab workspace (/content).
# NOTE(review): this requires /content/drive to be mounted already; no mount cell is visible in this notebook.
!cp -r "/content/drive/MyDrive/AI Team/Tasks/2023-10-Sprint2/MS-ASL" "/content" #Import the dataset

In [2]:
!pip install -q mediapipe pytube

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.6/33.6 MB[0m [31m45.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import os
import cv2
import json
import time
import numpy as np
from tqdm import tqdm
import mediapipe as mp
import tensorflow as tf
from pytube import YouTube
import matplotlib.pyplot as plt
from IPython.core.display import clear_output

# Pick 50 Examples

In [None]:
# TODO: Nourhan's code for picking the 50 examples goes here.



# MediaPipe Functions

In [4]:
# Module-level MediaPipe solution objects, all built with their default options.
# They are created once here and then used by get_frame_landmarks for every frame.
hands, pose, face_mesh = (
    mp.solutions.hands.Hands(),
    mp.solutions.pose.Pose(),
    mp.solutions.face_mesh.FaceMesh(),
)

In [5]:
def _landmarks_to_array(landmark_list, num_landmarks):
    """
    Converts a MediaPipe landmark list into a fixed-size coordinate array.

    Args:
        landmark_list: An object exposing a `.landmark` sequence whose items have
            `x`, `y` and `z` attributes, or None when nothing was detected.
        num_landmarks (int): Number of rows the returned array must have.

    Returns:
        np.array: A (num_landmarks, 3) array of [x, y, z] rows. Rows without a
        matching landmark stay zero; landmarks beyond num_landmarks are dropped.
    """
    coordinates = np.zeros((num_landmarks, 3))
    if landmark_list is not None:
        points = [(lm.x, lm.y, lm.z) for lm in list(landmark_list.landmark)[:num_landmarks]]
        if points:
            coordinates[:len(points)] = points
    return coordinates


def get_frame_landmarks(frame):
    """
    Extracts landmarks from a single video frame using MediaPipe.

    The frame is run through the module-level `hands`, `pose` and `face_mesh`
    solutions. Every segment of the output has a fixed size and is zero-filled
    when the corresponding part was not detected, so the returned array has the
    same shape for every frame and frames can be stacked safely.

    Args:
        frame: A single rgb frame/image.

    Returns:
        np.array: A NumPy array containing extracted landmarks.
        The output dimensions are (n, 3), where n = 21 + 21 + 33 + 468 = 543.
        Rows are ordered as: first detected hand, second detected hand, body pose, face.
        Each row is [x, y, z] as reported by MediaPipe for that landmark.
    """

    results_hands = hands.process(frame)
    results_pose = pose.process(frame)
    results_face = face_mesh.process(frame)

    num_landmarks_per_hand = 21
    num_landmarks_body_pose = 33
    # Must equal the number of landmarks actually stored for a detected face;
    # the previous value (117) did not match the full mesh that was being stored,
    # so frames with and without a face had different shapes.
    num_landmarks_face = 468

    detected_hands = list(results_hands.multi_hand_landmarks or [])
    detected_faces = list(results_face.multi_face_landmarks or [])

    # NOTE(review): hands are assigned to the "left"/"right" slots by detection order,
    # not by MediaPipe's handedness classification, so a slot is not guaranteed to
    # hold the same physical hand from frame to frame.
    left_hand_landmarks = _landmarks_to_array(
        detected_hands[0] if len(detected_hands) > 0 else None, num_landmarks_per_hand)
    right_hand_landmarks = _landmarks_to_array(
        detected_hands[1] if len(detected_hands) > 1 else None, num_landmarks_per_hand)
    body_pose_landmarks = _landmarks_to_array(results_pose.pose_landmarks, num_landmarks_body_pose)
    # Only the first detected face is used.
    face_landmarks = _landmarks_to_array(
        detected_faces[0] if detected_faces else None, num_landmarks_face)

    return np.vstack((left_hand_landmarks, right_hand_landmarks, body_pose_landmarks, face_landmarks))

In [6]:
def get_video_landmarks(video_path):
    """
    Extracts landmarks from a video by processing each frame in the video.

    Frames are read with OpenCV, converted from BGR to RGB and passed to
    get_frame_landmarks. The capture is always released, even if processing a
    frame raises.

    Args:
        video_path (str): The file path to the video to process.

    Returns:
        np.array: A NumPy array where each row corresponds to the landmarks
        extracted from a single frame of the video. The dimensions of the output array
        are (m, n, 3), where m is the number of frames and n is the number of landmarks.
        Each element in the array is a 3D coordinate representing a landmark's position.
        If no frame can be read (e.g. the file cannot be opened), the result is an
        empty array of shape (0,).
    """

    cap = cv2.VideoCapture(video_path)
    all_frame_landmarks = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # end of stream or read failure
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            all_frame_landmarks.append(get_frame_landmarks(frame_rgb))
    finally:
        # Release the capture handle on every exit path, including exceptions.
        cap.release()

    return np.array(all_frame_landmarks)

# Load the json data

In [7]:
# Read the MS-ASL training annotations and parse them into `train_data`
# (a list of dictionaries, one per annotated sample).
with open("/content/MS-ASL/MSASL_train.json", 'r') as annotations_file:
    train_data = json.loads(annotations_file.read())

# Define directories

In [8]:
# Working folders: downloaded videos go to video_dir, extracted landmark arrays to npy_dir.
video_dir = '/content/train-videos'
npy_dir = '/content/train-numpy'

# Create both folders up front; an already existing folder is left untouched.
for directory in (video_dir, npy_dir):
    os.makedirs(directory, exist_ok=True)

# Initialize ID sets

In [21]:
# Bookkeeping for the download loop:
#   downloaded_video_ids - videos whose landmarks are already available
#   failed_video_ids     - videos that could not be downloaded/processed in this session
downloaded_video_ids = set()
failed_video_ids = set()

# The download loop deletes each .mp4 as soon as its landmarks are saved, so the only
# durable record of a finished video is its <video_id>.npy file in npy_dir. Scanning
# video_dir for .mp4 files (as before) never found finished videos and would treat
# leftover partial downloads as done.
for filename in os.listdir(npy_dir):
    stem, extension = os.path.splitext(filename)
    if extension == '.npy':
        downloaded_video_ids.add(stem)

# Iterate through the data

In [19]:
# Download every training video, extract its landmarks and cache them as <video_id>.npy
# in npy_dir. The .mp4 is only a temporary artifact and is removed once the video has
# been handled. Entries that share a video URL are processed only once.
# Interrupting the cell (KeyboardInterrupt) stops the loop and clears video_dir.
try:
    for sample in tqdm(train_data, ncols=100):
        url = sample['url']
        # Value of the `v` query parameter. Unlike url.split('=')[1] this ignores any
        # trailing "&..." parameters and cannot raise IndexError on an unexpected URL.
        video_id = url.split('v=')[-1].split('&')[0]
        video_path = os.path.join(video_dir, f'{video_id}.mp4')
        npy_path = os.path.join(npy_dir, f'{video_id}.npy')
        tmp_npy_path = npy_path + '.tmp'

        # Landmarks saved by an earlier session count as done, so nothing is
        # downloaded twice after a kernel restart.
        if os.path.exists(npy_path):
            downloaded_video_ids.add(video_id)

        if video_id not in downloaded_video_ids and video_id not in failed_video_ids:
            try:
                yt = YouTube(url)
                stream = yt.streams.get_highest_resolution()
                stream.download(output_path=video_dir, filename=f'{video_id}.mp4')
                video_landmarks = get_video_landmarks(video_path)
                if video_landmarks.size == 0:
                    # Do not cache an empty array as if the video had been processed.
                    raise ValueError("no frames could be read from the downloaded video")
                # Write to a temporary file and rename it, so an interrupted save
                # never leaves a truncated .npy that would later be taken as complete.
                with open(tmp_npy_path, 'wb') as npy_file:
                    np.save(npy_file, video_landmarks)
                os.replace(tmp_npy_path, npy_path)
                downloaded_video_ids.add(video_id)

            except Exception as e:
                print(f"\nError downloading {video_id}: {e}")
                failed_video_ids.add(video_id)
                continue  # skip clear_output so the error message stays visible

            finally:
                # Runs on success, failure and interruption alike.
                for leftover_path in (video_path, tmp_npy_path):
                    if os.path.exists(leftover_path):
                        os.remove(leftover_path)

        clear_output(True)

except KeyboardInterrupt:
    # Remove whatever is left in video_dir (e.g. a partially downloaded file).
    for f in os.listdir(video_dir):
        file_path = os.path.join(video_dir, f)
        if os.path.isfile(file_path):
            os.remove(file_path)
    print("\nLoading process interrupted by user.")

  0%|                                                         | 20/16054 [01:53<25:16:34,  5.68s/it]


Loading process interrupted by user.





In [22]:
# Summarise how many videos were processed and how many failed, listing their IDs.
downloaded_summary = f'Downloaded Videos ({len(downloaded_video_ids)}): {downloaded_video_ids}'
failed_summary = f'Failed Videos ({len(failed_video_ids)}): {failed_video_ids}'
print(downloaded_summary, failed_summary, sep='\n')

Downloaded Videos (0): set()
Failed Videos (0): set()


---

# Useful Shortcuts

### Zip to Download

In [None]:
# Pack the extracted landmark files into a single archive for download.
# -q: quiet, -r: recurse into the folder, -j: store only the file names (no directory paths).
!zip -q -r /content/train-numpy.zip -j /content/train-numpy

### Unzip to Reload

In [None]:
# Restore the landmark files from the archive into /content/train-numpy/
# (-q: quiet, -d: directory to extract into).
!unzip -q /content/train-numpy.zip -d /content/train-numpy/

---

### Export to Drive

In [None]:
!cp -r "/content/train-numpy" "/content/drive/MyDrive/AI Team/Varying/MS-Train-Numpy"

### Import from Drive

In [None]:
!cp -r "/content/drive/MyDrive/AI Team/Varying/MS-Train-Numpy" "/content/train-numpy"

---