# Testing - Prerecorded Video to ASL Translation

In [2]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
import pandas as pd

mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()





In [3]:
import json

def load_label_map(json_file_path):
    with open(json_file_path, 'r') as file:
        label_map = json.load(file)
    index_to_label = {v: k for k, v in label_map.items()}
    return index_to_label

index_to_label = load_label_map("sign_to_prediction_index_map.json")


In [4]:
index_to_label

{0: 'TV',
 1: 'after',
 2: 'airplane',
 3: 'all',
 4: 'alligator',
 5: 'animal',
 6: 'another',
 7: 'any',
 8: 'apple',
 9: 'arm',
 10: 'aunt',
 11: 'awake',
 12: 'backyard',
 13: 'bad',
 14: 'balloon',
 15: 'bath',
 16: 'because',
 17: 'bed',
 18: 'bedroom',
 19: 'bee',
 20: 'before',
 21: 'beside',
 22: 'better',
 23: 'bird',
 24: 'black',
 25: 'blow',
 26: 'blue',
 27: 'boat',
 28: 'book',
 29: 'boy',
 30: 'brother',
 31: 'brown',
 32: 'bug',
 33: 'bye',
 34: 'callonphone',
 35: 'can',
 36: 'car',
 37: 'carrot',
 38: 'cat',
 39: 'cereal',
 40: 'chair',
 41: 'cheek',
 42: 'child',
 43: 'chin',
 44: 'chocolate',
 45: 'clean',
 46: 'close',
 47: 'closet',
 48: 'cloud',
 49: 'clown',
 50: 'cow',
 51: 'cowboy',
 52: 'cry',
 53: 'cut',
 54: 'cute',
 55: 'dad',
 56: 'dance',
 57: 'dirty',
 58: 'dog',
 59: 'doll',
 60: 'donkey',
 61: 'down',
 62: 'drawer',
 63: 'drink',
 64: 'drop',
 65: 'dry',
 66: 'dryer',
 67: 'duck',
 68: 'ear',
 69: 'elephant',
 70: 'empty',
 71: 'every',
 72: 'eye',
 

In [5]:
import time
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic
holistic = mp_holistic.Holistic(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

cap = cv2.VideoCapture(0)
fps = cap.get(cv2.CAP_PROP_FPS)
print(fps)
num_frames = int(fps * 3)

frames_data = []

output_file = 'annotated_video.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_video = cv2.VideoWriter(output_file, fourcc, fps, (int(cap.get(3)), int(cap.get(4))))

frames_processed = 0
while frames_processed < num_frames:
    ret, frame = cap.read()
    if not ret:
        break
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = holistic.process(frame_rgb)

    annotated_frame = frame.copy()
    mp_drawing.draw_landmarks(
        annotated_frame,
        results.face_landmarks,
        mp_holistic.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style())
    mp_drawing.draw_landmarks(
        annotated_frame,
        results.pose_landmarks,
        mp_holistic.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
    mp_drawing.draw_landmarks(
        annotated_frame,
        results.left_hand_landmarks,
        mp_holistic.HAND_CONNECTIONS,
        landmark_drawing_spec=mp_drawing_styles.get_default_hand_landmarks_style())

    output_video.write(annotated_frame)

    frames_data.append(results)
    frames_processed += 1

cap.release()
output_video.release()
cv2.destroyAllWindows()
results


30.0


mediapipe.python.solution_base.SolutionOutputs

In [6]:
def extract_landmarks(results):
    landmarks = {'face': results.face_landmarks, 'left_hand': results.left_hand_landmarks,
                 'pose': results.pose_landmarks, 'right_hand': results.right_hand_landmarks}
    all_landmarks = []
    for key, result in landmarks.items():
        num_landmarks = {'face': 468, 'left_hand': 21, 'pose': 33, 'right_hand': 21}[key]
        if result is None:
            all_landmarks.extend([(0, 0, 0)] * num_landmarks)
        else:
            all_landmarks.extend([(landmark.x, landmark.y, landmark.z) for landmark in result.landmark])
    return all_landmarks

landmarks_list = [extract_landmarks(results) for results in frames_data]
print(landmarks_list)
flat_list = [item for sublist in landmarks_list for item in sublist] 
df = pd.DataFrame(flat_list, columns=['x', 'y', 'z'])
df.to_parquet('landmarks.parquet', index=False)


[[(0.4601593613624573, 0.9012752175331116, -0.024704257026314735), (0.4652996063232422, 0.8480294346809387, -0.06310642510652542), (0.4604942500591278, 0.8611355423927307, -0.029065903276205063), (0.45193469524383545, 0.791523277759552, -0.05587083101272583), (0.4659886658191681, 0.8320431113243103, -0.06916998326778412), (0.46493810415267944, 0.8091124892234802, -0.06713934242725372), (0.460207462310791, 0.7509894371032715, -0.04315876215696335), (0.36394670605659485, 0.7371358275413513, -0.009187482297420502), (0.45929357409477234, 0.7148955464363098, -0.04201469197869301), (0.4599354565143585, 0.6938524842262268, -0.048299629241228104), (0.4585619270801544, 0.6007824540138245, -0.047217562794685364), (0.45989522337913513, 0.9095157384872437, -0.021824637427926064), (0.4589846730232239, 0.915095329284668, -0.016917433589696884), (0.4579498767852783, 0.9165352582931519, -0.01075395755469799), (0.45791319012641907, 0.9185997247695923, -0.007085540797561407), (0.45817816257476807, 0.925

In [13]:
ROWS_PER_FRAME = 543  # Number of rows per frame

def load_relevant_data_subset(pq_path):
    data_columns = ['x', 'y', 'z']
    data = pd.read_parquet(pq_path, columns=data_columns)
    n_frames = int(len(data) / ROWS_PER_FRAME)
    data = data.values.reshape(n_frames, ROWS_PER_FRAME, len(data_columns))
    return data.astype(np.float32)


In [14]:

xyz = load_relevant_data_subset('landmarks.parquet')
prediction_fn = interpreter.get_signature_runner('serving_default')
output = prediction_fn(inputs=xyz)
p = output['outputs'].reshape(-1)
predicted_class_index = np.argmax(p)
predicted_class_label = index_to_label[predicted_class_index]
print(f"Predicted class label: {predicted_class_label}")


Predicted class label: lion


In [15]:
def annotate_video_with_label(video_file):
    cap = cv2.VideoCapture(video_file)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v') 
    out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (640, 480))

    while(cap.isOpened()):
        ret, frame = cap.read()
        if ret==True:
            label = "Predicted: " + predicted_class_label
            cv2.putText(frame, label, (10, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
            out.write(frame)
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            break
    cap.release()
    out.release()
    cv2.destroyAllWindows()

annotate_video_with_label('annotated_video.mp4')
