In [1]:
import cv2
import mediapipe as mp
import numpy as np
import json
import tflite
import tensorflow as tf
import pandas as pd
import os
import warnings
from tqdm import tqdm
from transformers import BertTokenizer, BertModel

2024-05-08 15:12:34.688735: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-08 15:12:34.688957: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-08 15:12:34.691007: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-08 15:12:34.717590: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
warnings.filterwarnings('ignore')

In [3]:
mp_holistic = mp.solutions.holistic # holistic model
mp_drawing = mp.solutions.drawing_utils # drawing utilities
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # color conversion
    image.flags.writeable = False # img no longer writeable
    pred = model.process(image) # make landmark prediction
    image.flags.writeable = True  # img now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # color reconversion
    return image, pred

In [4]:
def draw(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                              mp_drawing.DrawingSpec(color=(0,0,255), thickness=3, circle_radius=3),
                              mp_drawing.DrawingSpec(color=(0,0,0), thickness=1, circle_radius=0))
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(0,150,0), thickness=3, circle_radius=3),
                              mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(200,56,12), thickness=3, circle_radius=3),
                              mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=2))
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(250,56,12), thickness=3, circle_radius=3),
                              mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=2))

In [5]:
def extract_coordinates(results):
    print(results)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]) if results.face_landmarks.landmark else np.zeros(468, 3)
    pose = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]) if results.pose_landmarks.landmark else np.zeros(33, 3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]) if results.left_hand_landmarks.landmark else np.zeros(21, 3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]) if results.right_hand_landmarks.landmark else np.zeros(21, 3)
    return np.concatenate([face, lh, pose, rh])

In [6]:
def load_json(path):
    with open(path, 'r') as f:
        return json.load(f)
    
class config:
    path = os.getcwd()
    seq_len = 12
    rpf = 543
    model_path = path + '/results/asl_model/model.tflite'

In [7]:
def load_relevant(path):
    data_cols = ['x', 'y', 'z']
    data = pd.read_parquet(path, columns=data_cols)
    n_frames = len(data) // config.rpf
    data = data.values.reshape(n_frames, config.rpf, 3) # len(data_cols) = 3
    return data.astype(np.float32)

In [8]:
sign_map = load_json('sign_to_prediction_index_map.json')
s2p_map = {
    k.lower(): v for k, v in sign_map.items()
}
p2s_map = {
    v: k for k, v in sign_map.items()
}
encoder = lambda x: s2p_map.get(x.lower())
decoder = lambda x: p2s_map.get(x)


In [9]:
model = tf.lite.Interpreter(model_path=config.model_path)

In [10]:
found_signs = list(model.get_signature_list().keys())
prediction_fn = model.get_signature_runner('serving_default')

In [11]:
seq = []
cap = cv2.VideoCapture(0)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        img, results = mediapipe_detection(frame, holistic)
        draw(img, results)

        landmarks = extract_coordinates(results)
        seq.append(landmarks)
        if len(seq) == 15:
            prediction = prediction_fn(inputs=load_relevant(seq))
            sign = np.argmax(prediction["outputs"])
            print(prediction)
            cv2.putText(img, f"Prediction:    {decoder(sign)}", (3, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
            
        cv2.imshow('Webcam Feed',img)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()














INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


AttributeError: 'NoneType' object has no attribute 'landmark'

In [None]:
print(seq)