In [1]:
import mediapipe as mp
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import json

2023-10-02 20:00:47.175878: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:

# Inference arguments loaded as a DataFrame; the functions below index it with
# ['selected_columns'] to obtain the column names fed to the model.
selected_columns = pd.read_json('./results2nd/inference_args.json')
def load_relevant_data_subset(pq_path):
    """Read a parquet file, keeping only the columns used for inference.

    Args:
        pq_path: Path of the parquet file to read.

    Returns:
        The DataFrame returned by `pd.read_parquet`, restricted to the
        columns listed under `selected_columns['selected_columns']`.
    """
    wanted_columns = selected_columns['selected_columns']
    subset = pd.read_parquet(pq_path, columns=wanted_columns)
    return subset

def predict(xyz_np, prediction_fn, rev_character_map):
    """Run the model on a landmark DataFrame and decode the result to text.

    Args:
        xyz_np: DataFrame containing (at least) every column named in
            `selected_columns['selected_columns']`.
        prediction_fn: Callable invoked as `prediction_fn(inputs=array)`;
            its result must expose an 'outputs' entry.
        rev_character_map: Mapping from predicted index to character.
            Indices missing from the mapping decode to an empty string.

    Returns:
        The decoded characters joined into a single string.
    """
    feature_names = selected_columns['selected_columns']
    # Keep only the model's input columns and cast them to float32.
    model_input = xyz_np.loc[:, feature_names].astype('float32').values
    prediction = prediction_fn(inputs=model_input)

    # Highest-scoring index along axis 1 for every row of the output
    # (presumably one row per predicted character -- TODO confirm).
    best_indices = np.argmax(prediction['outputs'], axis=1)

    decoded = []
    for index in best_indices:
        decoded.append(rev_character_map.get(index, ""))
    return "".join(decoded)

def load_model():
    """Load the TFLite model and the index-to-character lookup table.

    Reads './results2nd/model.tflite' and
    './character_to_prediction_index.json' relative to the working directory.

    Returns:
        A tuple `(prediction_fn, rev_character_map)` where `prediction_fn`
        is the interpreter's "serving_default" signature runner and
        `rev_character_map` maps each prediction index back to its character
        (the inverse of the JSON mapping).
    """
    interpreter = tf.lite.Interpreter('./results2nd/model.tflite')
    prediction_fn = interpreter.get_signature_runner("serving_default")

    # Explicit encoding so the JSON file is read the same way on every platform.
    with open("./character_to_prediction_index.json", "r", encoding="utf-8") as f:
        character_map = json.load(f)
    # The file maps character -> index; invert it for decoding predictions.
    rev_character_map = {index: character for character, index in character_map.items()}

    return prediction_fn, rev_character_map

# Short aliases for the MediaPipe solution modules.
# NOTE(review): in the visible code, mp_drawing and mp_drawing_styles are only
# referenced by the disabled landmark-drawing code inside do_capture_loop;
# mp_holistic provides the Holistic model used there.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic

def create_frame_landmark_df(results, frame, cols):
    """Build a one-row DataFrame with the landmark coordinates of one frame.

    Args:
        results: Object exposing `face_landmarks`, `pose_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`. Each attribute
            is either falsy (nothing detected) or has a `.landmark` iterable
            of points with `x`, `y` and `z` attributes.
        frame: Frame number stored in the `frame` column.
        cols: Column names the row must contain. Columns for which no
            landmark is available stay NaN.

    Returns:
        A pandas DataFrame with a single row (index 0). Columns are `cols`
        in order, then `frame` (unless already in `cols`), then any
        landmark column that was not listed in `cols`.
    """
    # Assemble the row in a plain dict and build the DataFrame once; writing
    # every landmark through `DataFrame.loc` is far slower per frame.
    row = dict.fromkeys(cols, np.nan)
    row['frame'] = frame

    landmark_groups = (
        ('face', results.face_landmarks),
        ('pose', results.pose_landmarks),
        ('left_hand', results.left_hand_landmarks),
        ('right_hand', results.right_hand_landmarks),
    )
    for group_name, group in landmark_groups:
        if not group:
            # Nothing detected for this group: its columns remain NaN.
            continue
        for i, point in enumerate(group.landmark):
            row[f'x_{group_name}_{i}'] = point.x
            row[f'y_{group_name}_{i}'] = point.y
            row[f'z_{group_name}_{i}'] = point.z

    return pd.DataFrame(row, index=[0])

def do_capture_loop(cols):
    """Capture webcam frames, extract landmarks and overlay live predictions.

    Opens camera 0 and runs MediaPipe Holistic on every frame that is read
    successfully. The landmarks of each frame are appended to a history, and
    on every even frame number the whole history is passed to `predict`. The
    latest prediction is printed and drawn on the displayed image. The loop
    ends when the capture is no longer open or Esc (key code 27) is pressed.

    Args:
        cols: Column names forwarded to `create_frame_landmark_df`.

    Returns:
        List of the one-row landmark DataFrames collected, one per processed
        frame.
    """
    # load model
    prediction_fn, rev_character_map = load_model()

    # Overlay geometry and font settings never change between frames, so they
    # are defined once instead of on every loop iteration.
    rect_x = 0
    # NOTE(review): with a frame shorter than 500 px the banner would start
    # below the image -- confirm against the camera resolution in use.
    rect_y = 500  # Adjust this value to control the position of the rectangle
    rect_width = 400
    rect_height = 200
    rect_color = (0, 0, 0)  # Black color in BGR format
    text_color = (255, 255, 255)  # White text in BGR format
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness = 1

    landmarks_all = []
    text = ""
    # For webcam input:
    cap = cv2.VideoCapture(0)
    try:
        with mp_holistic.Holistic(
                min_detection_confidence=0.5,
                min_tracking_confidence=0.5) as holistic:
            frame = 0
            while cap.isOpened():
                frame += 1

                success, image = cap.read()
                if not success:
                    print("Ignoring empty camera frame.")
                    # If loading a video, use 'break' instead of 'continue'.
                    continue

                # To improve performance, optionally mark the image as not
                # writeable to pass by reference.
                image.flags.writeable = False
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                results = holistic.process(image)

                # Create landmark df
                landmarks = create_frame_landmark_df(results, frame, cols)
                landmarks_all.append(landmarks)

                if frame % 2 == 0:
                    # NOTE(review): the full history is concatenated and
                    # re-run through the model each time, so the cost of a
                    # prediction grows with the length of the session.
                    landmarks = pd.concat(landmarks_all).reset_index(drop=True)
                    # predict
                    predict_str = predict(landmarks, prediction_fn, rev_character_map)
                    text = predict_str
                    print(f"frame: {frame}, pred: {predict_str}")

                # Convert back to BGR for drawing and display.
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                # Landmark drawing is currently disabled:
                # mp_drawing.draw_landmarks(
                #     image,
                #     results.face_landmarks,
                #     mp_holistic.FACEMESH_CONTOURS,
                #     landmark_drawing_spec=None,
                #     connection_drawing_spec=mp_drawing_styles
                #     .get_default_face_mesh_contours_style())
                # mp_drawing.draw_landmarks(
                #     image,
                #     results.pose_landmarks,
                #     mp_holistic.POSE_CONNECTIONS,
                #     landmark_drawing_spec=mp_drawing_styles
                #     .get_default_pose_landmarks_style())

                # Calculate text position to center it in the rectangle
                (text_width, text_height), _ = cv2.getTextSize(text, font, font_scale, thickness)
                text_x = (rect_width - text_width) // 2 + rect_x
                text_y = (rect_height - text_height) // 2 + rect_y + text_height

                # Create a black rectangle as a background for the text
                cv2.rectangle(image, (rect_x, rect_y), (rect_x + rect_width, rect_y + rect_height), rect_color, -1)

                # Put the white text on the black background
                frame_with_text = cv2.putText(image, text, (text_x, text_y), font, font_scale, text_color, thickness)

                # Flip the image horizontally for a selfie-view display.
                #cv2.imshow('MediaPipe Holistic', cv2.flip(image, 1))

                cv2.imshow('MediaPipe Holistic', frame_with_text)
                if cv2.waitKey(5) & 0xFF == 27:
                    break
    finally:
        # Always free the camera and close the preview window, even when the
        # loop is interrupted or a frame fails to process.
        cap.release()
        cv2.destroyAllWindows()
    return landmarks_all

if __name__ == "__main__":
    # A training parquet file is read only to obtain its column names, which
    # define the layout of the per-frame landmark rows.
    pq_file = './input/train_landmarks/1255240050.parquet'
    xyz = pd.read_parquet(pq_file)
    # The first column is skipped (presumably the frame index, which
    # create_frame_landmark_df adds itself -- TODO confirm against the file).
    do_capture_loop(xyz.columns[1:])
    #print(landmarks)
    #landmarks = pd.concat(landmarks).reset_index(drop=True).to_parquet('output.parquet')

frame: 2, pred:  
frame: 4, pred: 1
frame: 6, pred: 1
frame: 8, pred: 121
frame: 10, pred: 1212
frame: 12, pred: 1212 
frame: 14, pred: 12a
frame: 16, pred: 123
frame: 18, pred: 12 3
frame: 20, pred: 12 34
frame: 22, pred: 12 a34
frame: 24, pred: 12 a345
frame: 26, pred: 12 a345
frame: 28, pred: 12 a345m
frame: 30, pred: 12 a345m
frame: 32, pred: 12a345 m
frame: 34, pred: 12 a345 mv
frame: 36, pred: 12a345 mv
frame: 38, pred: 12a345 m21
frame: 40, pred: 12a345 m21
frame: 42, pred: 12a345 m21
frame: 44, pred: 12a345 m21
frame: 46, pred: 12a345 m21
frame: 48, pred: 12a345 m21
frame: 50, pred: 12a345 mv1
frame: 52, pred: 12a345 m21c
frame: 54, pred: 12a345 m21 c
frame: 56, pred: 12a345 m21 c
frame: 58, pred: 12a345 m21 c
frame: 60, pred: 12a345 m21 c
frame: 62, pred: 12a345 m21 c
frame: 64, pred: 12a345 m21 c


: 