In [1]:
import pandas as pd
import numpy as np
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware

from signlens.preprocessing.preprocess import group_pad_sequences, decode_labels, pad_and_preprocess_sequence, reshape_processed_data_to_tf
from signlens.preprocessing.data import load_relevant_data_subset, load_data_subset_csv, load_video_list_json, load_landmarks_json
from utils.model_utils import load_model
from utils.video_utils import process_video_to_landmarks_json

2024-03-25 12:24:06.959621: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Debug with Parquet files 

In [2]:
# Landmark parquet file used as the debugging input.
# NOTE(review): absolute, machine-specific path — it will not resolve on another machine;
# consider deriving it from a configurable data directory.
pq_path = '/Users/max/code/benoitfrisque/signlens/raw_data/asl-signs/train_landmark_files_noface/22343/1130422569.parquet'

In [3]:
# Display the raw contents of the parquet file before any preprocessing.
pd.read_parquet(pq_path)

Unnamed: 0,frame,row_id,type,landmark_index,x,y,z
468,61,61-left_hand-0,left_hand,0,,,
469,61,61-left_hand-1,left_hand,1,,,
470,61,61-left_hand-2,left_hand,2,,,
471,61,61-left_hand-3,left_hand,3,,,
472,61,61-left_hand-4,left_hand,4,,,
...,...,...,...,...,...,...,...
3253,66,66-right_hand-16,right_hand,16,,,
3254,66,66-right_hand-17,right_hand,17,,,
3255,66,66-right_hand-18,right_hand,18,,,
3256,66,66-right_hand-19,right_hand,19,,,


In [4]:
# Preprocess the single parquet file (passed as a one-element Series of paths) and display the result.
# NOTE(review): preprocess_and_pad_sequences_from_pq_list is not among the names imported in the
# visible import cell — confirm where it comes from; on a fresh kernel this cell would raise NameError.
processed_data = preprocess_and_pad_sequences_from_pq_list(pd.Series([pq_path]))
processed_data

<tf.Tensor: shape=(1, 100, 225), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)>

In [7]:
# Load the trained model by name. load_model returns a tuple whose first element is the
# model; unpack it in one step so `model` is never bound to the tuple itself (the same
# unpacking is used in the "Debug the output" section of this notebook).
model_name = "model 20240322-173411"
model, _ = load_model(model_name)

# Run inference on the preprocessed parquet sequence.
prediction = model.predict([processed_data])

[34m
Load latest model from local registry...[0m
[34m
Load latest model from disk...[0m
✅ Model loaded from local disk /Users/max/code/benoitfrisque/signlens/training_outputs/model 20240322-173411
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


In [6]:
# Re-display the preprocessed tensor that was fed to the model.
processed_data

<tf.Tensor: shape=(1, 100, 225), dtype=float32, numpy=
array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]], dtype=float32)>

In [8]:
# Display the raw model output for the parquet input.
prediction

array([[0.12294942, 0.05086957, 0.09900542, 0.14869675, 0.08723761,
        0.05622363, 0.1207337 , 0.11545481, 0.08714273, 0.11168633]],
      dtype=float32)

In [9]:
# decode_labels returns a (labels, probabilities) pair; unpack it so that `word` holds
# only the labels instead of the whole tuple, then display both parts together.
word, proba = decode_labels(prediction)
word, proba

(['chair'], array([0.14869675], dtype=float32))

## Debug with JSON files 

In [3]:
# Load the video list; it is filtered by its `sign` column in the next cell.
videos = load_video_list_json()

In [4]:
# Take the path of the first video whose sign is 'book'.
is_book = videos["sign"] == 'book'
video_path = videos.loc[is_book, "video_path"].iloc[0]

In [8]:
# Run the landmark extraction on the selected video and keep the returned value.
json_data = process_video_to_landmarks_json(
    video_path,
    json_output=False,
    save_annotated_video=False,
    show_preview=False,
    frame_interval=1,
    rear_camera=True,
)

An error occurred: ValidatedGraphConfig Initialization failed.
ImageToTensorCalculator: ; RET_CHECK failure (mediapipe/calculators/tensor/image_to_tensor_calculator.cc:144) ValidateOptionOutputDims(options) returned INTERNAL: ; RET_CHECK failure (./mediapipe/calculators/tensor/image_to_tensor_utils.h:136) options.has_output_tensor_float_range() || options.has_output_tensor_int_range() || options.has_output_tensor_uint_range()Output tensor range is required. 
InferenceCalculatorCpu: ; RET_CHECK failure (mediapipe/calculators/tensor/inference_calculator_cpu.cc:63) !options.model_path().empty() ^ kSideInModel(cc).IsConnected()Either model as side packet or model path in options is required.
SplitDetectionVectorCalculator: The number of output streams should match the number of ranges specified in the CalculatorOptions.
ImageToTensorCalculator: ; RET_CHECK failure (mediapipe/calculators/tensor/image_to_tensor_calculator.cc:144) ValidateOptionOutputDims(options) returned INTERNAL: ; RET_CHE

In [9]:
# Display the result of the video-to-landmarks step.
# NOTE(review): the recorded output is an empty list, following the MediaPipe
# initialization error printed by the previous cell — nothing was extracted in that run.
json_data

[]

## Debug the output (return statement)

Input: the path to a landmarks JSON file. Output: the result needs to be returned as a dictionary.

In [2]:
# Load the trained model by name; load_model returns a pair and only the first
# element (the model) is kept.
model_name = "model 20240322-173411"
model, _ = load_model(model_name)

[34m
Load latest model from local registry...[0m
[34m
Load latest model from disk...[0m
✅ Model loaded from local disk /Users/max/code/benoitfrisque/signlens/training_outputs/model 20240322-173411


In [3]:
# Landmarks JSON file used as the debugging input.
# NOTE(review): absolute, machine-specific path — it will not resolve on another machine;
# consider deriving it from a configurable data directory.
landmarks_json_path = '/Users/max/code/benoitfrisque/signlens/processed_data/07070_landmarks.json'

In [4]:
# Load the landmarks from the JSON file and display them.
landmarks = load_landmarks_json(landmarks_json_path)
landmarks

array([[[ 0.52077091,  0.28274578, -1.17921996],
        [ 0.55038935,  0.24660993, -1.13354099],
        [ 0.56552011,  0.24998254, -1.13373411],
        ...,
        [        nan,         nan,         nan],
        [        nan,         nan,         nan],
        [        nan,         nan,         nan]],

       [[ 0.52837896,  0.28276762, -1.13102567],
        [ 0.55374402,  0.24695426, -1.07339215],
        [ 0.567366  ,  0.25036377, -1.07361257],
        ...,
        [        nan,         nan,         nan],
        [        nan,         nan,         nan],
        [        nan,         nan,         nan]],

       [[ 0.53207505,  0.28231111, -1.14035559],
        [ 0.55565143,  0.2469483 , -1.08670712],
        [ 0.5685342 ,  0.25035664, -1.08691549],
        ...,
        [        nan,         nan,         nan],
        [        nan,         nan,         nan],
        [        nan,         nan,         nan]],

       ...,

       [[ 0.52725458,  0.27833903, -1.09706616],
        [ 0

In [5]:
# Run the loaded landmarks through pad_and_preprocess_sequence and display the result.
data_processed = pad_and_preprocess_sequence(landmarks)
data_processed

array([[[ 0.52077091,  0.28274578, -1.17921996],
        [ 0.55038935,  0.24660993, -1.13354099],
        [ 0.56552011,  0.24998254, -1.13373411],
        ...,
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       [[ 0.52837896,  0.28276762, -1.13102567],
        [ 0.55374402,  0.24695426, -1.07339215],
        [ 0.567366  ,  0.25036377, -1.07361257],
        ...,
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       [[ 0.53207505,  0.28231111, -1.14035559],
        [ 0.55565143,  0.2469483 , -1.08670712],
        [ 0.5685342 ,  0.25035664, -1.08691549],
        ...,
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]],

       ...,

       [[ 0.        ,  0.        ,  0.        ],
        [ 0

In [6]:
# Convert the processed landmarks into the tensor passed to model.predict, then display it.
# NOTE(review): the recorded output here is float64, whereas the parquet path earlier in
# this notebook produced a float32 tensor — confirm the dtype difference is intended.
data_tf = reshape_processed_data_to_tf(data_processed)
data_tf

<tf.Tensor: shape=(1, 100, 225), dtype=float64, numpy=
array([[[ 0.52077091,  0.28274578, -1.17921996, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.52837896,  0.28276762, -1.13102567, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.53207505,  0.28231111, -1.14035559, ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]]])>

In [7]:
# Run inference on the JSON-derived tensor and display the raw model output.
prediction = model.predict(data_tf)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


array([[0.13047627, 0.04971163, 0.06018183, 0.09111148, 0.05924381,
        0.18378511, 0.16353346, 0.08900592, 0.07050943, 0.10244113]],
      dtype=float32)

In [32]:
# Decode the model output into the predicted label(s) and their probability value(s).
word, proba = decode_labels(prediction)

In [33]:
# Display the decoded pair as returned by decode_labels.
word, proba

(['who'], array([0.18378511], dtype=float32))

In [34]:
# Display the first label and the first probability on their own.
word[0], proba[0]

('who', 0.18378511)

In [35]:
# Check the container type of the probabilities before building the returned dictionary.
type(proba)

numpy.ndarray

In [36]:
# Check the container type of the labels before building the returned dictionary.
type(word)

list

In [37]:
# Replace the list of labels with its first element.
# NOTE(review): this rebinds `word` from its own previous value, so the cell is not
# idempotent — running it a second time would index into the string instead of the list.
word = word[0]
word

'who'

In [38]:
# Convert the first probability to a built-in Python float.
# NOTE(review): this rebinds `proba` from its own previous value, so the cell is not
# idempotent — running it a second time would fail because a float cannot be indexed.
proba = float(proba[0])

In [39]:
# Display the probability after conversion to a Python float.
proba

0.1837851107120514