## Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import sklearn

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

from pathlib import Path
from IPython.display import Image, display, Video, HTML
from ipywidgets import interact, widgets

from signlens.params import *
from signlens.preprocessing import data, preprocess
from utils import plot_landmarks, model_utils

# Automatically reload imported project modules before each cell runs, so edits made outside the notebook are picked up
%load_ext autoreload
%autoreload 2

2024-03-22 10:04:37.467950: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Fetch data

In [2]:
# Build the train/test split using the function's default arguments.
# According to the log printed by this call, the resulting split is saved as
# train_train.csv and train_test.csv next to the raw train.csv.
# NOTE(review): the recorded run stopped twice at an `ipdb.set_trace()` inside
# signlens/preprocessing/data.py (load_data_subset_csv) and needed a manual `c`
# to continue; that breakpoint should be removed so "Run All" does not block.
preprocess.unique_train_test_split()

[34mLoading unqiue test set with test_size=0.2[0m
/home/bfrisque/code/benoitfrisque/signlens/raw_data/asl-signs/train.csv
✅ File with frames already exists, loaded matching 'sequence_id' rows.
> [0;32m/home/bfrisque/code/benoitfrisque/signlens/signlens/preprocessing/data.py[0m(53)[0;36mload_data_subset_csv[0;34m()[0m
[0;32m     52 [0;31m    [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 53 [0;31m    [0mtrain[0m [0;34m=[0m [0mfilter_sequences_with_missing_frames[0m[0;34m([0m[0mtrain[0m[0;34m)[0m [0;31m# Filter out sequences with missing frames[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     54 [0;31m[0;34m[0m[0m
[0m


ipdb>  c


✅ Filtered on n_frames = 100. Size reduced from 94477 to 86168 (91.2%)
✅ Filtered on n_classes = 250. Size reduced from 86168 to 86168 (100.0%)
✅ Balanced data, with average of 68.932 elements per class. Size reduced from 86168 to 17233 (20.0%)
✅ Loaded 86168 rows (18.2% of the original 94477 rows) from the dataset.
[34m
Loading training test set[0m
/home/bfrisque/code/benoitfrisque/signlens/raw_data/asl-signs/train.csv
✅ File with frames already exists, loaded matching 'sequence_id' rows.
> [0;32m/home/bfrisque/code/benoitfrisque/signlens/signlens/preprocessing/data.py[0m(53)[0;36mload_data_subset_csv[0;34m()[0m
[0;32m     52 [0;31m    [0;32mimport[0m [0mipdb[0m[0;34m;[0m [0mipdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 53 [0;31m    [0mtrain[0m [0;34m=[0m [0mfilter_sequences_with_missing_frames[0m[0;34m([0m[0mtrain[0m[0;34m)[0m [0;31m# Filter out sequences with missing frames[0m[0;34m[0m[0;34m

ipdb>  c


✅ Filtered on n_classes = 250. Size reduced from 94477 to 94461 (100.0%)
✅ Loaded 94461 rows (100.0% of the original 94477 rows) from the dataset.
[34m
Total loaded rows : 94461         
Total training rows : 94461 (100.0%)         
Total test rows : 17233 (18.2%)[0m
[34m
Train and test data saved at /home/bfrisque/code/benoitfrisque/signlens/raw_data/asl-signs/train_train.csv and /home/bfrisque/code/benoitfrisque/signlens/raw_data/asl-signs/train_test.csv[0m


In [9]:
# Load a subset of the dataset using the function's default arguments and
# display the returned table as the cell output. The result is not bound to a
# name, so it is only shown here and cannot be reused by later cells.
# NOTE(review): the debugger frame in the previous cell's output locates
# load_data_subset_csv in signlens/preprocessing/data.py; presumably
# `preprocess` re-exports it -- confirm, or call it through `data` directly.
preprocess.load_data_subset_csv()

[34mLoading data subset from train_train.csv[0m
✅ Filtered on n_frames = 100. Size reduced from 94461 to 85625 (90.6%)
✅ Filtered on n_classes = 10. Size reduced from 85625 to 1738 (2.0%)
✅ Loaded 1738 rows (1.8% of the original 94461 rows) from the dataset.


Unnamed: 0,path,participant_id,sequence_id,sign,n_frames,n_frames2,file_path
0,train_landmark_files_noface/29302/3215874043.p...,29302.0,3215874043,grandpa,52.0,52.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
1,train_landmark_files_noface/27610/3908234863.p...,27610.0,3908234863,find,52.0,52.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
2,train_landmark_files_noface/37779/905455681.pa...,37779.0,905455681,potty,6.0,6.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
3,train_landmark_files_noface/32319/3409155932.p...,32319.0,3409155932,grandpa,55.0,55.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
4,train_landmark_files_noface/30680/1401028871.p...,30680.0,1401028871,scissors,30.0,30.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
...,...,...,...,...,...,...,...
1733,train_landmark_files_noface/2044/2278613168.pa...,2044.0,2278613168,hot,9.0,9.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
1734,train_landmark_files_noface/30680/3074254078.p...,30680.0,3074254078,scissors,4.0,6.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
1735,train_landmark_files_noface/36257/2267290870.p...,36257.0,2267290870,mitten,20.0,20.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
1736,train_landmark_files_noface/22343/4279148451.p...,22343.0,4279148451,grass,16.0,16.0,/home/bfrisque/code/benoitfrisque/signlens/raw...
