# Context
Phase one of this project is feature extraction.\
This notebook drives the feature extraction process.

In [1]:
# Set project's environment variables
import os
import sys
from dotenv import load_dotenv
load_dotenv(dotenv_path="../project.env")
sys.path.append(os.environ["PYTHONPATH"])

In [2]:
# Import project-wide and PH1 specific variables and functions
import superheader as sup
import PH1header as ph1



Chosen class grouping: all-classes


Directory /Users/diego/Desktop/iteso/TOG/ exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/src exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/bin exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/media exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/scores exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/raw/all-classes exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/PH1/all-classes exists. Continuing with execution


I0000 00:00:1749424926.421287  476667 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M3 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1749424926.427138  476730 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749424926.431166  476738 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1749424926.432528  476667 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M3 Pro
W0000 00:00:1749424926.495467  476741 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1749424926.502142  476741 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for fe

# Local filesystem to pandas dataframe
The first step we take is to read the data from our filesystem into a pandas dataframe.
We use nltk to help us in this process.

In [3]:
from nltk.corpus.reader import CategorizedPlaintextCorpusReader
import pandas as pd

## NLTK Corpus

In [4]:
RAWcorpus = CategorizedPlaintextCorpusReader(sup.RAW_DATA_ROOT, fileids=rf".*_({sup.CLASSES_REGEX_GROUP})\.mp4$", cat_pattern=r"(.*)/")

## Pandas DataFrame

In [5]:
RAWlist = []

for fileid in RAWcorpus.fileids():
  RAWlist.append(ph1.get_tags(fileid))

RAWdf = pd.DataFrame(RAWlist, columns=sup.tag_columns+[sup.class_numeric_column])

In [6]:
RAWdf

Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric
0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_0.mp4,p01,1,0,0,39
1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29
2,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_10.mp4,p01,1,0,10,38
3,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_2.mp4,p01,1,0,2,30
4,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_3.mp4,p01,1,0,3,31
...,...,...,...,...,...,...
4151,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_v.mp4,p11,5,1,v,24
4152,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_w.mp4,p11,5,1,w,25
4153,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_x.mp4,p11,5,1,x,26
4154,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_y.mp4,p11,5,1,y,27


### Selecting relevant videos and classes

#### Counting the number of frames per video
Dropping videos with less than 12 frames, as often a majority of the frames are not an accurate representation of the sign

In [7]:
RAWdf["frame_count"] = RAWdf[sup.fileid_col].apply(ph1.step1.count_frames)
count = (RAWdf["frame_count"]< 12).sum()
print(count)

114


In [8]:
RAWdf = RAWdf[RAWdf["frame_count"] >= 12].reset_index(drop=True)  # drops rows and cleans the index
RAWdf

Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric,frame_count
0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_0.mp4,p01,1,0,0,39,22
1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,36
2,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_10.mp4,p01,1,0,10,38,34
3,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_2.mp4,p01,1,0,2,30,22
4,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_3.mp4,p01,1,0,3,31,34
...,...,...,...,...,...,...,...
4037,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_v.mp4,p11,5,1,v,24,32
4038,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_w.mp4,p11,5,1,w,25,28
4039,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_x.mp4,p11,5,1,x,26,45
4040,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_y.mp4,p11,5,1,y,27,39


In [9]:
RAWdf = RAWdf.drop(
    columns=[
        "frame_count",
    ]
)

#### Selecting relevant classes
The class corresponding to '0' is much smaller than the rest, so we will drop
all of those samples

In [10]:
RAWdf[sup.class_name_column].value_counts()


class_name
z     110
g     110
x     110
t     110
s     110
q     110
a     109
o     109
k     109
j     109
u     108
6     108
7     108
e     108
v     107
r     107
i     107
9     107
10    107
4     107
1     106
c     106
n     106
2     106
3     106
d     104
m     104
b     104
w     103
p     103
5     102
f     102
8     102
l     102
h      99
y      99
rr     96
ll     92
0      30
Name: count, dtype: int64

In [11]:
RAWdf = RAWdf[RAWdf[sup.class_name_column] != '0'].reset_index(drop=True)
RAWdf

Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric
0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29
1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_10.mp4,p01,1,0,10,38
2,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_2.mp4,p01,1,0,2,30
3,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_3.mp4,p01,1,0,3,31
4,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_4.mp4,p01,1,0,4,32
...,...,...,...,...,...,...
4007,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_v.mp4,p11,5,1,v,24
4008,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_w.mp4,p11,5,1,w,25
4009,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_x.mp4,p11,5,1,x,26
4010,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_y.mp4,p11,5,1,y,27


# Video to landmarks

## Per frame data

In [12]:
PH1_per_frame_df = pd.concat(RAWdf.apply(ph1.step1.extract_landmarks_per_frame, axis=1).tolist(), ignore_index=True)
PH1_per_frame_df

W0000 00:00:1749425004.000789  476747 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric,first_frame,current_frame,num_candidate_hands,current_candidate_hand,...,current_candidate_pose,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z
0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,0,...,0,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985
1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,2,...,0,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985
2,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,0,...,0,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746
3,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,2,...,0,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746
4,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,2,4,0,...,0,0.497244,0.318666,-0.457492,0.579072,0.523931,-0.151225,0.423043,0.499277,-0.169967
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94338,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,9,4,2,...,0,0.484490,0.289181,-0.421546,0.572918,0.474172,-0.078406,0.397896,0.484275,-0.113990
94339,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,0,...,0,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788
94340,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,2,...,0,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788
94341,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,11,4,0,...,0,0.480457,0.287692,-0.410172,0.570401,0.466859,-0.068966,0.398920,0.484171,-0.099403


In [13]:
# There should never be more than one candidate pose, since there is only one
# person per video in our dataset
# This will make our model "weaker" during inference in the sense that we can't
# train it to focus on the person signing like we may be able to train it to 
# focus on the active hand.
PH1_per_frame_df["num_candidate_poses"].ne(1).sum()

0

## Active hand
As of this point in the process, for each frame, it is possible that mediapipe
detects multiple candidate hands. This means that if we wish to store the 
landmark data video by video in a single row contain the landmark data for each
of the video's frames, we have one of two choices:
1. We store all the data detected in each frame, including that for inactive
hands
2. We choose only the likeliest active hand and store the data for it

The purpose of this project is to train a model which predicts the sign being
performed. Since all of our signs are single-hand signs, this means the
information from all other hands has to be discarded. Therefore, we choose
option 2.

For each video (and thus for each frame in each video), we know the expected 
handedness thanks to the naming convention for the files used by the creator of
the dataset. Also, mediapipe gives us confidence scores for each of the hands
it detects. Therefore, we will use this information to go through our perframe
dataset, and for frames for which there is more than one hand detected, choose
to keep only the data for the hand that is the likeliest to be active. 


### Frames with only one detected hand

In [14]:
PH1_only_one_df = PH1_per_frame_df.copy()
PH1_only_one_df[sup.active_hand_col] = -1

In [15]:
pair_counts = PH1_only_one_df.groupby([sup.fileid_col, sup.current_frame_col]).size()
pair_counts


fileid                                             current_frame
p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4      0                2
                                                   1                2
                                                   2                2
                                                   3                2
                                                   4                2
                                                                   ..
p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4  7                2
                                                   8                2
                                                   9                2
                                                   10               2
                                                   11               2
Length: 48140, dtype: int64

In [16]:
only_one_detected_hand_mask = PH1_only_one_df.set_index([sup.fileid_col, sup.current_frame_col]).index.map(pair_counts) == 1
print(only_one_detected_hand_mask.sum())


1937


In [17]:
PH1_only_one_df.loc[only_one_detected_hand_mask, sup.active_hand_col] = 1
PH1_only_one_df

Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric,first_frame,current_frame,num_candidate_hands,current_candidate_hand,...,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z,active_hand
0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,0,...,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985,-1
1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,2,...,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985,-1
2,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,0,...,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746,-1
3,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,2,...,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746,-1
4,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,2,4,0,...,0.497244,0.318666,-0.457492,0.579072,0.523931,-0.151225,0.423043,0.499277,-0.169967,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94338,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,9,4,2,...,0.484490,0.289181,-0.421546,0.572918,0.474172,-0.078406,0.397896,0.484275,-0.113990,-1
94339,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,0,...,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788,-1
94340,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,2,...,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788,-1
94341,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,11,4,0,...,0.480457,0.287692,-0.410172,0.570401,0.466859,-0.068966,0.398920,0.484171,-0.099403,-1


In [18]:
count_minus_ones = (PH1_only_one_df[sup.active_hand_col] == -1).sum()
print(count_minus_ones)

92406


In [19]:
PH1_only_one_df.groupby('fileid').size()

fileid
p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4        24
p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_10.mp4       24
p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_2.mp4        24
p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_3.mp4        24
p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_4.mp4        24
                                                     ..
p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_v.mp4    24
p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_w.mp4    24
p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_x.mp4    24
p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_y.mp4    24
p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4    24
Length: 4012, dtype: int64

### Frames with only with one detected hand for which the handedness matches the expected handedness

In [20]:
PH1_check_handedness_df = PH1_only_one_df.copy()

In [21]:
def keep_correct_handedness(group):
    # Only proceed if group has more than 1 row
    if len(group) > 1:
        # Boolean mask where handedness matches detected_handedness
        matches = group[sup.handedness_column] == group["detected_handedness"]
        count_matches = matches.sum()
        
        if count_matches == 1:
            # Set active_hand: 1 for the matching row, 0 for the rest
            group.loc[matches, sup.active_hand_col] = 1
            group.loc[~matches, sup.active_hand_col] = 0
        # Else, do nothing (leave active_hand as is)
    return group

PH1_check_handedness_df = PH1_check_handedness_df.groupby(
    [sup.fileid_col, sup.current_frame_col], group_keys=False
).apply(keep_correct_handedness)


  ).apply(keep_correct_handedness)


In [22]:
PH1_check_handedness_df

Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric,first_frame,current_frame,num_candidate_hands,current_candidate_hand,...,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z,active_hand
0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,0,...,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985,1
1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,2,...,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985,0
2,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,0,...,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746,1
3,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,2,...,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746,0
4,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,2,4,0,...,0.497244,0.318666,-0.457492,0.579072,0.523931,-0.151225,0.423043,0.499277,-0.169967,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94338,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,9,4,2,...,0.484490,0.289181,-0.421546,0.572918,0.474172,-0.078406,0.397896,0.484275,-0.113990,1
94339,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,0,...,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788,1
94340,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,2,...,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788,0
94341,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,11,4,0,...,0.480457,0.287692,-0.410172,0.570401,0.466859,-0.068966,0.398920,0.484171,-0.099403,1


In [23]:
count_minus_ones = (PH1_check_handedness_df[sup.active_hand_col] == -1).sum()
print(count_minus_ones)

1556


### Frames with multiple hands with the expected handedness

In [24]:
PH1_check_confidence_df = PH1_check_handedness_df.copy()

In [25]:
def keep_confident(group):
    if (group[sup.active_hand_col] == -1).all() and len(group) > 1:
        idx_max = group["confidence"].idxmax()
        group = group.copy()  # avoid potential SettingWithCopy issues
        group.loc[group.index == idx_max, sup.active_hand_col] = 1
        group.loc[group.index != idx_max, sup.active_hand_col] = 0

        best_confidence = group.loc[idx_max, "confidence"]
        if best_confidence < 0.9:
            print(f"not confident about anybody in f{group[sup.current_frame_col]}, {group[sup.fileid_col]}")
    return group

PH1_check_confidence_df = PH1_check_confidence_df.groupby(
    [sup.fileid_col, sup.current_frame_col], group_keys=False
).apply(keep_confident)



not confident about anybody in f27525    2
27526    2
Name: current_frame, dtype: int64, 27525    p04/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_10...
27526    p04/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_10...
Name: fileid, dtype: object
not confident about anybody in f27529    4
27530    4
Name: current_frame, dtype: int64, 27529    p04/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_10...
27530    p04/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_10...
Name: fileid, dtype: object
not confident about anybody in f27531    5
27532    5
Name: current_frame, dtype: int64, 27531    p04/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_10...
27532    p04/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_10...
Name: fileid, dtype: object
not confident about anybody in f29263    0
29264    0
Name: current_frame, dtype: int64, 29263    p04/Ciclo_2_5_Izquierda/Ciclo_2_5_Izquierda_10...
29264    p04/Ciclo_2_5_Izquierda/Ciclo_2_5_Izquierda_10...
Name: fileid, dtype: object
not confident about anybody in f29265    1
29266    1
Name: 

  ).apply(keep_confident)


In [26]:
PH1_check_confidence_df

Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric,first_frame,current_frame,num_candidate_hands,current_candidate_hand,...,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z,active_hand
0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,0,...,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985,1
1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,2,...,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985,0
2,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,0,...,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746,1
3,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,2,...,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746,0
4,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,2,4,0,...,0.497244,0.318666,-0.457492,0.579072,0.523931,-0.151225,0.423043,0.499277,-0.169967,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94338,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,9,4,2,...,0.484490,0.289181,-0.421546,0.572918,0.474172,-0.078406,0.397896,0.484275,-0.113990,1
94339,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,0,...,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788,1
94340,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,2,...,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788,0
94341,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,11,4,0,...,0.480457,0.287692,-0.410172,0.570401,0.466859,-0.068966,0.398920,0.484171,-0.099403,1


In [27]:
count_minus_ones = (PH1_check_confidence_df[sup.active_hand_col] == -1).sum()
print(count_minus_ones)

0


In [28]:
print(PH1_check_confidence_df.columns.tolist())

['fileid', 'person_id', 'cycle_num', 'handedness', 'class_name', 'class_numeric', 'first_frame', 'current_frame', 'num_candidate_hands', 'current_candidate_hand', 'detected_handedness', 'confidence', 'h0x', 'h0y', 'h0z', 'h1x', 'h1y', 'h1z', 'h2x', 'h2y', 'h2z', 'h3x', 'h3y', 'h3z', 'h4x', 'h4y', 'h4z', 'h5x', 'h5y', 'h5z', 'h6x', 'h6y', 'h6z', 'h7x', 'h7y', 'h7z', 'h8x', 'h8y', 'h8z', 'h9x', 'h9y', 'h9z', 'h10x', 'h10y', 'h10z', 'h11x', 'h11y', 'h11z', 'h12x', 'h12y', 'h12z', 'h13x', 'h13y', 'h13z', 'h14x', 'h14y', 'h14z', 'h15x', 'h15y', 'h15z', 'h16x', 'h16y', 'h16z', 'h17x', 'h17y', 'h17z', 'h18x', 'h18y', 'h18z', 'h19x', 'h19y', 'h19z', 'h20x', 'h20y', 'h20z', 'num_candidate_poses', 'current_candidate_pose', 'p0x', 'p0y', 'p0z', 'p11x', 'p11y', 'p11z', 'p12x', 'p12y', 'p12z', 'active_hand']


### Dropping videos with missing frames

In [29]:
expected_frames = list(range(12))

missing_frames_grouped = PH1_check_confidence_df[PH1_check_confidence_df[sup.active_hand_col] == 1]\
  .groupby(sup.fileid_col)\
  .filter(lambda g: len(g) != 12)\
  .groupby(sup.fileid_col)

for fileid, group in missing_frames_grouped:
    actual_frames = group[sup.current_frame_col].tolist()
    missing_frames = sorted(set(expected_frames) - set(actual_frames))
    print(f"\nFileID: {fileid}")
    print(f"Missing frames: {missing_frames}")


FileID: p01/Ciclo_3_5_Izquierda/Ciclo_3_5_Izquierda_n.mp4
Missing frames: [10]

FileID: p04/Ciclo_3_5_Derecha/Ciclo_3_5_Derecha_q.mp4
Missing frames: [1, 3]

FileID: p04/Ciclo_5_5_Derecha/Ciclo_5_5_Derecha_k.mp4
Missing frames: [0]


In [30]:
for fileid, group in missing_frames_grouped:
  print(f"\nFileID: {fileid}")
  print(PH1_check_confidence_df[
    (PH1_check_confidence_df[sup.fileid_col] == fileid)]
    [[sup.current_frame_col, sup.active_hand_col]])


FileID: p01/Ciclo_3_5_Izquierda/Ciclo_3_5_Izquierda_n.mp4
      current_frame  active_hand
4687              0            1
4688              1            1
4689              2            1
4690              3            1
4691              4            1
4692              5            1
4693              6            1
4694              7            1
4695              8            1
4696              9            1
4697             11            1

FileID: p04/Ciclo_3_5_Derecha/Ciclo_3_5_Derecha_q.mp4
       current_frame  active_hand
30662              0            1
30663              2            1
30664              4            1
30665              5            1
30666              6            1
30667              6            0
30668              7            1
30669              8            1
30670              8            0
30671              9            0
30672              9            1
30673             10            1
30674             11            1

FileID: p04/C

In [31]:
PH1_no_missing_frames_df = PH1_check_confidence_df[
  ~PH1_check_confidence_df[sup.fileid_col].isin(
    [fileid for fileid, group in missing_frames_grouped]
  )
]

In [32]:
PH1_no_missing_frames_df

Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric,first_frame,current_frame,num_candidate_hands,current_candidate_hand,...,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z,active_hand
0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,0,...,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985,1
1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,0,4,2,...,0.498476,0.318532,-0.423539,0.577536,0.519796,-0.141686,0.425831,0.492205,-0.158985,0
2,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,0,...,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746,1
3,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,1,4,2,...,0.497645,0.315444,-0.436276,0.577870,0.520437,-0.134161,0.424087,0.495226,-0.161746,0
4,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_1.mp4,p01,1,0,1,29,12,2,4,0,...,0.497244,0.318666,-0.457492,0.579072,0.523931,-0.151225,0.423043,0.499277,-0.169967,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94338,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,9,4,2,...,0.484490,0.289181,-0.421546,0.572918,0.474172,-0.078406,0.397896,0.484275,-0.113990,1
94339,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,0,...,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788,1
94340,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,10,4,2,...,0.483038,0.285413,-0.406400,0.570957,0.473768,-0.055774,0.396282,0.485782,-0.104788,0
94341,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_z.mp4,p11,5,1,z,28,27,11,4,0,...,0.480457,0.287692,-0.410172,0.570401,0.466859,-0.068966,0.398920,0.484171,-0.099403,1


# Write out

In [33]:
# These columns were only useful to determine the active hand.
PH1_final_df = PH1_no_missing_frames_df.drop(
    columns=[
        "num_candidate_hands",
        "current_candidate_hand",
        "detected_handedness",
        "confidence",
        "num_candidate_poses",
        "current_candidate_pose",
        "first_frame"
    ]
)

In [34]:
print(PH1_final_df.columns.tolist())

['fileid', 'person_id', 'cycle_num', 'handedness', 'class_name', 'class_numeric', 'current_frame', 'h0x', 'h0y', 'h0z', 'h1x', 'h1y', 'h1z', 'h2x', 'h2y', 'h2z', 'h3x', 'h3y', 'h3z', 'h4x', 'h4y', 'h4z', 'h5x', 'h5y', 'h5z', 'h6x', 'h6y', 'h6z', 'h7x', 'h7y', 'h7z', 'h8x', 'h8y', 'h8z', 'h9x', 'h9y', 'h9z', 'h10x', 'h10y', 'h10z', 'h11x', 'h11y', 'h11z', 'h12x', 'h12y', 'h12z', 'h13x', 'h13y', 'h13z', 'h14x', 'h14y', 'h14z', 'h15x', 'h15y', 'h15z', 'h16x', 'h16y', 'h16z', 'h17x', 'h17y', 'h17z', 'h18x', 'h18y', 'h18z', 'h19x', 'h19y', 'h19z', 'h20x', 'h20y', 'h20z', 'p0x', 'p0y', 'p0z', 'p11x', 'p11y', 'p11z', 'p12x', 'p12y', 'p12z', 'active_hand']


In [35]:
PH1_final_df[PH1_final_df['fileid'] == 'p01/Ciclo_3_5_Izquierda/Ciclo_3_5_Izquierda_n.mp4']

Unnamed: 0,fileid,person_id,cycle_num,handedness,class_name,class_numeric,current_frame,h0x,h0y,h0z,...,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z,active_hand


In [36]:
PH1_final_df.to_csv(sup.PH1_DATA_AH_PF_CSV, index=False)