# Context
Phase one of this project is feature extraction.\
This notebook drives the feature extraction process.

In [1]:
# Set project's environment variables
import os
import sys
from dotenv import load_dotenv
# Load the project-level env file. The path is relative, so it is resolved
# against the kernel's current working directory.
load_dotenv(dotenv_path="../project.env")
# Make the project's modules importable. Raises KeyError when PYTHONPATH is
# not set (presumably it is defined in project.env -- verify).
sys.path.append(os.environ["PYTHONPATH"])

In [2]:
# Import project-wide and PH1 specific variables and functions
import superheader as sup
import PH1header as ph1

Directory /Users/diego/Desktop/iteso/TOG/ exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/src exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/bin exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/raw/two-classes exists. Continuing with execution
Directory /Users/diego/Desktop/iteso/TOG/data/PH1/two-classes exists. Continuing with execution


I0000 00:00:1748231545.741571  545733 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M3 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1748231545.748003  545787 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1748231545.752704  545790 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1748231545.754383  545733 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M3 Pro
W0000 00:00:1748231545.817765  545800 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1748231545.824280  545801 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for fe

# Local filesystem to pandas dataframe
The first step we take is to read the data from our filesystem into a pandas dataframe.
We use nltk to help us in this process.

In [3]:
from nltk.corpus.reader import CategorizedPlaintextCorpusReader
import pandas as pd

## NLTK Corpus

In [4]:
# Enumerate the raw videos with an NLTK corpus reader: every file under
# ph1.RAW_DATA_ROOT whose id matches r".*\.mp4" becomes a fileid. The reader is
# only used below to list fileids. cat_pattern captures the part of the fileid
# before the last "/" as the category.
RAWcorpus = CategorizedPlaintextCorpusReader(ph1.RAW_DATA_ROOT, r".*\.mp4", cat_pattern=r"(.*)/")

## Pandas DataFrame

In [5]:
# One tag record per raw video; ph1.get_tags supplies the values for the
# columns named below.
RAWlist = [ph1.get_tags(video_id) for video_id in RAWcorpus.fileids()]

RAWdf = pd.DataFrame(
    RAWlist,
    columns=[
        "person_id",
        "cycle_num",
        "handedness",
        "class_name",
        "class_numeric",
        "fileid",
    ],
)

In [6]:
# Show the tagged video table (one row per raw video).
RAWdf

Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,fileid
0,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4
1,p01,1,0,b,1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_b.mp4
2,p01,1,1,a,0,p01/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_a.mp4
3,p01,1,1,b,1,p01/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_b.mp4
4,p01,2,0,a,0,p01/Ciclo_2_5_Derecha/Ciclo_2_5_Derecha_a.mp4
...,...,...,...,...,...,...
215,p11,4,1,b,1,p11/Ciclo_4_5_Izquierda/Ciclo_4_5_Izquierda_b.mp4
216,p11,5,0,a,0,p11/Ciclo_5_5_Derecha/Ciclo_5_5_Derecha_a.mp4
217,p11,5,0,b,1,p11/Ciclo_5_5_Derecha/Ciclo_5_5_Derecha_b.mp4
218,p11,5,1,a,0,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_a.mp4


### Selecting relevant videos and classes

#### Counting the number of frames per video
We drop videos with fewer than 12 frames, because in such short videos most of the frames are often not an accurate representation of the sign.

In [7]:
# Attach each video's frame count, then report how many videos fall below the
# 12-frame minimum.
RAWdf["frame_count"] = RAWdf["fileid"].apply(ph1.step1.count_frames)
count = RAWdf["frame_count"].lt(12).sum()
print(count)

7


In [8]:
# Keep only videos with at least 12 frames and renumber the rows from 0.
has_enough_frames = RAWdf["frame_count"].ge(12)
RAWdf = RAWdf.loc[has_enough_frames].reset_index(drop=True)
RAWdf

Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,fileid,frame_count
0,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48
1,p01,1,0,b,1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_b.mp4,34
2,p01,1,1,a,0,p01/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_a.mp4,40
3,p01,1,1,b,1,p01/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_b.mp4,28
4,p01,2,0,a,0,p01/Ciclo_2_5_Derecha/Ciclo_2_5_Derecha_a.mp4,22
...,...,...,...,...,...,...,...
208,p11,4,1,b,1,p11/Ciclo_4_5_Izquierda/Ciclo_4_5_Izquierda_b.mp4,33
209,p11,5,0,a,0,p11/Ciclo_5_5_Derecha/Ciclo_5_5_Derecha_a.mp4,40
210,p11,5,0,b,1,p11/Ciclo_5_5_Derecha/Ciclo_5_5_Derecha_b.mp4,41
211,p11,5,1,a,0,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_a.mp4,108


#### Selecting relevant classes
The class labelled '0' is much smaller than the rest, so we drop all of its
samples. (In the two-class dataset shown here only classes 'a' and 'b' are present, so this filter removes nothing.)

In [9]:
# Number of remaining videos per class label.
RAWdf["class_name"].value_counts()


class_name
a    109
b    104
Name: count, dtype: int64

In [10]:
# Discard every video labelled '0' and renumber the rows from 0.
is_kept_class = RAWdf["class_name"].ne('0')
RAWdf = RAWdf.loc[is_kept_class].reset_index(drop=True)
RAWdf

Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,fileid,frame_count
0,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48
1,p01,1,0,b,1,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_b.mp4,34
2,p01,1,1,a,0,p01/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_a.mp4,40
3,p01,1,1,b,1,p01/Ciclo_1_5_Izquierda/Ciclo_1_5_Izquierda_b.mp4,28
4,p01,2,0,a,0,p01/Ciclo_2_5_Derecha/Ciclo_2_5_Derecha_a.mp4,22
...,...,...,...,...,...,...,...
208,p11,4,1,b,1,p11/Ciclo_4_5_Izquierda/Ciclo_4_5_Izquierda_b.mp4,33
209,p11,5,0,a,0,p11/Ciclo_5_5_Derecha/Ciclo_5_5_Derecha_a.mp4,40
210,p11,5,0,b,1,p11/Ciclo_5_5_Derecha/Ciclo_5_5_Derecha_b.mp4,41
211,p11,5,1,a,0,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_a.mp4,108


# Video to landmarks

## Per frame data

In [11]:
# Run the per-frame landmark extraction on every video row; each call yields
# one frame-level table, and the tables are stacked under a fresh 0..n-1 index.
per_video_tables = RAWdf.apply(ph1.step1.extract_landmarks_per_frame, axis=1)
PH1_per_frame_df = pd.concat(per_video_tables.tolist(), ignore_index=True)
PH1_per_frame_df

W0000 00:00:1748231550.624791  545802 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,fileid,frame_count,first_frame,current_frame,num_candidate_hands,...,current_candidate_pose,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z
0,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,4,...,0,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779
1,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,4,...,0,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779
2,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,4,...,0,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451
3,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,4,...,0,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451
4,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,2,4,...,0,0.511904,0.326614,-0.538389,0.582581,0.522652,-0.176160,0.420109,0.498809,-0.215022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5001,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,9,4,...,0,0.485571,0.225081,-0.449431,0.574521,0.399775,-0.116570,0.403689,0.412528,-0.128586
5002,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,4,...,0,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301
5003,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,4,...,0,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301
5004,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,11,4,...,0,0.482275,0.226709,-0.417354,0.574424,0.403286,-0.096126,0.401220,0.413737,-0.091982


In [12]:
# There should never be more than one candidate pose, since there is only one
# person per video in our dataset.
# This will make our model "weaker" during inference in the sense that we can't
# train it to focus on the person signing, the way we may be able to train it
# to focus on the active hand.
# Counts the rows whose num_candidate_poses differs from 1; the expected
# result is 0.
PH1_per_frame_df["num_candidate_poses"].ne(1).sum()

0

In [13]:
# Persist the raw per-frame detections. to_csv is called with its defaults, so
# the row index is written as an extra, unnamed first column.
PH1_per_frame_df.to_csv(ph1.PH1_DATA_PERFRAME_CSV)

## Per video data
At this point in the process, mediapipe may have detected multiple candidate
hands in any given frame. This means that if we wish to store the landmark
data video by video, with a single row containing the landmark data for each
of the video's frames, we have two choices:
1. We store all the data detected in each frame, including that for inactive
hands
2. We choose only the likeliest active hand and store the data for it

The purpose of this project is to train a model which predicts the sign being
performed. Since all of our signs are single-hand signs, this means the
information from all other hands has to be discarded. Therefore, we choose
option 2.

For each video (and thus for each frame in each video), we know the expected 
handedness thanks to the naming convention for the files used by the creator of
the dataset. Also, mediapipe gives us confidence scores for each of the hands
it detects. Therefore, we will use this information to go through our perframe
dataset, and for frames for which there is more than one hand detected, choose
to keep only the data for the hand that is the likeliest to be active. 


### Active hand

#### Frames with only one detected hand

In [14]:
# Work on a copy so the raw per-frame table stays untouched.
PH1_only_one_df = PH1_per_frame_df.copy()
# active_hand is a per-row flag: -1 = not decided yet, 1 = active hand,
# 0 = discarded detection. Every row starts undecided.
PH1_only_one_df["active_hand"] = -1

In [15]:
# Number of rows per (fileid, current_frame) pair -- presumably the number of
# candidate hand detections recorded for that frame.
pair_counts = PH1_only_one_df.groupby(["fileid", "current_frame"]).size()
pair_counts


fileid                                             current_frame
p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4      0                2
                                                   1                2
                                                   2                2
                                                   3                2
                                                   4                2
                                                                   ..
p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4  7                2
                                                   8                2
                                                   9                2
                                                   10               2
                                                   11               2
Length: 2556, dtype: int64

In [16]:
# Look up every row's (fileid, current_frame) key in pair_counts and flag the
# rows that are the only detection in their frame.
frame_keys = PH1_only_one_df.set_index(["fileid", "current_frame"]).index
rows_per_frame = frame_keys.map(pair_counts)
only_one_detected_hand_mask = rows_per_frame == 1
print(only_one_detected_hand_mask.sum())


106


In [17]:
# A frame with a single detection has no ambiguity: that detection is marked
# as the active hand.
PH1_only_one_df.loc[only_one_detected_hand_mask, "active_hand"] = 1
PH1_only_one_df

Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,fileid,frame_count,first_frame,current_frame,num_candidate_hands,...,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z,active_hand
0,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,4,...,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779,-1
1,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,4,...,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779,-1
2,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,4,...,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451,-1
3,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,4,...,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451,-1
4,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,2,4,...,0.511904,0.326614,-0.538389,0.582581,0.522652,-0.176160,0.420109,0.498809,-0.215022,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5001,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,9,4,...,0.485571,0.225081,-0.449431,0.574521,0.399775,-0.116570,0.403689,0.412528,-0.128586,-1
5002,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,4,...,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301,-1
5003,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,4,...,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301,-1
5004,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,11,4,...,0.482275,0.226709,-0.417354,0.574424,0.403286,-0.096126,0.401220,0.413737,-0.091982,-1


In [18]:
# Rows still flagged -1, i.e. whose active hand has not been decided yet.
count_minus_ones = PH1_only_one_df["active_hand"].eq(-1).sum()
print(count_minus_ones)

4900


#### Frames where exactly one detected hand matches the expected handedness

In [19]:
# Start this stage from a copy so the previous stage's table is preserved.
PH1_check_handedness_df = PH1_only_one_df.copy()

In [20]:
def keep_correct_handedness(group):
    """Pick the active hand of a frame by matching the expected handedness.

    Parameters
    ----------
    group : pandas.DataFrame
        All detections (rows) of one frame. Must contain the columns
        "handedness" (expected), "detected_handedness" and "active_hand".

    Returns
    -------
    pandas.DataFrame
        A frame with the same index and columns. When the group has more than
        one row and exactly one of them has the expected handedness, that row
        gets active_hand = 1 and every other row gets active_hand = 0.
        Otherwise the group is returned unchanged.
    """
    # Groups with a single row are left as they are.
    if len(group) <= 1:
        return group

    # Boolean mask of the rows whose detected handedness is the expected one.
    matches = group["handedness"] == group["detected_handedness"]
    if matches.sum() != 1:
        # Zero or several matches: handedness alone cannot decide.
        return group

    # Work on a copy: pandas does not support groupby functions that mutate
    # the group they are handed.
    resolved = group.copy()
    resolved.loc[matches, "active_hand"] = 1
    resolved.loc[~matches, "active_hand"] = 0
    return resolved

# Resolve the active hand frame by frame. The columns are selected explicitly
# after the groupby so that the grouping keys ("fileid", "current_frame") stay
# in the result: applying on the implicit selection is deprecated in pandas 2.2
# and will drop the grouping columns in a later version.
PH1_check_handedness_df = PH1_check_handedness_df.groupby(
    ["fileid", "current_frame"], group_keys=False
)[PH1_check_handedness_df.columns.tolist()].apply(keep_correct_handedness)


  ).apply(keep_correct_handedness)


In [21]:
# Show the table after the handedness-based decision.
PH1_check_handedness_df

Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,fileid,frame_count,first_frame,current_frame,num_candidate_hands,...,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z,active_hand
0,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,4,...,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779,1
1,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,4,...,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779,0
2,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,4,...,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451,1
3,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,4,...,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451,0
4,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,2,4,...,0.511904,0.326614,-0.538389,0.582581,0.522652,-0.176160,0.420109,0.498809,-0.215022,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5001,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,9,4,...,0.485571,0.225081,-0.449431,0.574521,0.399775,-0.116570,0.403689,0.412528,-0.128586,0
5002,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,4,...,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301,1
5003,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,4,...,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301,0
5004,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,11,4,...,0.482275,0.226709,-0.417354,0.574424,0.403286,-0.096126,0.401220,0.413737,-0.091982,1


In [22]:
# Rows still flagged -1 after the handedness check.
count_minus_ones = PH1_check_handedness_df["active_hand"].eq(-1).sum()
print(count_minus_ones)

100


#### Remaining frames: no hand, or several hands, with the expected handedness

In [23]:
# Start this stage from a copy so the previous stage's table is preserved.
PH1_check_confidence_df = PH1_check_handedness_df.copy()

In [24]:
def keep_confident(group, min_confidence=0.9):
    """Pick the most confident detection of a frame that is still undecided.

    Parameters
    ----------
    group : pandas.DataFrame
        All detections (rows) of one frame. Must contain the columns
        "active_hand", "confidence", "fileid" and "current_frame".
    min_confidence : float, default 0.9
        A message is printed when the chosen detection's confidence is below
        this value. The detection is still chosen.

    Returns
    -------
    pandas.DataFrame
        The group unchanged when it has a single row or when any row is
        already decided (active_hand != -1). Otherwise a copy in which the row
        with the highest confidence has active_hand = 1 and every other row
        has active_hand = 0.
    """
    undecided = (group["active_hand"] == -1).all() and len(group) > 1
    if not undecided:
        return group

    idx_max = group["confidence"].idxmax()
    group = group.copy()  # never write into the group handed over by groupby
    is_best = group.index == idx_max
    group.loc[is_best, "active_hand"] = 1
    group.loc[~is_best, "active_hand"] = 0

    best_confidence = group["confidence"].max()
    if best_confidence < min_confidence:
        # All rows of a group belong to the same frame, so the first row
        # identifies it.
        first_row = group.iloc[0]
        print(
            "not confident about anybody in "
            f"{first_row['fileid']}, frame {first_row['current_frame']}"
        )
    return group

# Decide the remaining frames by confidence. The columns are selected
# explicitly after the groupby so that the grouping keys ("fileid",
# "current_frame") remain available to keep_confident and in the result:
# applying on the implicit selection is deprecated in pandas 2.2 and will drop
# the grouping columns in a later version.
PH1_check_confidence_df = PH1_check_confidence_df.groupby(
    ["fileid", "current_frame"], group_keys=False
)[PH1_check_confidence_df.columns.tolist()].apply(keep_confident)



  ).apply(keep_confident)


In [25]:
# Show the table after the confidence-based decision.
PH1_check_confidence_df

Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,fileid,frame_count,first_frame,current_frame,num_candidate_hands,...,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z,active_hand
0,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,4,...,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779,1
1,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,4,...,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779,0
2,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,4,...,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451,1
3,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,4,...,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451,0
4,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,2,4,...,0.511904,0.326614,-0.538389,0.582581,0.522652,-0.176160,0.420109,0.498809,-0.215022,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5001,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,9,4,...,0.485571,0.225081,-0.449431,0.574521,0.399775,-0.116570,0.403689,0.412528,-0.128586,0
5002,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,4,...,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301,1
5003,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,4,...,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301,0
5004,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,11,4,...,0.482275,0.226709,-0.417354,0.574424,0.403286,-0.096126,0.401220,0.413737,-0.091982,1


In [26]:
# Rows still flagged -1 after the confidence check.
count_minus_ones = PH1_check_confidence_df["active_hand"].eq(-1).sum()
print(count_minus_ones)

0


In [27]:
# Persist the per-frame table together with the active_hand flag. to_csv is
# called with its defaults, so the row index is written as an extra, unnamed
# first column.
PH1_check_confidence_df.to_csv(ph1.PH1_DATA_ACTIVE_CSV)

### Keeping only one detection per frame

In [28]:
# Keep only the detection flagged as the active hand, as an independent copy
# of those rows.
is_active = PH1_check_confidence_df["active_hand"] == 1
PH1_single_row_per_frame_df = PH1_check_confidence_df.loc[is_active].copy()

In [29]:
# List every column name to decide which bookkeeping columns to drop next.
print(PH1_single_row_per_frame_df.columns.tolist())


['person_id', 'cycle_num', 'handedness', 'class_name', 'class_numeric', 'fileid', 'frame_count', 'first_frame', 'current_frame', 'num_candidate_hands', 'current_candidate_hand', 'detected_handedness', 'confidence', 'h0x', 'h0y', 'h0z', 'h1x', 'h1y', 'h1z', 'h2x', 'h2y', 'h2z', 'h3x', 'h3y', 'h3z', 'h4x', 'h4y', 'h4z', 'h5x', 'h5y', 'h5z', 'h6x', 'h6y', 'h6z', 'h7x', 'h7y', 'h7z', 'h8x', 'h8y', 'h8z', 'h9x', 'h9y', 'h9z', 'h10x', 'h10y', 'h10z', 'h11x', 'h11y', 'h11z', 'h12x', 'h12y', 'h12z', 'h13x', 'h13y', 'h13z', 'h14x', 'h14y', 'h14z', 'h15x', 'h15y', 'h15z', 'h16x', 'h16y', 'h16z', 'h17x', 'h17y', 'h17z', 'h18x', 'h18y', 'h18z', 'h19x', 'h19y', 'h19z', 'h20x', 'h20y', 'h20z', 'num_candidate_poses', 'current_candidate_pose', 'p0x', 'p0y', 'p0z', 'p11x', 'p11y', 'p11z', 'p12x', 'p12y', 'p12z', 'active_hand']


In [30]:
# Detection bookkeeping that is no longer needed once a single hand per frame
# has been chosen.
detection_bookkeeping_cols = [
    "num_candidate_hands",
    "current_candidate_hand",
    "detected_handedness",
    "confidence",
    "num_candidate_poses",
    "current_candidate_pose",
    "active_hand",
]
PH1_single_row_per_frame_df = PH1_single_row_per_frame_df.drop(
    columns=detection_bookkeeping_cols
)

In [31]:
# Show the one-row-per-frame table.
PH1_single_row_per_frame_df

Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,fileid,frame_count,first_frame,current_frame,h0x,...,h20z,p0x,p0y,p0z,p11x,p11y,p11z,p12x,p12y,p12z
0,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,0,0.356595,...,-0.002761,0.509847,0.324974,-0.511315,0.582418,0.521928,-0.164979,0.419079,0.505322,-0.183779
2,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,1,0.352816,...,-0.002850,0.513431,0.326082,-0.519151,0.579848,0.519345,-0.151877,0.419200,0.498523,-0.208451
4,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,2,0.351332,...,-0.003898,0.511904,0.326614,-0.538389,0.582581,0.522652,-0.176160,0.420109,0.498809,-0.215022
6,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,3,0.350582,...,-0.004673,0.511146,0.324653,-0.543441,0.580992,0.523306,-0.178667,0.421722,0.496027,-0.223688
8,p01,1,0,a,0,p01/Ciclo_1_5_Derecha/Ciclo_1_5_Derecha_a.mp4,48,18,4,0.349610,...,-0.004360,0.513160,0.330011,-0.510589,0.580729,0.525749,-0.150138,0.420450,0.504078,-0.184120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4996,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,7,0.634376,...,-0.011062,0.487095,0.224240,-0.451780,0.574954,0.401786,-0.123889,0.404117,0.410934,-0.111879
4998,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,8,0.635004,...,-0.011773,0.486833,0.225193,-0.428193,0.575367,0.402983,-0.103338,0.404408,0.412782,-0.091784
5000,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,9,0.634422,...,-0.011464,0.485571,0.225081,-0.449431,0.574521,0.399775,-0.116570,0.403689,0.412528,-0.128586
5002,p11,5,1,b,1,p11/Ciclo_5_5_Izquierda/Ciclo_5_5_Izquierda_b.mp4,21,4,10,0.633578,...,-0.012498,0.484376,0.228379,-0.438624,0.573325,0.403236,-0.096671,0.400964,0.415107,-0.109301


In [32]:
# Sanity check: every video should contribute exactly 12 rows (frames).
rows_per_video = PH1_single_row_per_frame_df.groupby("fileid").size()
print(rows_per_video.value_counts())

12    213
Name: count, dtype: int64


### Per video dataframe

In [33]:
# Start the per-video stage from a copy of the one-row-per-frame table.
PH1_per_video_df = PH1_single_row_per_frame_df.copy()

In [34]:
# File-level columns that are not carried into the per-video table.
file_level_cols = ["fileid", "frame_count", "first_frame"]
PH1_per_video_df = PH1_per_video_df.drop(columns=file_level_cols)

In [35]:
# List the remaining columns: the video tags, current_frame and the landmark
# coordinates.
print(PH1_per_video_df.columns.tolist())

['person_id', 'cycle_num', 'handedness', 'class_name', 'class_numeric', 'current_frame', 'h0x', 'h0y', 'h0z', 'h1x', 'h1y', 'h1z', 'h2x', 'h2y', 'h2z', 'h3x', 'h3y', 'h3z', 'h4x', 'h4y', 'h4z', 'h5x', 'h5y', 'h5z', 'h6x', 'h6y', 'h6z', 'h7x', 'h7y', 'h7z', 'h8x', 'h8y', 'h8z', 'h9x', 'h9y', 'h9z', 'h10x', 'h10y', 'h10z', 'h11x', 'h11y', 'h11z', 'h12x', 'h12y', 'h12z', 'h13x', 'h13y', 'h13z', 'h14x', 'h14y', 'h14z', 'h15x', 'h15y', 'h15z', 'h16x', 'h16y', 'h16z', 'h17x', 'h17y', 'h17z', 'h18x', 'h18y', 'h18z', 'h19x', 'h19y', 'h19z', 'h20x', 'h20y', 'h20z', 'p0x', 'p0y', 'p0z', 'p11x', 'p11y', 'p11z', 'p12x', 'p12y', 'p12z']


In [36]:
# Columns used as the grouping key when frames are flattened into one row per
# video; they are copied once per video rather than once per frame.
video_tag_cols = [
    "person_id",
    "cycle_num",
    "handedness",
    "class_name",
    "class_numeric",
]


In [37]:
def flatten_group(group):
    """Collapse all frame rows of one video into a single wide record.

    The video tags (video_tag_cols) are taken from the group's first row.
    Every other column except "current_frame" is emitted once per frame under
    the name "f<current_frame>_<column>".

    Returns a pandas Series holding the tags first, followed by the per-frame
    values in row order.
    """
    skipped = set(video_tag_cols) | {"current_frame"}
    data_cols = [name for name in group.columns if name not in skipped]

    # Tags come first so they lead the resulting record.
    record = group.iloc[0][video_tag_cols].to_dict()

    for _, frame_row in group.iterrows():
        prefix = f"f{int(frame_row['current_frame'])}_"
        record.update({prefix + name: frame_row[name] for name in data_cols})

    return pd.Series(record)

# Build one wide row per video. flatten_group reads the tag columns from each
# group, so the columns are selected explicitly after the groupby: applying on
# the implicit selection is deprecated in pandas 2.2 and will stop passing the
# grouping columns to the function in a later version.
# NOTE: this cell rebinds PH1_per_video_df, so it must not be re-run without
# first re-running the cells that build the long-format table.
PH1_per_video_df = (
    PH1_per_video_df
    .groupby(video_tag_cols)[PH1_per_video_df.columns.tolist()]
    .apply(flatten_group)
    .reset_index(drop=True)
)

  PH1_per_video_df = PH1_per_video_df.groupby(video_tag_cols).apply(flatten_group).reset_index(drop=True)


In [38]:
# Show the per-video table (one row per video, frame-prefixed columns).
PH1_per_video_df

Unnamed: 0,person_id,cycle_num,handedness,class_name,class_numeric,f0_h0x,f0_h0y,f0_h0z,f0_h1x,f0_h1y,...,f11_h20z,f11_p0x,f11_p0y,f11_p0z,f11_p11x,f11_p11y,f11_p11z,f11_p12x,f11_p12y,f11_p12z
0,p01,1,0,a,0,0.356595,0.485021,-1.312914e-07,0.373518,0.459913,...,0.000712,0.513618,0.329477,-0.509254,0.587519,0.527019,-0.149709,0.425322,0.497731,-0.168903
1,p01,1,0,b,1,0.348771,0.466237,4.743118e-08,0.363611,0.446080,...,-0.009232,0.513528,0.320183,-0.516217,0.585881,0.525961,-0.175390,0.422400,0.499646,-0.196429
2,p01,1,1,a,0,0.475774,0.927311,-9.812992e-08,0.494941,0.935337,...,-0.005570,0.489232,0.332752,-0.768854,0.573585,0.491954,-0.368677,0.400048,0.538888,-0.429950
3,p01,1,1,b,1,0.673130,0.452971,-6.348814e-09,0.662367,0.436210,...,0.004865,0.477038,0.329535,-0.724772,0.572456,0.499061,-0.355822,0.396333,0.538750,-0.390981
4,p01,2,0,a,0,0.356703,0.455645,-1.280792e-07,0.373605,0.434001,...,-0.001645,0.505790,0.328168,-0.510567,0.585073,0.526921,-0.162060,0.420092,0.496173,-0.172387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208,p11,4,1,b,1,0.645514,0.291388,-1.497878e-07,0.628773,0.270614,...,-0.004285,0.497029,0.178938,-0.321385,0.578110,0.349044,-0.055098,0.406686,0.370372,-0.046639
209,p11,5,0,a,0,0.352055,0.521995,-5.147281e-08,0.371064,0.510372,...,-0.011197,0.476834,0.193496,-0.210986,0.574185,0.363997,0.093915,0.400514,0.367275,0.079185
210,p11,5,0,b,1,0.365258,0.335842,-5.006000e-08,0.385060,0.323807,...,-0.015492,0.484834,0.194117,-0.179015,0.579442,0.370878,0.104476,0.407891,0.372225,0.081452
211,p11,5,1,a,0,0.644268,0.334418,-1.884289e-07,0.624991,0.314313,...,-0.001607,0.484898,0.196024,-0.487740,0.579202,0.366986,-0.131724,0.406939,0.388264,-0.146042


In [39]:
# Persist the per-video table. to_csv is called with its defaults, so the row
# index is written as an extra, unnamed first column.
PH1_per_video_df.to_csv(ph1.PH1_DATA_PERVIDEO_CSV)