In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os

# Directory configuration for `paddel`, supplied through environment variables.
# They are set before `paddel.settings` is imported further down; presumably
# the settings object reads them at import time (TODO confirm).
os.environ.update(
    {
        "PADDEL_DIRS__RAW": "../data/raw",
        "PADDEL_DIRS__CACHE": "../data",
    }
)

# Threshold compared against `detection_time` (landmark count / framerate) when
# filtering videos; presumably seconds -- confirm the framerate units.
MIN_DETECTION_TIME = 15

In [None]:
import logging
import pandas as pd
import numpy as np

# Configure the root logger at INFO level so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)

In [None]:
from paddel import settings

### Load video paths

In [None]:
# One row per entry found directly inside the raw-data directory.
# `Path.iterdir()` yields entries in arbitrary, filesystem-dependent order, so
# the listing is sorted to make row order (and the resulting index labels)
# reproducible across runs and machines.
df = pd.DataFrame(sorted(settings.dirs.raw.iterdir()), columns=["video_path"])

### Filename features

In [None]:
from paddel.preprocessing import extract_filename_features

# Parse the metadata encoded in each video's filename, one tuple per row.
# A plain loop is used instead of `np.vectorize` because, without `otypes`,
# vectorize (a) raises on an empty frame, (b) calls the function twice for the
# first row to infer the output types, and (c) casts every output to the types
# returned for that first row.
filename_feature_columns = ["individual_type", "hand", "gender", "age", "handedness"]

filename_features = pd.DataFrame(
    [extract_filename_features(path) for path in df["video_path"]],
    columns=filename_feature_columns,
    index=df.index,  # keep the parsed values aligned with their source rows
)
for column in filename_feature_columns:
    df[column] = filename_features[column]

# Rows whose individual type, hand or handedness equals the -1 sentinel are
# discarded; `gender` and `age` are deliberately not filtered on.
has_required_features = (
    (df["individual_type"] != -1)
    & (df["hand"] != -1)
    & (df["handedness"] != -1)
)
# `.copy()` gives later cells an independent frame, so adding columns to it does
# not trigger pandas' SettingWithCopyWarning.
df = df.loc[has_required_features].copy()

### Video framerate

In [None]:
from paddel.preprocessing.video import extract_video_framerate

# Read the framerate of every video with an explicit loop rather than
# `np.vectorize`: without `otypes`, vectorize casts all results to the type of
# the first result (an integer -1 in the first row would truncate later
# fractional framerates), probes the first video twice, and raises on an empty
# frame.
df["framerate"] = [extract_video_framerate(path) for path in df["video_path"]]

# A framerate of -1 is the sentinel filtered on here; drop those rows.
# `.copy()` keeps later column assignments free of SettingWithCopyWarning.
df = df.loc[df["framerate"] != -1].copy()

### Landmarks

In [None]:
from paddel.preprocessing import extract_landmarks

# Run landmark extraction once per video; `otypes="O"` stores each result as a
# single Python object in the column.
extract_landmarks_per_video = np.vectorize(extract_landmarks, otypes="O")
df["landmarks"] = extract_landmarks_per_video(df["video_path"])

# Length of each landmark sequence, and that length divided by the framerate.
# Presumably one entry per frame with a detection, which would make
# `detection_time` a duration in seconds -- TODO confirm.
df["landmark_count"] = df["landmarks"].map(len)
df["detection_time"] = df["landmark_count"] / df["framerate"]

# Remove the rows whose detection time falls below the configured minimum.
too_short = df.index[df["detection_time"] < MIN_DETECTION_TIME]
df.drop(too_short, inplace=True)

### Classic features

In [None]:
from paddel.preprocessing import extract_classic_features

# Compute the classic features for every remaining video from its landmarks
# and framerate. Explicit iteration replaces `np.vectorize`, which (without
# `otypes`) raises if filtering left the frame empty, evaluates the first row
# twice, and casts all results to the types returned for that first row.
classic_feature_columns = ["tap_rate", "tap_rate_difference"]

classic_features = pd.DataFrame(
    [
        extract_classic_features(landmarks, framerate)
        for landmarks, framerate in zip(df["landmarks"], df["framerate"])
    ],
    columns=classic_feature_columns,
    index=df.index,  # keep the results aligned with their source rows
)
for column in classic_feature_columns:
    df[column] = classic_features[column]

### Remove unnecessary columns

In [None]:
# Remove the intermediate columns that were only needed to derive the features.
# `errors="ignore"` keeps the cell idempotent: re-running it after the columns
# are already gone is a no-op instead of a KeyError.
df = df.drop(
    columns=["landmarks", "framerate", "landmark_count", "detection_time"],
    errors="ignore",
)

In [None]:
# Display the resulting feature table as the cell's output.
df