In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to imported source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import logging

# Threshold for the root logger: records at INFO and above are emitted.
LOG_LEVEL = logging.INFO

# Note: basicConfig only takes effect if the root logger has no handlers yet.
logging.basicConfig(level=LOG_LEVEL)

### Set up paths and list raw videos

In [None]:
from pathlib import Path
import pandas as pd

# Directory layout, relative to the notebook's working directory:
#   raw_dir       - entries to process (every entry is listed, nothing is filtered here)
#   work_dir      - root of the data produced by this notebook
#   landmarks_dir - sub-directory of work_dir that holds the landmark files
raw_dir = Path("../data/raw")
work_dir = Path("../data")
landmarks_dir = work_dir / "landmarks"

# parents=True creates any missing intermediate directory as well, so the
# output tree can be built even when "../data" does not exist yet.
work_dir.mkdir(parents=True, exist_ok=True)
landmarks_dir.mkdir(parents=True, exist_ok=True)

# Path.iterdir() yields entries in arbitrary order. Sorting makes the row order
# (and therefore the index) reproducible between runs and machines.
# The paths are stored as plain strings.
df = pd.DataFrame(sorted(raw_dir.iterdir()), columns=["video_path"]).astype(str)

### Derive filenames and landmark paths

In [None]:
def _video_stem(video_path):
    """Return the final path component of ``video_path`` without its last suffix."""
    return Path(video_path).stem


def _landmarks_file(filename):
    """Return ``<landmarks_dir>/<filename>.pkl`` for the given file stem."""
    return landmarks_dir / f"{filename}.pkl"


df["filename"] = df["video_path"].map(_video_stem)
# Stored as a string, like video_path.
df["landmarks_path"] = df["filename"].map(_landmarks_file).astype(str)

## Get filename features

In [None]:
from paddel.preprocessing import extract_filename_features

# The result is used as a DataFrame below (its .columns is read), i.e. one row
# of features per filename.
filename_features = df["filename"].apply(extract_filename_features)

# Copy each extracted feature into df under the same column name.
for feature_name in filename_features.columns:
    df[feature_name] = filename_features[feature_name]

In [None]:
# Remove, in place, every row that has a missing value in any column.
# Presumably these are files whose name could not be parsed - TODO confirm.
df.dropna(axis=0, how="any", inplace=True)

### Substitute filename features

In [None]:
from paddel.preprocessing.filename import (
    substitute_age,
    substitute_gender,
    substitute_hand,
    substitute_handedness,
    substitute_individual_type,
)

# (column, element-wise substitution function) pairs, applied in this order.
_substitutions = (
    ("individual_type", substitute_individual_type),
    ("hand", substitute_hand),
    ("gender", substitute_gender),
    ("age", substitute_age),
    ("handedness", substitute_handedness),
)

# Each column is overwritten with its substituted values.
for _column, _substitute in _substitutions:
    df[_column] = df[_column].map(_substitute)

In [None]:
# Keep only rows where none of the three columns equals -1.
# NOTE(review): -1 looks like the "unrecognised value" marker produced by the
# substitution step - confirm. "gender" and "age" are not filtered here.
is_valid = (
    (df["individual_type"] != -1)
    & (df["hand"] != -1)
    & (df["handedness"] != -1)
)
df = df[is_valid]

### Get video framerate

In [None]:
from paddel.preprocessing import extract_video_framerate

# One extract_video_framerate call per video path.
video_framerates = df["video_path"].apply(extract_video_framerate)
df["framerate"] = video_framerates

In [None]:
# Remove, in place, every row that has a missing value in any column.
# This is not restricted to "framerate"; rows with a gap elsewhere go too.
# Presumably a missing framerate marks an unreadable video - TODO confirm.
df.dropna(axis=0, how="any", inplace=True)

## Get landmarks

In [None]:
from paddel.preprocessing import extract_landmarks


def _count_landmarks(row):
    """Run extract_landmarks for one row and return the length of its result.

    The row's "video_path" and "landmarks_path" are passed through as-is.
    NOTE(review): the second argument presumably tells extract_landmarks where
    to store or look up the landmarks file - confirm.
    """
    landmarks = extract_landmarks(row["video_path"], row["landmarks_path"])
    return len(landmarks)


df["landmark_count"] = df.apply(_count_landmarks, axis=1)

In [None]:
# landmark_count / framerate: assumes one landmark entry per frame and a
# framerate in frames per second, which makes this a duration in seconds -
# TODO confirm.
recording_length = df["landmark_count"] / df["framerate"]

# Index labels of the rows whose ratio is below 10; those rows are removed in place.
too_short = df[recording_length < 10].index
df.drop(index=too_short, inplace=True)

### Remove unnecessary columns

In [None]:
# Columns that the following cells do not read; "landmarks_path" and
# "framerate" stay because the classic-feature extraction uses them.
# NOTE(review): "date" is not created by name in the cells above - presumably
# it is one of the extracted filename features; verify.
df.drop(
    columns=["video_path", "filename", "date", "landmark_count"],
    inplace=True,
)

## Classic features

In [None]:
from paddel.preprocessing import extract_classic_features, load_landmarks


def _classic_features_for(row):
    """Load the landmarks of one row and extract its classic features.

    The row's framerate and the landmarks loaded from its "landmarks_path"
    are handed to extract_classic_features, in that order.
    """
    landmarks = load_landmarks(row["landmarks_path"])
    return extract_classic_features(row["framerate"], landmarks)


# Used as a DataFrame in the next cell (its .columns is read).
classic_features = df.apply(_classic_features_for, axis=1)

In [None]:
# Copy every classic feature into df under the same column name.
for feature_name in classic_features.columns:
    df[feature_name] = classic_features[feature_name]

# Display the resulting frame as the cell output.
df