In [None]:
import logging
import pickle
from pathlib import Path

import numpy as np
import pandas as pd

from paddel.preprocessing import extract_filename_features
from paddel.preprocessing.video import is_video, extract_video_framerate

In [None]:
# Configure root logging at INFO level for the rest of the notebook.
logging.basicConfig(level=logging.INFO)

# Every input and output of this notebook lives under one data directory,
# given relative to the notebook's working directory.
data_dir = Path("../data")
raw_dir = data_dir.joinpath("raw")  # directory whose entries are listed below
file_features_path = data_dir.joinpath("file_features_df.pkl")  # feature frame output
y_path = data_dir.joinpath("y.pkl")  # target output

Load paths

In [None]:
# One row per entry found directly inside ``raw_dir``.
# ``Path.iterdir`` yields entries in arbitrary, filesystem-dependent order, so
# the listing is sorted: row order (and therefore the index labels that end up
# in the pickled outputs) is then reproducible across runs and machines.
df = pd.DataFrame({"video_path": sorted(raw_dir.iterdir())})

Extract features from filename

In [None]:
# Run the filename parser on every path and turn the resulting records into
# a frame with one column per extracted field.
parsed_records = df["video_path"].apply(extract_filename_features)
filename_features = pd.DataFrame.from_records(parsed_records)

# Place the extracted columns next to the paths (aligned on the index).
df = pd.concat([df, filename_features], axis=1, copy=False)

# Discard rows where any of these fields equals -1.
# NOTE(review): -1 is presumably the parser's "could not be determined"
# sentinel — confirm against extract_filename_features.
for column in ("group", "hand", "handedness"):
    invalid_rows = df[df[column] == -1].index
    df.drop(invalid_rows, inplace=True)

Drop non-video files

In [None]:
# Evaluate ``is_video`` exactly once per path and coerce the result to a
# boolean mask. ``Series.map`` replaces ``np.vectorize`` because the latter
# (without ``otypes``) raises ``ValueError`` when the frame is empty and makes
# an extra probing call on the first element to infer its output type.
is_video_mask = df["video_path"].map(is_video).astype(bool)

# Index labels of the rows whose path was recognised as a video.
indexes_to_keep = df.index[is_video_mask.to_numpy()]

df = df.loc[indexes_to_keep]

Extract video framerate

In [None]:
# Compute one framerate value per remaining path, then store the values as a
# new "framerate" column on the frame.
framerates = df["video_path"].apply(extract_video_framerate)
df["framerate"] = framerates

Create target

In [None]:
# Split the label off the features: ``pop`` returns the "group" column as a
# Series carrying the frame's index and removes that column from ``df``.
y = df.pop("group")

Save dataframe and target

In [None]:
# Pickle the feature frame and the target to their configured paths,
# feature frame first, then the target.
outputs = ((df, file_features_path), (y, y_path))
for artifact, destination in outputs:
    with open(destination, "wb") as sink:
        pickle.dump(artifact, sink)