### Processing pipeline for the BioVid Heat Pain Database

In [None]:
# Standard-library and third-party imports used by the cells below.
import sys
from pathlib import Path
import os
import pandas as pd

# Put the current working directory on the module search path BEFORE the
# project imports below run. Presumably the notebook is launched from the
# directory that contains the `data_preparation` package -- verify.
sys.path.append(str(Path.cwd()))

# Project-local pipeline entry points (order matters: they rely on the
# sys.path entry added above).
from data_preparation.processing_pipeline_mediapipe import (
    load_reference_keypoints,
    video_to_feature_sequences,
    visualize_frontalized_points,
)
from data_preparation.process_dataset import ProcessingConfig, process_dataframe_to_npy

# Simple confirmation that every import above succeeded.
print("Modules imported")

In [None]:
# Build the processing configuration with its defaults and echo the
# locations the rest of the notebook reads from it.
cfg = ProcessingConfig()
for label, attr_name in (
    ("Data dir:", "data_dir"),
    ("Processed dir:", "processed_data_dir"),
    ("Reference keypoints path:", "reference_path"),
):
    # Look each attribute up right before printing so the output order
    # matches one print statement per setting.
    print(label, getattr(cfg, attr_name))

In [None]:
def _read_split(csv_name):
    """Read one split CSV located directly under ``cfg.data_dir``."""
    return pd.read_csv(os.path.join(cfg.data_dir, csv_name))


# Load the three dataset splits, one CSV file per split.
train_df = _read_split('train.csv')
val_df = _read_split('val.csv')
test_df = _read_split('test.csv')

# Report the number of rows per split, then preview the training table.
print("Rows (train, val, test):", *map(len, (train_df, val_df, test_df)))
train_df.head()

In [None]:
# Load the reference keypoints from the configured path. The loader returns
# the keypoints together with a success flag; `ref_kp` is later passed on as
# `reference_keypoints_3d` and `ref_ok` gates frontalization.
ref_kp, ref_ok = load_reference_keypoints(cfg.reference_path)
print("Reference keypoints loaded:", ref_ok)
if not ref_ok:
    # The path handed to the loader above is `cfg.reference_path`, so the
    # hint names that attribute (the old text pointed at a different name).
    print("Frontalization will be disabled (or set cfg.reference_path to a valid file).")

In [None]:
# Take the first training row and resolve its video path against the data
# directory so a single clip can be inspected visually.
sample = train_df.iloc[0]
sample_video_path = os.path.join(cfg.data_dir, sample['video_path'])
print("Visualizing:", sample_video_path)

# Frontalization is requested only when the config enables it AND the
# `ref_ok` flag is truthy.
frontalize = cfg.use_frontalization and ref_ok
preview_options = dict(
    frame_skip=3,
    reference_keypoints_3d=ref_kp,
    use_frontalization=frontalize,
    visualize=True,
)

# Kept as the final expression so the notebook still shows the call's result.
video_to_feature_sequences(str(sample_video_path), **preview_options)

In [None]:
def _process_split(split_df, split_name):
    """Hand one split to ``process_dataframe_to_npy`` with the shared settings."""
    return process_dataframe_to_npy(
        split_df,
        split_name,
        cfg,
        video_to_feature_sequences,
        frame_skip=3,
        visualize=False,
    )


# Process every split in the same order as before: train, val, test.
_process_split(train_df, 'train')
_process_split(val_df, 'val')
_process_split(test_df, 'test')