# Feature Extraction

## Imports and Global Helper Functions

In [None]:
import json

from pathlib import Path

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm

import biopsykit as bp
from fau_colors import cmaps

from empkins_macro.feature_extraction import extract_generic_features, extract_expert_features
from stresspose_analysis.datasets.pilotstudy import PilotStudyDataset
from stresspose_analysis.feature_extraction.utils import load_generic_feature_dict, load_expert_feature_dict, remove_na

%load_ext autoreload
%autoreload 2
%matplotlib widget

In [None]:
# Close any figures left open by a previous run of the notebook.
plt.close("all")

# Use the FAU faculty colors as the default seaborn palette.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Applied after `set_theme` so these values are the ones in effect.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        # Type 42 (TrueType) fonts in exported PDFs.
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Last expression of the cell: display the palette.
palette

## Path Setup

In [None]:
# Key into config.json: selects the deployment entry whose "base_path" is used below.
deploy_type = "local"

In [None]:
# Passed as `system=` to extract_generic_features / extract_expert_features in the extraction loop.
system = "perception_neuron"

In [None]:
# Directory two levels above the current working directory; config.json, the
# feature dicts, and the export folder are all resolved relative to it.
root_path = Path("../../").resolve()

# Read the deployment configuration. The context manager guarantees that the
# file handle is closed again, even if parsing the JSON fails.
with root_path.joinpath("config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Data location for the selected deployment type.
base_path = Path(config_dict[deploy_type]["base_path"])

# Collect the per-subject folders (named "VP_*") and report how many were found.
subject_dirs = bp.utils.file_handling.get_subject_dirs(base_path.joinpath("data_per_subject"), "VP_*")
print(f"Found {len(subject_dirs)} subjects.")

In [None]:
# Input: folder holding the feature extraction config dicts.
feature_dict_path = root_path / "params/feature_dicts"

# Output: folder the extracted feature tables are exported to.
output_path = root_path / "feature_export/motion_features"
bp.utils.file_handling.mkdirs(output_path)

In [None]:
# Dataset rooted at base_path; the extraction loop groups it by subject and condition
# and reads `mocap_data` from each subset.
dataset = PilotStudyDataset(base_path)

## Load Feature Extraction Config Dicts

### Load Generic Feature Dict

In [None]:
# Configuration for the generic features, loaded from feature_dict_path and later
# passed to extract_generic_features.
generic_feature_dict = load_generic_feature_dict(feature_dict_path)

### Load Expert Feature Dict

In [None]:
# Configuration for the expert features, later passed to extract_expert_features.
# Unlike the generic loader, this one also receives the dataset's sampling rate.
expert_feature_dict = load_expert_feature_dict(feature_dict_path, dataset.sampling_rate)

## Feature Extraction

In [None]:
# Maps (subject, condition) -> extracted features. The extraction loop skips keys that
# are already present, so re-running that cell resumes instead of recomputing, as long
# as this cell is not re-run in between.
result_dict = {}

In [None]:
index_levels = ["subject", "condition"]

# Materialize the groups first: a list has a length, so the progress bar can show a total.
subsets = list(dataset.groupby(index_levels))

for subset in tqdm(subsets):
    subject_id, condition = subset.index["subject"][0], subset.index["condition"][0]
    key = (subject_id, condition)

    # Only compute recordings that are not in the result cache yet.
    if key not in result_dict:
        mocap_data = subset.mocap_data
        feature_parts = [
            extract_generic_features(mocap_data, generic_feature_dict, system=system),
            extract_expert_features(mocap_data, expert_feature_dict, system=system),
        ]
        # Generic and expert features of one recording are stacked into one table.
        result_dict[key] = pd.concat(feature_parts)

# Combine all recordings; the dict keys become the "subject" / "condition" index levels.
motion_data_total = remove_na(pd.concat(result_dict, names=index_levels))
motion_data_total

## Feature Cleaning

In [None]:
# Wide format: one row per feature, one column per (subject, condition) recording.
motion_data_cleaned = motion_data_total.unstack(["subject", "condition"])

# drop features that are NaN for any subject
motion_data_cleaned = motion_data_cleaned.dropna(how="any", axis=0)

# drop features that are constant (e.g., 0) for all subjects
# A feature is constant exactly when its minimum and maximum across all recordings
# coincide. Comparing min and max is exact, whereas testing `std != 0` can miss
# constant non-zero features because of floating-point round-off in the standard
# deviation. With a single recording every feature is trivially "constant", so the
# filter is skipped in that case (the previous std-based check kept all rows there, too).
if motion_data_cleaned.shape[1] > 1:
    is_constant = motion_data_cleaned.min(axis=1) == motion_data_cleaned.max(axis=1)
    motion_data_cleaned = motion_data_cleaned.loc[~is_constant]

# bring dataframe back in original format
motion_data_cleaned = motion_data_cleaned.stack(["subject", "condition"])
motion_data_cleaned = motion_data_cleaned.reorder_levels(motion_data_total.index.names).sort_index()

motion_data_cleaned.head()

## Export

In [None]:
# Full feature table as returned by the extraction step (after `remove_na`).
motion_data_total.to_csv(output_path.joinpath("motion_features.csv"))

# Cleaned feature table (features with NaNs or without any variation removed).
motion_data_cleaned.to_csv(output_path.joinpath("motion_features_cleaned.csv"))

# NOTE(review): the classification input is currently an identical copy of the cleaned
# table, written under a second file name.
motion_data_cleaned.to_csv(output_path.joinpath("motion_features_for_classification.csv"))