# Movement Feature Analysis - Per Phase

## Imports and Global Helper Functions

In [None]:
import re
import warnings
from pathlib import Path

import biopsykit as bp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from biopsykit.stats import StatsPipeline
from fau_colors import cmaps, register_fausans_font

from stresspose_analysis.data_wrangling import (
    add_concat_feature_name_to_index,
    add_multiindex_to_stats_results,
    rename_motion_features,
)
from stresspose_analysis.plotting.mainstudy import plot_motion_features_per_phase


%load_ext autoreload
%autoreload 2
%matplotlib widget

In [None]:
# Make the FAU Sans font available to matplotlib and start from a clean slate
# (no figures left open from a previous run of this notebook).
register_fausans_font()
plt.close("all")

# Global seaborn theme based on the light FAU faculty color map.
palette = sns.color_palette(cmaps.faculties_light)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Applied after `set_theme` so these values are the ones that end up in rcParams.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        # fonttype 42 embeds fonts as TrueType in exported PDFs (see matplotlib rcParams docs)
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
        "font.family": "sans-serif",
        "font.sans-serif": "FAUSans Office",
    }
)

# Last expression of the cell: shows the palette as notebook output.
palette

## Setup Paths

In [None]:
# Relative paths are resolved against the current working directory of the kernel.
root_path = Path("../../../")
# Folder that is searched for the movement feature files loaded below.
input_path = root_path / "output" / "classification"

In [None]:
# All results go below `<root>/results`: figures into `plots`, statistics
# exports into `statistics`.
output_path = root_path / "results"
img_path = output_path / "plots"
stats_path = output_path / "statistics"

# Hand both export folders to biopsykit's `mkdirs` helper before anything is
# written (presumably creates missing directories -- confirm in biopsykit docs).
bp.utils.file_handling.mkdirs([img_path, stats_path])

In [None]:
# Maps the raw condition labels to the display names used in plots and tables.
condition_mapping = {
    "tsst": "TSST",
    "ftsst": "f-TSST",
}

## Load Data

In [None]:
# Collect all per-phase movement feature exports, sorted by path.
feature_pattern = "*_movement_features_per_phase_for_classification*.csv"
feature_files = sorted(input_path.glob(feature_pattern))

# The first run of digits in each file name is used as the job id.
job_ids = [re.findall(r"\d+", csv_file.name)[0] for csv_file in feature_files]
job_ids

In [None]:
# Position of the file to analyze within the sorted `feature_files` list;
# -1 picks the last entry.
index = -1
feature_file = feature_files[index]

# One call, two output lines: the header and the selected file name.
print("Selected File:", feature_file.name, sep="\n")

In [None]:
# Load the selected feature file, replace the raw condition labels in the
# "condition" index level with their display names, and pass the result through
# `add_concat_feature_name_to_index` (presumably adds the "feature_concat"
# index level that the statistics below group by -- confirm in the helper).
data = (
    bp.io.load_long_format_csv(feature_file)
    .rename(index=condition_mapping, level="condition")
    .pipe(add_concat_feature_name_to_index)
)

data.head()

## Information about All Features

In [None]:
# Move subject and condition into the columns; the remaining rows are then
# counted as features (assumes these are the only non-feature index levels).
data_unstack = data.unstack(["subject", "condition"])

# Number of rows per feature type as a single-row table, plus the overall total.
features_per_type = data_unstack.groupby("feature_type").size()
num_features = pd.DataFrame(features_per_type, columns=["Count"]).T
num_features["Total"] = len(data_unstack)

num_features

## Analysis

### Statistics

In [None]:
# Two-step pipeline: a normality check as preparation, followed by pairwise
# tests between the conditions, run separately for every "feature_concat" group.
steps = [
    ("prep", "normality"),
    ("test", "pairwise_tests"),
]
params = dict(
    dv="data",
    within="condition",
    subject="subject",
    groupby="feature_concat",
    # "bonf": Bonferroni correction for multiple comparisons
    multicomp={"method": "bonf", "levels": True},
    parametric=False,
)
pipeline = StatsPipeline(steps=steps, params=params)

stats_file = stats_path.joinpath("stats_motion_features_per_phase.xlsx")

# UserWarnings raised while the pipeline runs are silenced for this call only;
# the previous warning filters are restored when the context exits.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=UserWarning)
    pipeline.apply(data)

pipeline.export_statistics(stats_file)

### Information about Significant Features

In [None]:
# Keep only the columns needed for reporting and drop the innermost index level
# of the pairwise test results.
stats_columns = ["A", "B", "W-val", "p-corr", "hedges"]
pairwise_results = pipeline.results["pairwise_tests"]
data_stats = pairwise_results[stats_columns].droplevel(-1)

# Attach the index information from `data` via the project helper and order the
# features from smallest to largest corrected p-value.
data_stats = add_multiindex_to_stats_results(data_stats, data).sort_values(by="p-corr", ascending=True)

# Features whose corrected p-value is below the 0.05 significance level.
is_significant = data_stats["p-corr"] < 0.05
data_stats_sig = data_stats[is_significant]

In [None]:
# Build a LaTeX table of all significant features.
# Column headers used for the index levels in the LaTeX output.
index_level_names_tex = ["Phase", "Feature Type", "Body Part", "Channel", "Type", "Metric"]
data_stats_table = data_stats_sig.droplevel("feature_concat")
data_stats_table = rename_motion_features(data_stats_table)

# "%" starts a comment in LaTeX, so it is escaped in every index level.
# The raw string avoids the invalid "\%" escape sequence (SyntaxWarning on
# Python >= 3.12) while producing the same two characters: backslash + percent.
for index_level in data_stats_table.index.names:
    data_stats_table = data_stats_table.rename(index=lambda x: x.replace("%", r"\%"), level=index_level)

# The compared conditions are not shown in the table (see `show_a_b=False`).
data_stats_table = data_stats_table.droplevel(["A", "B"])

ttest_result_latex = pipeline.results_to_latex_table(
    stats_test="within",
    data=data_stats_table,
    index_kws={"index_level_names_tex": index_level_names_tex},
    caption="Results of statistical tests of extracted body posture and movement features between TSST and f-TSST from the \\textit{Main Study}, computed separately over \\textit{Interview} and \\textit{Mental Arithmetics} phases of the (f-)TSST. p-values are corrected for multiple comparisons using the Bonferroni method. \\textit{Note}: Only motion features with statistically significant ($p < 0.05$) differences are shown.",
    show_a_b=False,
)

# Manual post-processing of the generated LaTeX: open a \resizebox{\textwidth}{!}{
# right after the \sisetup line and close it after \end{tabular}, so that the
# tabular is scaled to the text width.
ttest_result_latex = re.sub(
    r"\\sisetup{table-format = <1.3}",
    r"\\sisetup{table-format = <1.3}\n\\resizebox{\\textwidth}{!}{",
    ttest_result_latex,
)
ttest_result_latex = re.sub(r"\\end{tabular}", r"\\end{tabular}\n}", ttest_result_latex)

print(ttest_result_latex)

In [None]:
# Collects the overview tables created in the following cells, keyed by a
# descriptive name; the dict is written to an Excel file at the end of the notebook.
result_dict = {}

In [None]:
# Statistics of all features with a corrected p-value below 0.05.
result_dict["Statistic Results"] = data_stats_sig

In [None]:
# Number of tested features vs. number of features with a significant difference.
feature_counts = {"All": len(data_stats), "Significant": len(data_stats_sig)}

# Built as a single row and transposed, so the two counts end up as rows of one
# "Number of Features" column.
df_num_features = pd.DataFrame(feature_counts, index=["Number of Features"]).transpose()

result_dict["Features"] = df_num_features
df_num_features

In [None]:
# Count the significant features per feature type.
sig_per_type = data_stats_sig.groupby("feature_type").size()
df_num_features_type = pd.DataFrame(sig_per_type, columns=["Number of Features"])

result_dict["Features per Type"] = df_num_features_type
df_num_features_type

In [None]:
# Count the significant features per phase.
sig_per_phase = data_stats_sig.groupby("phase").size()
df_num_features_phase = pd.DataFrame(sig_per_phase, columns=["Number of Features"])

result_dict["Features per Phase"] = df_num_features_phase
df_num_features_phase

In [None]:
# Count the significant features per body part, most frequent body part first.
sig_per_body_part = data_stats_sig.groupby("body_part").size()
sig_per_body_part = sig_per_body_part.sort_values(ascending=False)
df_num_features_body_part = pd.DataFrame(sig_per_body_part, columns=["Number of Features"])

result_dict["Features per Body Part"] = df_num_features_body_part
df_num_features_body_part

## Plot Selected Motion Features

In [None]:
# One figure per feature type, each saved as a PDF with transparent background
# into the plot folder.
for feature_type in ("generic", "expert"):
    fig, axs = plot_motion_features_per_phase(data, stats_pipeline=pipeline, feature_type=feature_type)
    plot_file = img_path.joinpath(f"img_motion_features_{feature_type}_per_phase.pdf")
    fig.savefig(plot_file, transparent=True)

## Export

In [None]:
# Write all collected overview tables into a single Excel workbook
# (presumably one sheet per dict entry -- confirm in the biopsykit docs).
overview_file = output_path / "overview_significant_features_per_phase.xlsx"
bp.io.write_pandas_dict_excel(result_dict, overview_file)