# StressGait - Saliva Preprocessing

## Setup and Helper Functions

In [None]:
import json
from pathlib import Path

import biopsykit as bp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from fau_colors.v2021 import register_cmaps

from stressgait_analysis.dataset import StressGaitDataset

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [None]:
# `cmaps` is used below, but the notebook's import cell only brings in
# `register_cmaps`; import it here so this cell no longer raises a NameError.
from fau_colors.v2021 import cmaps

# Close figures left over from earlier runs of the notebook.
plt.close("all")

# Use the "faculties_light" color map as the default seaborn palette.
palette = sns.color_palette(cmaps.faculties_light)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

# Global matplotlib defaults for all figures created in this notebook.
plt.rcParams["figure.figsize"] = (10, 5)
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["mathtext.default"] = "regular"
plt.rcParams["font.family"] = "sans-serif"


# Bare expression: shown as the cell output in the notebook.
palette

In [None]:
# Key of the deployment section to read from the config file.
deploy_type = "local"

# Read the config through `read_text` so the file handle is closed right away
# (the previous `json.load(Path(...).open())` never closed it).
config_dict = json.loads(Path("../../config.json").read_text(encoding="utf-8"))

# Root folder of the data for the selected deployment.
base_path = Path(config_dict[deploy_type]["base_path"])
base_path

In [None]:
# Create the dataset object rooted at `base_path`.
# NOTE(review): `coarse_condition=True` presumably groups the study conditions
# more coarsely — confirm against StressGaitDataset.
dataset = StressGaitDataset(base_path, coarse_condition=True)

# Bare expression: shown as the cell output in the notebook.
dataset

In [None]:
# Folder that receives the processed saliva exports; create it if it does not exist yet.
export_path = base_path / "saliva/processed"
export_path.mkdir(exist_ok=True)

In [None]:
# Load the cleaned cortisol plate. The two regex groups are extracted from the
# sample labels and named "subject" and "sample" in the resulting index.
cort_path = base_path.joinpath("saliva/cleaned/stressgait_cortisol.xlsx")
cort_data = bp.io.biomarker.load_saliva_plate(
    cort_path,
    saliva_type="cortisol",
    regex_str=r"(VP_\d+)_(\w+)",
)
cort_data.index = cort_data.index.set_names(["subject", "sample"])

# Compute the cortisol features (maximum increase, absolute and in percent,
# plus the area under the curve) and join them column-wise.
cort_features = pd.concat(
    [
        bp.saliva.max_increase(cort_data),
        bp.saliva.max_increase(cort_data, percent=True),
        bp.saliva.auc(cort_data, remove_s0=False, sample_times=dataset.sample_times),
    ],
    axis=1,
)
# Convert the wide feature table into long format.
cort_features = bp.saliva.utils.saliva_feature_wide_to_long(cort_features, saliva_type="cortisol")

# Export the raw samples and the derived features.
cort_data.to_csv(export_path.joinpath("stressgait_cortisol_samples.csv"))
cort_features.to_csv(export_path.joinpath("stressgait_cortisol_features.csv"))

In [None]:
# Load the cleaned amylase plate. The two regex groups are extracted from the
# sample labels and named "subject" and "sample" in the resulting index.
amy_path = base_path.joinpath("saliva/cleaned/stressgait_amylase.xlsx")
amy_data = bp.io.biomarker.load_saliva_plate(amy_path, saliva_type="amylase", regex_str=r"StressGait_(VP_\d+)_(\w+)")
amy_data.index = amy_data.index.set_names(["subject", "sample"])

# Compute the amylase features (maximum increase, absolute and in percent,
# plus the area under the curve) and join them column-wise.
amy_features = [
    bp.saliva.max_increase(amy_data, saliva_type="amylase"),
    bp.saliva.max_increase(amy_data, percent=True, saliva_type="amylase"),
    bp.saliva.auc(amy_data, remove_s0=False, sample_times=dataset.sample_times, saliva_type="amylase"),
]
amy_features = pd.concat(amy_features, axis=1)
# Convert the wide feature table into long format.
amy_features = bp.saliva.utils.saliva_feature_wide_to_long(amy_features, saliva_type="amylase")

# Export samples and features to separate files. The features were previously
# written to "stressgait_amylase_samples.csv", overwriting the samples export.
amy_data.to_csv(export_path.joinpath("stressgait_amylase_samples.csv"))
amy_features.to_csv(export_path.joinpath("stressgait_amylase_features.csv"))

## Check for Outliers

In [None]:
import numpy as np
from scipy.stats import zscore

# Select the "S0" sample of every subject and remove the participants listed
# as excluded in the dataset; errors="ignore" tolerates IDs that are not present.
s0_cort = cort_data.xs("S0", level="sample")
s0_cort = s0_cort.drop(index=dataset.PARTICIPANTS_EXCLUDED, errors="ignore")

# Absolute z-scores of the S0 values; NaNs are omitted from the computation.
cort_zscore = np.abs(zscore(s0_cort, nan_policy="omit"))
display(cort_zscore)

# Flag every row with at least one absolute z-score above 3.
outlier = (cort_zscore > 3).any(axis=1)
outlier