# StressGait - Saliva Preprocessing

## Setup and Helper Functions

In [1]:
import json
from pathlib import Path

import biopsykit as bp
import matplotlib.pyplot as plt
import pandas as pd
import pingouin as pg
import seaborn as sns
from biopsykit.questionnaires.utils import compute_scores, wide_to_long
from biopsykit.utils.dataframe_handling import convert_nan
from fau_colors import cmaps, register_fausans_font

from stressgait_analysis.dataset import StressGaitDataset

%matplotlib widget
%load_ext autoreload
%autoreload 2

In [3]:
#register_fausans_font()
# Start from a clean slate: close any figures left over from earlier runs.
plt.close("all")

# Default color cycle for all seaborn/matplotlib plots in this notebook.
palette = sns.color_palette(cmaps.faculties_light)
sns.set_theme(context="notebook", style="ticks", font="sans-serif", palette=palette)

# Notebook-wide matplotlib defaults, applied after the seaborn theme so they take precedence.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,  # Type 42 (TrueType) fonts in exported PDFs
        "mathtext.default": "regular",
        "font.family": "sans-serif",
    }
)
#plt.rcParams["font.sans-serif"] = "FAUSans Office"

# Last expression: render the palette as the cell output.
palette

In [4]:
# Key of the deployment profile to read from the config file.
deploy_type = "local"

# Open the config inside a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with Path("../../config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Root folder of the study data for the selected deployment profile.
base_path = Path(config_dict[deploy_type]["base_path"])
base_path

PosixPath('/home/rzlin/ys64ofuj/Data/StressGait/Data')

In [5]:
# Build the dataset object on top of the configured data folder.
# NOTE(review): coarse_condition=True presumably groups the fine-grained study
# conditions into coarser ones — confirm against StressGaitDataset.
dataset = StressGaitDataset(base_path, coarse_condition=True)

dataset

Unnamed: 0,participant,condition
0,VP_01,omc
1,VP_02,control
2,VP_05,control
3,VP_06,omc
4,VP_07,control
5,VP_08,control
6,VP_10,omc
7,VP_11,control
8,VP_12,control
9,VP_13,omc


In [6]:
# Folder that receives the processed saliva exports; created on first run.
export_path = base_path / "saliva" / "processed"
export_path.mkdir(exist_ok=True)

In [7]:
# --- Cortisol: load raw plate data, compute features, export both as CSV ---
cort_path = base_path.joinpath("saliva/cleaned/stressgait_cortisol.xlsx")

# The pattern must be a raw string: in a regular string literal "\d" and "\w" are
# invalid escape sequences and trigger a SyntaxWarning.
# NOTE(review): the two capture groups presumably yield the two index levels that
# are renamed to "subject" and "sample" below — confirm against load_saliva_plate.
cort_data = bp.io.biomarker.load_saliva_plate(
    cort_path,
    saliva_type="cortisol",
    regex_str=r"(VP_\d+)_(\w+)",
)
cort_data.index = cort_data.index.set_names(["subject", "sample"])

# Per-subject features: absolute max. increase, relative max. increase, and AUC
# (S0 is kept for the AUC). The individual results are joined column-wise.
cort_features = pd.concat(
    [
        bp.saliva.max_increase(cort_data),
        bp.saliva.max_increase(cort_data, percent=True),
        bp.saliva.auc(cort_data, remove_s0=False, sample_times=dataset.sample_times),
    ],
    axis=1,
)
cort_features = bp.saliva.utils.saliva_feature_wide_to_long(cort_features, saliva_type="cortisol")

# Export raw samples and derived features to separate files.
cort_data.to_csv(export_path.joinpath("stressgait_cortisol_samples.csv"))
cort_features.to_csv(export_path.joinpath("stressgait_cortisol_features.csv"))

  cort_data = bp.io.biomarker.load_saliva_plate(cort_path, saliva_type="cortisol", regex_str="(VP_\d+)_(\w+)")


In [8]:
# --- Amylase: load raw plate data, compute features, export both as CSV ---
amy_path = base_path.joinpath("saliva/cleaned/stressgait_amylase.xlsx")

# The pattern must be a raw string: in a regular string literal "\d" and "\w" are
# invalid escape sequences and trigger a SyntaxWarning.
# NOTE(review): the two capture groups presumably yield the two index levels that
# are renamed to "subject" and "sample" below — confirm against load_saliva_plate.
amy_data = bp.io.biomarker.load_saliva_plate(
    amy_path,
    saliva_type="amylase",
    regex_str=r"StressGait_(VP_\d+)_(\w+)",
)
amy_data.index = amy_data.index.set_names(["subject", "sample"])

# Per-subject features: absolute max. increase, relative max. increase, and AUC
# (S0 is kept for the AUC). The individual results are joined column-wise.
amy_features = pd.concat(
    [
        bp.saliva.max_increase(amy_data, saliva_type="amylase"),
        bp.saliva.max_increase(amy_data, percent=True, saliva_type="amylase"),
        bp.saliva.auc(
            amy_data,
            remove_s0=False,
            sample_times=dataset.sample_times,
            saliva_type="amylase",
        ),
    ],
    axis=1,
)
amy_features = bp.saliva.utils.saliva_feature_wide_to_long(amy_features, saliva_type="amylase")

# Export raw samples and derived features to separate files. The features must go
# to their own "*_features.csv"; writing them to "*_samples.csv" would overwrite
# the samples export from the line above.
amy_data.to_csv(export_path.joinpath("stressgait_amylase_samples.csv"))
amy_features.to_csv(export_path.joinpath("stressgait_amylase_features.csv"))

  amy_data = bp.io.biomarker.load_saliva_plate(amy_path, saliva_type="amylase", regex_str="StressGait_(VP_\d+)_(\w+)")


## Check for Outliers

In [9]:
from scipy.stats import zscore

# Select the "S0" sample of every subject and drop the participants listed in
# dataset.PARTICIPANTS_EXCLUDED (IDs that are not present in the data are ignored).
cort_s0 = cort_data.xs("S0", level="sample")
cort_s0 = cort_s0.drop(index=dataset.PARTICIPANTS_EXCLUDED, errors="ignore")

# Absolute z-scores across subjects; missing values are left out of the computation.
cort_zscore = zscore(cort_s0, nan_policy="omit").abs()
display(cort_zscore)

# Flag a subject as outlier if any of its columns has |z| > 3.
outlier = cort_zscore.gt(3).any(axis=1)
outlier

Unnamed: 0_level_0,cortisol
subject,Unnamed: 1_level_1
VP_01,
VP_02,0.217769
VP_05,1.045088
VP_06,0.635901
VP_07,0.277379
VP_08,0.180835
VP_10,0.536261
VP_11,0.623634
VP_12,1.31935
VP_13,0.281248


subject
VP_01    False
VP_02    False
VP_05    False
VP_06    False
VP_07    False
VP_08    False
VP_10    False
VP_11    False
VP_12    False
VP_13    False
VP_15    False
VP_16    False
VP_17    False
VP_18    False
VP_19    False
VP_20    False
VP_22    False
VP_23    False
VP_24    False
VP_25    False
VP_26    False
VP_27    False
VP_28    False
VP_29    False
VP_30    False
VP_31    False
VP_32    False
VP_33    False
VP_34    False
VP_35    False
VP_36    False
VP_37    False
VP_38    False
VP_39     True
VP_40    False
VP_41    False
VP_42    False
VP_43    False
VP_44    False
VP_45    False
VP_46    False
VP_47    False
dtype: bool