# Saliva Processing

In [19]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from fau_colors import cmaps
import biopsykit as bp
from biopsykit.io import load_long_format_csv

from empkins_io.datasets.d03.macro_ap01 import MacroBaseDataset

# Reload edited modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures with the interactive widget (ipympl) backend.
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Close every figure that may still be open from an earlier run.
plt.close("all")

# Use the `faculties` colormap from fau_colors as the default seaborn palette.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(palette=palette, style="ticks", context="notebook")

# Matplotlib defaults; assigned after sns.set_theme, in the same order as before.
for rc_key, rc_value in {
    "figure.figsize": (10, 5),
    "pdf.fonttype": 42,
    "mathtext.default": "regular",
}.items():
    plt.rcParams[rc_key] = rc_value

# Show the palette as the cell output.
palette

In [21]:
# Key of the top-level section in ../config.json from which `base_path` is read.
deploy_type = "local"

In [22]:
# Load the notebook configuration. The context manager closes the file handle
# again right after parsing instead of leaving it open for the kernel's lifetime.
with Path("../config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Root folder of the study data for the selected deployment.
base_path = Path(config_dict[deploy_type]["base_path"])
base_path

PosixPath('/Volumes/luca_ssd/Study_Data/2022_05_AP01_Macro')

In [23]:
# Destination folder for every CSV file exported by this notebook.
path_out = base_path.joinpath("data_tabular/saliva/final")
# DataFrame.to_csv does not create missing folders, so make sure the target exists
# before any processing starts (no-op when the folder is already there).
path_out.mkdir(parents=True, exist_ok=True)

In [24]:
# Build the dataset object rooted at base_path; displaying it lists its subject/condition index.
dataset = MacroBaseDataset(base_path)
dataset

Unnamed: 0,subject,condition
0,VP_01,ftsst
1,VP_01,tsst
2,VP_02,ftsst
3,VP_02,tsst
4,VP_04,ftsst
...,...,...
73,VP_39,tsst
74,VP_40,ftsst
75,VP_40,tsst
76,VP_41,ftsst


## Cortisol

### Samples

In [55]:
# Read the cortisol plate export (sample IDs are parsed with the regex below), attach the
# condition through the dataset's day/condition map and index by subject, condition and sample.
cort_data = (
    bp.io.saliva.load_saliva_plate(
        base_path.joinpath("data_tabular/saliva/raw/cortisol_values.xlsx"),
        saliva_type="cortisol",
        regex_str=r"(VP_\d+) (T\d)_(S\d)",
    )
    .join(dataset.day_condition_map)
    .reset_index()
    .drop(columns="day")
    .set_index(["subject", "condition", "sample"])
)
cort_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cortisol
subject,condition,sample,Unnamed: 3_level_1
VP_01,tsst,S0,2.4813
VP_01,tsst,S1,2.54815
VP_01,tsst,S2,9.6268
VP_01,tsst,S3,12.748
VP_01,tsst,S4,13.3445


In [26]:
# long to wide
cort_data = cort_data.pivot_table(index=["subject"], columns=["condition","sample"], values="cortisol")

In [27]:
# squeeze multiindex
cort_data.columns = [f"{col[1]}_{col[0]}" for col in cort_data.columns]

In [29]:
# add cort prefix
cort_data = cort_data.add_prefix("cort_")

In [30]:
cort_data

Unnamed: 0_level_0,cort_S0_ftsst,cort_S1_ftsst,cort_S2_ftsst,cort_S3_ftsst,cort_S4_ftsst,cort_S5_ftsst,cort_S6_ftsst,cort_S7_ftsst,cort_S0_tsst,cort_S1_tsst,cort_S2_tsst,cort_S3_tsst,cort_S4_tsst,cort_S5_tsst,cort_S6_tsst,cort_S7_tsst
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
VP_01,4.24465,2.1892,2.3855,2.9745,2.81465,1.97155,2.01985,2.267,2.4813,2.54815,9.6268,12.748,13.3445,11.9125,6.45965,5.925
VP_02,3.0068,2.65,2.0981,2.0015,1.818,1.43105,1.4487,1.31895,1.0941,0.68617,1.44295,2.25265,1.889,1.67625,1.1404,1.1953
VP_03,8.5656,6.8351,9.5616,12.237,10.2155,7.81715,4.7723,5.18165,5.3223,3.35285,4.17085,8.8953,15.4495,14.8875,9.6484,7.68785
VP_04,2.25985,1.24725,1.4005,2.19535,2.70325,1.9746,1.51965,1.22185,0.888765,1.64405,1.85685,,4.24175,,2.53255,
VP_05,7.82225,9.8203,11.96,14.638,15.231,13.716,10.56,8.27335,8.4103,9.3095,12.3045,13.341,16.308,15.794,10.0314,7.7093
VP_06,1.2478,2.34285,5.50135,8.79645,8.02625,6.07115,3.76815,3.0442,1.4,1.20595,9.0868,13.1195,10.866,9.06075,3.9902,6.0575
VP_07,5.99155,4.4982,3.2534,2.7123,1.9518,1.59315,1.17326,1.24655,5.47795,3.0002,7.0681,9.5876,5.8149,4.42415,3.54175,3.09965
VP_08,6.2035,4.56615,3.7403,2.7743,2.43465,2.0226,1.9663,1.7114,4.10405,2.75055,4.441,5.1618,4.35855,3.2331,2.4085,2.26495
VP_09,2.33715,1.52945,1.31625,1.26495,0.67324,0.78821,1.3819,1.44005,2.3226,1.75325,3.25705,3.4449,2.9853,3.31935,2.7458,2.58935
VP_10,1.3574,1.9181,2.1388,2.7617,2.2055,1.68805,1.308,1.05738,1.02605,1.00975,1.43145,2.31225,2.53185,2.12965,1.5322,1.1748


In [31]:
# export
cort_data.to_csv(path_out.joinpath("cortisol.csv"))

### Features

In [56]:
# Area-under-the-curve features for cortisol (listed as auc_g / auc_i in the feature table below).
cort_auc = bp.saliva.auc(
    cort_data,
    saliva_type="cortisol",
    remove_s0=True,
    sample_times=dataset.sample_times_saliva,
)

In [57]:
# Maximum-increase feature for cortisol: once with the default output, once with percent=True.
cort_max_inc = bp.saliva.max_increase(
    cort_data,
    saliva_type="cortisol",
    remove_s0=True,
)
cort_max_inc_percent = bp.saliva.max_increase(
    cort_data,
    saliva_type="cortisol",
    remove_s0=True,
    percent=True,
)

In [58]:
# Cortisol slope between the samples labelled S1 and S4, using the dataset's saliva sample times.
cort_slope = bp.saliva.slope(
    cort_data,
    saliva_type="cortisol",
    sample_labels=["S1", "S4"],
    sample_times=dataset.sample_times_saliva,
)

In [59]:
# Put all cortisol features side by side, then convert the combined table to long format
# (indexed by subject, condition and saliva_feature, as the preview shows).
cort_features = bp.saliva.utils.saliva_feature_wide_to_long(
    pd.concat(
        [cort_auc, cort_max_inc, cort_max_inc_percent, cort_slope],
        axis="columns",
    ),
    saliva_type="cortisol",
)
cort_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cortisol
subject,condition,saliva_feature,Unnamed: 3_level_1
VP_01,ftsst,auc_g,177.968575
VP_01,ftsst,auc_i,11.589375
VP_01,ftsst,max_inc,0.7853
VP_01,ftsst,max_inc_percent,35.871551
VP_01,ftsst,slopeS1S4,0.017374


In [60]:
# long to wide: one row per subject, one column per (condition, saliva_feature) pair
cort_features = cort_features.pivot_table(
    values="cortisol",
    index=["subject"],
    columns=["condition", "saliva_feature"],
)

In [62]:
# squeeze multiindex: (condition, feature) column tuples become "<feature>_<condition>"
cort_features.columns = [
    f"{feature}_{condition}" for condition, feature in cort_features.columns
]

In [63]:
# add cort_feat prefix to every column name so the feature columns are identifiable as cortisol features
cort_features = cort_features.add_prefix("cort_feat_")

In [64]:
# Display the wide cortisol feature table (one row per subject).
cort_features

Unnamed: 0_level_0,cort_feat_auc_g_ftsst,cort_feat_auc_i_ftsst,cort_feat_max_inc_ftsst,cort_feat_max_inc_percent_ftsst,cort_feat_slopeS1S4_ftsst,cort_feat_auc_g_tsst,cort_feat_auc_i_tsst,cort_feat_max_inc_tsst,cort_feat_max_inc_percent_tsst,cort_feat_slopeS1S4_tsst
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
VP_01,177.968575,11.589375,0.7853,35.871551,0.017374,691.597175,497.937775,10.79635,423.69366,0.299899
VP_02,136.5053,-64.8947,-0.5519,-20.826415,-0.023111,111.904845,59.755925,1.56648,228.29328,0.033412
VP_03,608.9669,89.4993,5.4019,79.031763,0.0939,710.19925,455.38265,12.09665,360.787092,0.336018
VP_04,133.337575,38.546575,1.456,116.736821,0.040444,,,2.5977,158.006143,0.072158
VP_05,922.223675,175.880875,5.4107,55.097095,0.150297,934.6245,227.1025,6.9985,75.175896,0.194403
VP_06,410.503675,232.447075,6.4536,275.459376,0.157872,580.21785,488.56565,11.91355,987.897508,0.268335
VP_07,172.67615,-169.18705,-1.2448,-27.673292,-0.070733,398.2937,170.2785,6.5874,219.565362,0.078186
VP_08,205.751025,-141.276375,-0.82585,-18.086353,-0.059208,267.26365,58.22185,2.41125,87.664285,0.044667
VP_09,90.2425,-25.9957,-0.0894,-5.845238,-0.023784,221.922825,88.675825,1.69165,96.486525,0.034224
VP_10,141.050375,-4.725225,0.8436,43.981023,0.007983,132.891225,56.150225,1.5221,150.740282,0.042281


In [65]:
# export the wide cortisol feature table
cort_features_file = path_out.joinpath("cortisol_features.csv")
cort_features.to_csv(cort_features_file)

## Amylase

### Samples

In [43]:
# Read the amylase plate export (sample IDs are parsed with the regex below), attach the
# condition through the dataset's day/condition map and index by subject, condition and sample.
amy_data = (
    bp.io.saliva.load_saliva_plate(
        base_path.joinpath("data_tabular/saliva/raw/amylase_values.xlsx"),
        saliva_type="amylase",
        regex_str=r"(VP_\d+) (T\d)_(S\d)",
    )
    .join(dataset.day_condition_map)
    .reset_index()
    .drop(columns="day")
    .set_index(["subject", "condition", "sample"])
)
amy_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,amylase
subject,condition,sample,Unnamed: 3_level_1
VP_01,tsst,S0,27.362294
VP_01,tsst,S1,59.029384
VP_01,tsst,S2,64.769112
VP_01,tsst,S3,70.782683
VP_01,tsst,S4,68.975326


In [36]:
# long to wide
amy_data = amy_data.pivot_table(index=["subject"], columns=["condition","sample"], values="amylase")

In [37]:
# squeeze multiindex
amy_data.columns = [f"{col[1]}_{col[0]}" for col in amy_data.columns]

In [39]:
# add amy prefix
amy_data = amy_data.add_prefix("amy_")

In [40]:
amy_data

Unnamed: 0_level_0,amy_S0_ftsst,amy_S1_ftsst,amy_S2_ftsst,amy_S3_ftsst,amy_S4_ftsst,amy_S5_ftsst,amy_S6_ftsst,amy_S7_ftsst,amy_S0_tsst,amy_S1_tsst,amy_S2_tsst,amy_S3_tsst,amy_S4_tsst,amy_S5_tsst,amy_S6_tsst,amy_S7_tsst
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
VP_01,49.981644,55.348947,94.990317,46.093087,34.953194,28.118098,30.08976,32.29145,27.362294,59.029384,64.769112,70.782683,68.975326,39.849489,69.336797,83.072712
VP_02,124.696698,176.39807,163.878013,180.483792,148.575721,76.90579,193.847283,142.211633,98.583124,135.858498,177.307225,121.432501,80.871023,142.638827,152.190436,121.903509
VP_03,95.921379,122.549777,159.858012,131.257953,125.781112,111.355115,86.468353,91.93424,97.951838,97.875125,239.893236,124.253518,82.52152,92.296977,92.428486,77.261184
VP_04,269.208649,92.143551,134.127607,191.706367,152.757963,90.444901,82.565356,68.340531,79.529704,74.904992,118.609616,28.274306,123.256246,70.740559,106.938245,2.487701
VP_05,211.903865,302.337807,230.720525,258.742773,220.112181,142.81812,161.63478,210.249051,174.708907,265.340111,259.608536,264.003109,263.882559,266.041489,131.508398,266.238751
VP_06,126.655854,130.294689,159.218761,128.349671,119.801639,97.358569,70.314921,84.41092,110.959342,100.042838,77.03277,96.36094,108.648287,96.095384,90.317746,94.738895
VP_07,105.870715,145.696937,147.354867,128.328139,139.258998,156.103861,139.804465,155.242598,196.827231,201.643125,191.623769,195.657349,190.39647,180.305342,122.729932,157.833563
VP_08,165.809831,234.278282,235.049624,85.695076,142.898135,87.466305,137.755859,218.108681,107.205672,127.78985,38.311835,161.838435,127.430991,159.383836,184.934629,197.910987
VP_09,26.625562,33.243862,34.148521,46.213824,42.728504,30.120405,20.007262,55.32708,25.073357,26.092289,59.802765,47.689848,42.357117,54.622398,23.197378,42.918959
VP_10,249.105178,229.574052,221.679706,220.317956,230.726303,154.34446,150.840094,223.441412,240.534718,225.450709,209.062085,234.230669,241.096559,232.621327,245.943631,227.974233


In [41]:
amy_data.to_csv(path_out.joinpath("amylase.csv"))

### Features

In [44]:
# Area-under-the-curve features for amylase (listed as auc_g / auc_i in the feature table below).
amy_auc = bp.saliva.auc(
    amy_data,
    saliva_type="amylase",
    remove_s0=True,
    sample_times=dataset.sample_times_saliva,
)

In [45]:
# Maximum-increase feature for amylase: once with the default output, once with percent=True.
amy_max_inc = bp.saliva.max_increase(
    amy_data,
    saliva_type="amylase",
    remove_s0=True,
)
amy_max_inc_percent = bp.saliva.max_increase(
    amy_data,
    saliva_type="amylase",
    remove_s0=True,
    percent=True,
)

In [46]:
# Amylase slope between the samples labelled S1 and S4, using the dataset's saliva sample times.
amy_slope = bp.saliva.slope(
    amy_data,
    saliva_type="amylase",
    sample_labels=["S1", "S4"],
    sample_times=dataset.sample_times_saliva,
)

In [47]:
# Put all amylase features side by side, then convert the combined table to long format
# (indexed by subject, condition and saliva_feature, as the preview shows).
amy_features = bp.saliva.utils.saliva_feature_wide_to_long(
    pd.concat(
        [amy_auc, amy_max_inc, amy_max_inc_percent, amy_slope],
        axis="columns",
    ),
    saliva_type="amylase",
)
amy_features.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,amylase
subject,condition,saliva_feature,Unnamed: 3_level_1
VP_01,ftsst,auc_g,3537.764936
VP_01,ftsst,auc_i,-668.755051
VP_01,ftsst,max_inc,39.641369
VP_01,ftsst,max_inc_percent,71.620819
VP_01,ftsst,slopeS1S4,-0.566549


In [48]:
# long to wide: one row per subject, one column per (condition, saliva_feature) pair
amy_features = amy_features.pivot_table(
    values="amylase",
    index=["subject"],
    columns=["condition", "saliva_feature"],
)

In [50]:
# squeeze multiindex: (condition, feature) column tuples become "<feature>_<condition>"
amy_features.columns = [
    f"{feature}_{condition}" for condition, feature in amy_features.columns
]

In [52]:
# add amy_feat prefix to every column name so the feature columns are identifiable as amylase features
amy_features = amy_features.add_prefix("amy_feat_")

In [53]:
# Display the wide amylase feature table (one row per subject).
amy_features

Unnamed: 0_level_0,amy_feat_auc_g_ftsst,amy_feat_auc_i_ftsst,amy_feat_max_inc_ftsst,amy_feat_max_inc_percent_ftsst,amy_feat_slopeS1S4_ftsst,amy_feat_auc_g_tsst,amy_feat_auc_i_tsst,amy_feat_max_inc_tsst,amy_feat_max_inc_percent_tsst,amy_feat_slopeS1S4_tsst
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
VP_01,3537.764936,-668.755051,39.641369,71.620819,-0.566549,4867.152884,380.919714,24.043329,40.731119,0.276276
VP_02,11765.769862,-1640.483426,17.449213,9.891952,-0.772843,10402.228339,76.982466,41.448727,30.508748,-1.52743
VP_03,9003.05997,-310.723053,37.308236,30.443332,0.089759,9075.77262,1637.263114,142.018111,145.101332,-0.426489
VP_04,8757.26476,1754.354904,99.562816,108.051855,1.683734,6186.774219,493.994858,48.351254,64.55011,1.34309
VP_05,16015.032412,-6962.640889,-43.595034,-14.419313,-2.284045,18072.09225,-2093.756161,0.898641,0.338675,-0.040488
VP_06,8499.47483,-1402.921546,28.924072,22.198964,-0.291474,7120.202122,-483.053529,8.60545,8.601765,0.23904
VP_07,10978.42926,-94.537934,10.406924,7.142857,-0.178832,13246.302561,-2078.574941,-5.985776,-2.9685,-0.312407
VP_08,12085.576877,-5719.572589,0.771341,0.329242,-2.538337,10646.692407,934.663792,70.121137,54.872227,-0.009968
VP_09,2684.387072,157.853587,22.083218,66.427958,0.263462,3228.477956,1245.463993,33.710476,129.19708,0.451801
VP_10,15101.216985,-2346.410986,1.152251,0.501908,0.032007,17577.02287,442.769003,20.492922,9.089757,0.434607


In [54]:
# export the wide amylase feature table
amy_features_file = path_out.joinpath("amylase_features.csv")
amy_features.to_csv(amy_features_file)

## Progesterone and Estradiol

In [67]:
# Read the two hormone sheets; pandas returns a dict with one DataFrame per requested sheet.
hormone_sheets = pd.read_excel(
    base_path.joinpath("data_tabular/saliva/raw/estradiol_progesterone_values.xlsx"),
    sheet_name=["progesterone", "estradiol"],
)

hormone_series = {}
for hormone, sheet in hormone_sheets.items():
    # Keep only the ID column and this hormone's value column, dropping incomplete rows.
    values = sheet[["sample ID", hormone]].dropna()
    # Numeric sample IDs become zero-padded subject labels, e.g. 1 -> "VP_01".
    subject_ids = values["sample ID"].astype(int).apply(lambda s: f"VP_{s:02d}")
    indexed = values.assign(subject=subject_ids).drop(columns="sample ID").set_index("subject")
    hormone_series[hormone] = indexed[hormone]

# One column per hormone, aligned on the subject index.
prog_estr_data = pd.concat(hormone_series, axis=1)
prog_estr_data.head()

Unnamed: 0_level_0,progesterone,estradiol
subject,Unnamed: 1_level_1,Unnamed: 2_level_1
VP_01,6.82945,8.39855
VP_02,8.65175,1.1939
VP_07,21.09,6.71075
VP_09,31.9815,7.9836
VP_12,18.0065,5.39575


In [68]:
# export the combined progesterone/estradiol table
prog_estr_file = path_out.joinpath("progesterone_estradiol.csv")
prog_estr_data.to_csv(prog_estr_file)