# Bloodspot Processing

In [98]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from fau_colors import cmaps
import biopsykit as bp
from biopsykit.io import biomarker

from empkins_io.datasets.d03.micro_gapvii import MicroBaseDataset

%load_ext autoreload
%autoreload 2
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [99]:
# Discard any figures that are still open from earlier runs of the notebook.
plt.close("all")

# Build the default colour palette from the `faculties` colormap of fau_colors.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Set after `set_theme`, in the same order as before, so the theme call
# cannot overwrite these values.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Display the palette as the cell output.
palette

In [100]:
# Key of the top-level entry in ../config.json from which `base_path` is read.
deploy_type = "local"

In [101]:
# Load the project configuration. The context manager guarantees the file
# handle is closed again (the bare `Path.open()` call left it open).
with Path("../config.json").open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Root folder of the study data for the selected deployment type.
base_path = Path(config_dict[deploy_type]["base_path"])
base_path

PosixPath('/Volumes/luca_ssd/Study_Data/2022_12_GAPVII_Micro')

In [102]:
# Folder that receives the final, analysis-ready bloodspot tables.
path_out = base_path / "data_tabular/bloodspots/final"
bp.utils.file_handling.mkdirs([path_out])

In [103]:
# Dataset object rooted at `base_path`; its repr (shown below) lists one row
# per subject / condition / phase combination.
dataset = MicroBaseDataset(base_path)
dataset

Unnamed: 0,subject,condition,phase
0,VP_001,tsst,Prep
1,VP_001,tsst,Pause_1
2,VP_001,tsst,Talk
3,VP_001,tsst,Pause_2
4,VP_001,tsst,Pause_3
...,...,...,...
1691,VP_110,ftsst,Pause_2
1692,VP_110,ftsst,Pause_3
1693,VP_110,ftsst,Math
1694,VP_110,ftsst,Pause_4


## CRP

### Samples

In [104]:
# Folder containing the cleaned CRP result spreadsheets.
crp_input_dir = base_path.joinpath("data_tabular/bloodspots/cleaned")

# File 1.0: sample IDs of the form "VP_<nr>_T<x>_B<x>"; the three regex groups
# become the index levels of the returned frame.
crp_data_1_0 = bp.io.biomarker.load_biomarker_results(
    crp_input_dir.joinpath("crp_values_1_0.xlsx"),
    biomarker_type="crp",
    regex_str=r"(VP_\d+)_(T\w)_(B\w)",
)

# File 1.1: sample IDs carry an "EmpD03micro_" prefix and label the day "D<x>" instead of "T<x>".
crp_data_1_1 = bp.io.biomarker.load_biomarker_results(
    crp_input_dir.joinpath("crp_values_1_1.xlsx"),
    biomarker_type="crp",
    regex_str=r"EmpD03micro_(VP_\d+)_(D\w)_(B\w)",
)

In [105]:
# Inspect the first file: subject IDs are two-digit here (e.g. VP_01) and some
# crp values are missing.
crp_data_1_0

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,crp
subject,day,sample,Unnamed: 3_level_1
VP_01,T1,B0,1.317150
VP_01,T1,B1,1.134765
VP_01,T2,B0,1.354440
VP_01,T2,B1,1.227120
VP_02,T1,B0,0.857475
...,...,...,...
VP_40,T1,B1,
VP_40,T2,B0,
VP_40,T2,B1,
VP_44,T2,B0,


In [106]:
# Stack the rows of both CRP result files into one long-format frame.
crp_data = pd.concat(objs=[crp_data_1_0, crp_data_1_1], axis=0)

In [107]:
crp_data = (
    crp_data.reset_index()
    # map the D1/D2 day labels onto the T1/T2 labels
    .replace({"D1": "T1", "D2": "T2"})
    # zero-pad the numeric part of the subject ID to three digits (VP_01 -> VP_001)
    .assign(subject=lambda df: "VP_" + df["subject"].str.split("_").str[1].str.zfill(3))
    # rows without a subject ID cannot be assigned to anyone
    .dropna(subset=["subject"])
    .set_index(["subject", "day", "sample"])
    # presumably maps (subject, day) to the condition of that day -- verify
    # against MicroBaseDataset.condition_day_mapping
    .join(dataset.condition_day_mapping)
    .reset_index()
    # from here on the condition replaces the day as identifier
    .drop(columns="day")
    .set_index(["subject", "condition", "sample"])
)
crp_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,crp
subject,condition,sample,Unnamed: 3_level_1
VP_001,ftsst,B0,1.317150
VP_001,ftsst,B1,1.134765
VP_001,tsst,B0,1.354440
VP_001,tsst,B1,1.227120
VP_002,ftsst,B0,0.857475
...,...,...,...
VP_109,ftsst,B1,0.202440
VP_110,ftsst,B0,0.130809
VP_110,ftsst,B1,0.135434
VP_110,tsst,B0,0.144998


In [108]:
# Reshape to wide format: one row per subject, one column per (condition, sample) pair.
# aggfunc="mean" is the pandas default; it is spelled out because it means that
# duplicate entries for the same subject/condition/sample are averaged.
crp_data = crp_data.pivot_table(
    values="crp",
    index="subject",
    columns=["condition", "sample"],
    aggfunc="mean",
)

In [109]:
# Flatten the two-level (condition, sample) column index into single
# "<sample>_<condition>" labels.
crp_data.columns = ["{1}_{0}".format(*levels) for levels in crp_data.columns]

In [110]:
# Prefix every column with the biomarker name.
crp_data = crp_data.rename(columns=lambda label: f"crp_{label}")

In [111]:
# Final wide table: one row per subject, columns named crp_<sample>_<condition>.
crp_data

Unnamed: 0_level_0,crp_B0_ftsst,crp_B1_ftsst,crp_B0_tsst,crp_B1_tsst
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
VP_001,1.317150,1.134765,1.354440,1.227120
VP_002,0.857475,1.040430,1.259580,1.434495
VP_003,0.337380,0.295342,0.311038,0.302088
VP_004,0.678855,0.988515,0.728970,0.545085
VP_005,0.541875,0.616560,0.583320,0.533710
...,...,...,...,...
VP_106,1.262130,0.992715,1.311870,1.359030
VP_107,0.729345,1.003455,0.390000,0.771015
VP_108,2.658090,2.438895,2.164440,2.402655
VP_109,0.150351,0.202440,0.111384,0.394792


In [112]:
# Write the wide CRP table to the output folder.
crp_data.to_csv(path_out / "crp.csv")