# Bloodspot Processing

In [23]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from fau_colors import cmaps
import biopsykit as bp
from biopsykit.io import biomarker

from empkins_io.datasets.d03.macro_ap01 import MacroBaseDataset

%load_ext autoreload
%autoreload 2
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
# Close figures left over from earlier runs of this notebook.
plt.close("all")

# Build the seaborn palette from the fau_colors "faculties" colormap and make it the default.
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

# Applied after set_theme so these values are the ones in effect afterwards.
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,  # embed TrueType (Type 42) fonts in exported PDFs
        "mathtext.default": "regular",
    }
)

# Display the palette as the cell output.
palette

In [25]:
# Key of the config.json entry whose settings (e.g. "base_path") are used below.
deploy_type = "local"

In [26]:
# Read the project configuration. The context manager closes the file handle
# as soon as parsing is done instead of leaving it open for the kernel's lifetime.
config_path = Path("../config.json")
with config_path.open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Root folder of the study data for the selected deployment.
base_path = Path(config_dict[deploy_type]["base_path"])
base_path

PosixPath('/Volumes/luca_ssd/Study_Data/2022_05_AP01_Macro')

In [27]:
# Destination folder for the processed bloodspot tables.
path_out = base_path / "data_tabular/bloodspots/final"
# Hand the folder to biopsykit's mkdirs helper before anything is written (presumably creates it if missing).
bp.utils.file_handling.mkdirs([path_out])

In [28]:
# Dataset object rooted at base_path; its repr lists the available subject/condition pairs.
dataset = MacroBaseDataset(base_path)
dataset

Unnamed: 0,subject,condition
0,VP_01,ftsst
1,VP_01,tsst
2,VP_02,ftsst
3,VP_02,tsst
4,VP_04,ftsst
...,...,...
73,VP_39,tsst
74,VP_40,ftsst
75,VP_40,tsst
76,VP_41,ftsst


## CRP

### Samples

In [29]:
# Raw CRP results as delivered by the lab.
crp_file = base_path.joinpath("data_tabular/bloodspots/raw/crp_values.xlsx")

# Load the values, attach the condition via the dataset's day -> condition mapping,
# then swap the "day" column for "condition" in the index.
# NOTE(review): the three regex groups presumably map to subject, day and sample - confirm.
crp_data = (
    bp.io.biomarker.load_biomarker_results(crp_file, biomarker_type="crp", regex_str=r"(VP_\d+)-(T\w)-(B\w)")
    .join(dataset.day_condition_map)
    .reset_index()
    .drop(columns="day")
    .set_index(["subject", "condition", "sample"])
)
crp_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,crp
subject,condition,sample,Unnamed: 3_level_1
VP_01,tsst,B0,1.28780
VP_01,tsst,B1,0.87412
VP_01,ftsst,B0,0.88948
VP_01,ftsst,B1,0.93484
VP_02,tsst,B0,0.61150
...,...,...,...
VP_40,tsst,B1,21.88960
VP_41,ftsst,B0,3.78896
VP_41,ftsst,B1,4.38420
VP_41,tsst,B0,5.11540


In [30]:
# wide format: one row per subject, one column per (condition, sample) pair
# note: pivot_table aggregates with the mean by default, so duplicate entries would be averaged
crp_data = pd.pivot_table(crp_data, values="crp", index="subject", columns=["condition", "sample"])

In [31]:
# flatten the (condition, sample) column MultiIndex into "<sample>_<condition>" labels;
# the guard keeps a re-run of this cell from indexing into the already-flattened strings,
# which would silently garble the column names
if isinstance(crp_data.columns, pd.MultiIndex):
    crp_data.columns = [f"{sample}_{condition}" for condition, sample in crp_data.columns]

In [33]:
# prefix every column with the biomarker name; columns that already carry the prefix are left
# alone so that re-running this cell does not produce "crp_crp_..." labels
crp_data = crp_data.rename(columns=lambda label: label if str(label).startswith("crp_") else f"crp_{label}")

In [34]:
# Show the wide, prefixed CRP table as a final check before it is written to disk.
crp_data

Unnamed: 0_level_0,crp_B0_ftsst,crp_B1_ftsst,crp_B0_tsst,crp_B1_tsst
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
VP_01,0.88948,0.93484,1.2878,0.87412
VP_02,0.42942,0.16826,0.6115,0.47074
VP_03,2.10016,1.23264,1.94704,1.54988
VP_04,6.0112,5.5016,6.4348,7.4134
VP_05,0.285412,0.54994,0.4995,0.221688
VP_06,1.36556,1.15644,0.95184,0.69366
VP_07,0.24116,0.149816,0.70968,0.189816
VP_08,4.3698,6.1324,,0.24762
VP_09,1.91158,1.45776,1.13352,0.41256
VP_10,3.02536,3.65014,2.62912,2.51192


In [35]:
# Export the per-subject CRP table to the output folder.
crp_data.to_csv(path_out / "crp.csv")