# Questionnaire Data Processing

In [1]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from fau_colors import cmaps
import biopsykit as bp

from empkins_io.datasets.d03.macro_ap01 import MacroBaseDataset

# Pick up edits to imported project modules without restarting the kernel
%load_ext autoreload
%autoreload 2
# Interactive figure backend (provided by ipympl)
%matplotlib widget

## Setup

In [2]:
# Close any figures left over from a previous run of this notebook
plt.close("all")

# Build the color palette from the fau_colors "faculties" colormap and make it the seaborn default
palette = sns.color_palette(cmaps.faculties)
sns.set_theme(palette=palette, style="ticks", context="notebook")

# Matplotlib overrides; set after sns.set_theme(), in the same order as before
plt.rcParams.update(
    {
        "figure.figsize": (10, 5),
        "pdf.fonttype": 42,
        "mathtext.default": "regular",
    }
)

# Show the palette as the cell output
palette

In [3]:
# Key of the config.json entry whose "base_path" is used as the data root
deploy_type = "local"

In [6]:
# Read the notebook configuration. The context manager guarantees that the file
# handle is closed again (previously the handle returned by ``open`` was never closed).
config_path = Path("../config.json")
with config_path.open(encoding="utf-8") as config_file:
    config_dict = json.load(config_file)

# Root folder of the study data for the selected deployment
base_path = Path(config_dict[deploy_type]["base_path"])
base_path

PosixPath('/Volumes/luca_ssd/Study_Data/2022_05_AP01_Macro')

In [None]:
# Destination folder for the processed questionnaire files
path_out = base_path / "questionnaires/processed"
# Hand the folder to biopsykit's mkdirs helper (presumably creates missing directories - verify)
bp.utils.file_handling.mkdirs([path_out])

In [None]:
# Dataset object rooted at the study's base path; displayed as the cell output
dataset = MacroBaseDataset(base_path)
dataset

## Compute Scores

In [None]:
# Questionnaire table provided by the dataset; all scores below are computed from it
quest_data = dataset.questionnaire
quest_data.head()

### State Questionnaires

In [None]:
# Common index level order applied to every long-format score table below
level_order = ["subject", "day", "subscale", "time"]

#### STADI - State

In [None]:
# Compute the STADI state scores (pre and post) separately for each day, keyed by "t1"/"t2"
stadi_per_day = {}
for i in [1, 2]:
    pre_columns = quest_data.filter(regex=f"STADI_.*_T{i}_Pre").columns
    post_columns = quest_data.filter(regex=f"STADI_.*_T{i}_Post").columns
    stadi_per_day[f"t{i}"] = bp.questionnaires.utils.compute_scores(
        quest_data,
        quest_dict={"STADI-pre": pre_columns, "STADI-post": post_columns},
        quest_kwargs={
            "STADI-pre": {"stadi_type": "state"},
            "STADI-post": {"stadi_type": "state"},
        },
    )

# Stack the per-day results under a "day" level, convert to long format and
# bring the index into the common level order
stadi_data = (
    bp.questionnaires.utils.wide_to_long(
        pd.concat(stadi_per_day, names=["day"]),
        "STADI",
        levels=["subscale", "time"],
    )
    .reorder_levels(level_order)
    .sort_index()
)
stadi_data.head()

#### PANAS

In [None]:
# Compute the PANAS scores (pre and post, German version) separately for each day
panas_per_day = {}
for i in [1, 2]:
    panas_columns = {
        "PANAS-pre": quest_data.filter(regex=f"PANAS_.*_T{i}_Pre").columns,
        "PANAS-post": quest_data.filter(regex=f"PANAS_.*_T{i}_Post").columns,
    }
    panas_per_day[f"t{i}"] = bp.questionnaires.utils.compute_scores(
        quest_data,
        quest_dict=panas_columns,
        quest_kwargs={
            "PANAS-pre": {"language": "german"},
            "PANAS-post": {"language": "german"},
        },
    )

# Stack the per-day results under a "day" level, convert to long format and
# bring the index into the common level order
panas_data = (
    bp.questionnaires.utils.wide_to_long(
        pd.concat(panas_per_day, names=["day"]),
        "PANAS",
        levels=["subscale", "time"],
    )
    .reorder_levels(level_order)
    .sort_index()
)
panas_data

#### PASA

In [None]:
# Select all PASA columns and run them through convert_scale with offset=1
# (presumably shifts the item values into the range the PASA scoring expects - verify)
pasa_items = quest_data.filter(like="PASA")
pasa_data_conv = bp.questionnaires.utils.convert_scale(pasa_items, offset=1)
pasa_data_conv.head()

In [None]:
# Compute the PASA scores from the converted items, one score set per day (T1 / T2).
# The regex patterns are plain strings: they contain no placeholders, so the
# former f-string prefix was superfluous.
pasa_data = bp.questionnaires.utils.compute_scores(
    pasa_data_conv,
    quest_dict={
        "PASA-T1": pasa_data_conv.filter(regex="PASA_.*_T1").columns,
        "PASA-T2": pasa_data_conv.filter(regex="PASA_.*_T2").columns,
    },
)
# ``levels`` is passed by keyword, consistent with the STADI and PANAS cells
pasa_data = bp.questionnaires.utils.wide_to_long(pasa_data, "PASA", levels=["subscale", "day"])
# Add a constant "time" level ("pre") so the index has the same levels as the
# STADI/PANAS tables and can be reordered with ``level_order``
pasa_data = pd.concat({"pre": pasa_data}, names=["time"])
pasa_data = pasa_data.reorder_levels(level_order).sort_index()
pasa_data

In [None]:
# Combine the three score tables and move the questionnaire columns into a
# new innermost index level named "type"
scores_stacked = pd.concat([stadi_data, panas_data, pasa_data]).stack()
scores_stacked.index = scores_stacked.index.set_names("type", level=-1)

# Wrap the values in a single "data" column and upper-case the labels of the "day" level
quest_data_out = pd.DataFrame(scores_stacked, columns=["data"]).rename(index=str.upper, level="day")

quest_data_out.head()

In [None]:
# Day/condition mapping from the dataset; joined onto the scores below so that
# the "day" level can be replaced by "condition"
cond = dataset.day_condition_map
cond.head()

In [None]:
# Attach the condition to every score, drop the "day" column and re-index by
# subject / condition / questionnaire type / subscale / time
final_index = ["subject", "condition", "type", "subscale", "time"]
data_out = (
    quest_data_out.join(cond)
    .reset_index()
    .drop(columns="day")
    .set_index(final_index)
    .sort_index()
)
data_out

#### Compute Relative Questionnaire Scores (pre/post)

In [None]:
# Spread the "time" level into columns, requesting the order pre, post, so that
# the column-wise difference stored under "post" is post - pre
scores_by_time = data_out.unstack("time").reindex(["pre", "post"], level=-1, axis=1)
pre_post_change = scores_by_time.diff(axis=1)

# Keep only the difference column, drop the "time" column level, discard rows
# without a valid difference and round to three decimals
data_relative = (
    pre_post_change.loc[:, [("data", "post")]]
    .droplevel(-1, axis=1)
    .dropna()
    .round(3)
)
data_relative.head()

## Export

In [None]:
# Write the absolute and the relative (post - pre) scores as CSV files into the output folder
export_frames = {
    "questionnaire_data_processed.csv": data_out,
    "questionnaire_data_processed_relative.csv": data_relative,
}
for file_name, frame in export_frames.items():
    frame.to_csv(path_out.joinpath(file_name))