# Merge Questionnaire Data

In [1]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from fau_colors import cmaps
import biopsykit as bp

%load_ext autoreload
%autoreload 2
%matplotlib widget

In [2]:
plt.close("all")

palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

plt.rcParams["figure.figsize"] = (10, 5)
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["mathtext.default"] = "regular"

palette

In [3]:
deploy_type = "local"

In [11]:
config_dict = json.load(Path("../config.json").open(encoding="utf-8"))

base_path = Path(config_dict[deploy_type]["base_path"])
base_path

PosixPath('/Volumes/luca_ssd/Study_Data/2022_05_AP01_Macro')

In [13]:
code_mapping = pd.read_csv(base_path.joinpath("data_tabular/extras/code_to_number_mapping.csv"))
code_mapping = code_mapping.set_index("Code")
code_mapping.head()

Unnamed: 0_level_0,subject
Code,Unnamed: 1_level_1
BG05W,VP_01
HZ03B,VP_02
NW15N,VP_03
KS08F,VP_04
FA01B,VP_05


## Load Questionnaire Data and Map Code to VP-ID

In [80]:
quest_data = pd.read_excel(base_path.joinpath("data_tabular/questionnaires/raw/unipark_screening.xlsx"))
quest_data = quest_data.rename(columns={"VPN_Kennung": "Code"})
quest_data = quest_data.set_index("Code")

quest_data = quest_data.join(code_mapping)

quest_data = quest_data.dropna(subset=["subject"]).set_index("subject")

quest_data

Unnamed: 0_level_0,lfdn,external_lfdn,tester,dispcode,lastpage,quality,duration,Einverstaendnis_1,Einverstaendnis_2,Geschlecht,...,rts7050127,rts7050128,rts7050129,rts7050130,rts7050131,rts7050132,rts7050133,rts7050134,rts7070232,rts7071496
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_09,130,0,0,31,7033748,-77,244,1,1,1,...,0,0,0,0,0,0,0,0,0,0
VP_09,131,0,0,31,7033773,-77,394,1,1,1,...,0,0,0,0,0,0,0,0,0,0
VP_09,158,0,0,31,7033773,-77,617,1,1,1,...,0,0,0,0,0,0,0,0,0,0
VP_09,306,0,0,31,7033731,-77,1136,1,1,1,...,321,366,417,448,549,653,720,893,1116,1130
VP_25,554,0,0,31,7033731,-77,1154,1,1,2,...,419,504,605,666,768,877,955,1083,1132,1147
VP_27,542,0,0,31,7033754,-77,570,1,1,2,...,0,0,0,0,0,0,0,0,0,0
VP_27,543,0,0,31,7033731,-77,2372,1,1,2,...,652,814,903,1103,1411,1558,1668,2038,2338,2367
VP_15,113,0,0,31,7033731,-77,2253,1,1,1,...,906,1004,1092,1186,1381,1560,1673,2121,2226,2243
VP_01,57,0,0,31,7033731,-77,2225,1,1,1,...,1148,1237,1371,1430,1614,1776,1860,2092,2203,2220
VP_41,813,0,0,31,7033765,-77,521,1,1,1,...,0,0,0,0,0,0,0,0,0,0


In [81]:
# keep last entry for each subject
quest_data = quest_data.groupby("subject").last()

In [82]:
quest_data

Unnamed: 0_level_0,lfdn,external_lfdn,tester,dispcode,lastpage,quality,duration,Einverstaendnis_1,Einverstaendnis_2,Geschlecht,...,rts7050127,rts7050128,rts7050129,rts7050130,rts7050131,rts7050132,rts7050133,rts7050134,rts7070232,rts7071496
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,57,0,0,31,7033731,-77,2225,1,1,1,...,1148,1237,1371,1430,1614,1776,1860,2092,2203,2220
VP_02,129,0,0,31,7033731,-77,1623,1,1,1,...,660,718,810,856,1111,1257,1347,1551,1608,1619
VP_03,135,0,0,31,7033731,-77,2565,1,1,2,...,682,1009,1119,1180,1437,1622,2012,2415,2530,2551
VP_04,177,0,0,31,7033731,-77,2440,1,1,2,...,830,907,1030,1095,1502,1923,2029,2320,2423,2434
VP_05,79,0,0,31,7033731,-77,1716,1,1,2,...,662,722,807,859,1029,1186,1300,1603,1681,1708
VP_06,308,0,0,31,7033731,-77,1999,1,1,2,...,1076,1131,1211,1258,1426,1535,1615,1894,1957,1992
VP_07,328,0,0,31,7033731,-77,2457,1,1,1,...,1027,1181,1381,1465,1736,1909,2014,2329,2432,2450
VP_08,382,0,0,31,7033731,-77,1962,1,1,2,...,695,763,852,906,1133,1312,1426,1828,1942,1955
VP_09,306,0,0,31,7033731,-77,1136,1,1,1,...,321,366,417,448,549,653,720,893,1116,1130
VP_10,371,0,0,31,7033731,-77,2994,1,1,2,...,592,674,779,841,1168,2269,2397,2711,2959,2988


In [83]:
# load renaming json
renaming_dict = json.load(Path("renaming.json").open(encoding="utf-8"))

renaming_dict

{'Geschlecht': 'Gender',
 'Alter': 'Age',
 'Geburtsdatum': 'Birth_Date',
 'Familienstand': 'Marital',
 'Bildungsabschluss': 'Education',
 'Beschaeftigungsverhaeltnis': 'Profession',
 'Beschaeftigungsverhaeltnis_Sonstige': 'Profession_Other',
 'Ethnische_Herkunft': 'Ethnicity',
 'Studiengang': 'Course_Study',
 'Berufsfeld': 'Professional_Field',
 'Muttersprache': 'Native_Language',
 'Koerpergroesse': 'Height',
 'Koerpergewicht': 'Weight',
 'Haendigkeit': 'Handedness',
 'Kontrazeptiva_Keine': 'Contraceptives_None',
 'Kontrazeptiva_Pille': 'Contraceptives_Pill',
 'Kontrazeptiva_hormonfrei': 'Contraceptives_Hormone_Free',
 'Kontrazeptiva_hormonell_andere': 'Contraceptives_Hormone_Other',
 'Menstruation_regelm_Zyklus': 'Menstruation_Regular_Cycle',
 'Menstruation_unregelm_Zyklus_Grund': 'Menstruation_Unregular_Cycle_Reason',
 'Menstruation_letzte_Regelblutung': 'Menstruation_Last_Period',
 'Zykluslaenge': 'Cycle_Length',
 'Menstruation_Zyklus_Schwankung': 'Menstruation_Cycle_Fluctuations',


In [84]:
quest_data = quest_data.rename(renaming_dict, axis=1)

# get only columns that  are in renaming dict and in the quest_data
columns = list(set(list(renaming_dict.values())) & set(quest_data.columns))

quest_data_filtered = quest_data[columns]

quest_data_filtered.head()

Unnamed: 0_level_0,Disease_Liver,Menstruation_Last_Period,Menstruation_Cycle_Fluctuations,Disease_Acute_Symptoms,Native_Language,Menstruation_Regular_Cycle,Disease_Other,Medication_Blood_Heart,Education,Medication_Anti_Inflammatory,...,Disease_Eye,Medication_Painkiller,Menstruation_Ovulation_Felt,Menstruation_Ovulation_Tracking,Menstruation_Ovulation_Test,Disease_Skin,Medication_Antidepressant,Medication_Insuline,Medication_Cholesterol,Height
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,2,2022-01-06,4,2,1,2,2,2,5,2,...,2,2,1,0,1,2,2,2,2,168
VP_02,2,2022-03-06,1,2,1,1,2,2,6,2,...,2,2,-77,-77,-77,2,2,2,2,174
VP_03,2,-66,-77,2,1,-77,2,2,6,2,...,2,2,-77,-77,-77,2,2,2,2,180
VP_04,2,-66,-77,2,1,-77,2,2,5,2,...,2,2,-77,-77,-77,2,2,2,2,178
VP_05,2,-66,-77,2,1,-77,2,2,5,2,...,2,2,-77,-77,-77,2,2,2,2,183


In [85]:
# replace all missing values
quest_data_filtered = quest_data_filtered.replace("-66", np.nan)
quest_data_filtered = quest_data_filtered.replace(-77, np.nan)

In [86]:
quest_data_filtered

Unnamed: 0_level_0,Disease_Liver,Menstruation_Last_Period,Menstruation_Cycle_Fluctuations,Disease_Acute_Symptoms,Native_Language,Menstruation_Regular_Cycle,Disease_Other,Medication_Blood_Heart,Education,Medication_Anti_Inflammatory,...,Disease_Eye,Medication_Painkiller,Menstruation_Ovulation_Felt,Menstruation_Ovulation_Tracking,Menstruation_Ovulation_Test,Disease_Skin,Medication_Antidepressant,Medication_Insuline,Medication_Cholesterol,Height
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,2,2022-01-06,4.0,2,1,2.0,2,2,5,2,...,2,2,1.0,0.0,1.0,2,2,2,2,168
VP_02,2,2022-03-06,1.0,2,1,1.0,2,2,6,2,...,2,2,,,,2,2,2,2,174
VP_03,2,,,2,1,,2,2,6,2,...,2,2,,,,2,2,2,2,180
VP_04,2,,,2,1,,2,2,5,2,...,2,2,,,,2,2,2,2,178
VP_05,2,,,2,1,,2,2,5,2,...,2,2,,,,2,2,2,2,183
VP_06,2,,,2,1,,2,2,5,2,...,2,2,,,,2,2,2,2,189
VP_07,2,2022-03-14,4.0,2,1,2.0,2,2,4,2,...,2,2,0.0,1.0,0.0,2,2,2,2,169
VP_08,2,,,2,1,,2,2,5,2,...,2,2,,,,2,2,2,2,184
VP_09,2,2022-03-01,2.0,2,1,1.0,2,2,5,2,...,2,2,,,,2,2,2,2,160
VP_10,2,,,2,1,,2,2,4,2,...,2,2,,,,2,2,2,2,184


## Add Condition Order

In [87]:
condition_order = pd.read_csv(base_path.joinpath("data_tabular/extras/condition_order.csv"))
condition_order = condition_order.set_index("subject")["condition_order"]
condition_order.head()

subject
VP_01     tsst_first
VP_02     tsst_first
VP_03    ftsst_first
VP_04    ftsst_first
VP_05    ftsst_first
Name: condition_order, dtype: object

## PASA

In [90]:
pasa_data = pd.read_excel(base_path.joinpath("data_tabular/questionnaires/raw/pasa.xlsx"))
pasa_data = pasa_data.set_index(["subject", "condition"])

# add "PASA_" to column names 
pasa_data.columns = "PASA_" + pasa_data.columns.astype(str).str.zfill(2)
pasa_data = pasa_data.unstack("condition")

# flatten multiindex
pasa_data.columns = ["_".join(col).strip() for col in pasa_data.columns.values]

# sort columns
pasa_data = pasa_data.sort_index(axis=1)

pasa_data.head()


Unnamed: 0_level_0,PASA_01_ftsst,PASA_01_tsst,PASA_02_ftsst,PASA_02_tsst,PASA_03_ftsst,PASA_03_tsst,PASA_04_ftsst,PASA_04_tsst,PASA_05_ftsst,PASA_05_tsst,...,PASA_12_ftsst,PASA_12_tsst,PASA_13_ftsst,PASA_13_tsst,PASA_14_ftsst,PASA_14_tsst,PASA_15_ftsst,PASA_15_tsst,PASA_16_ftsst,PASA_16_tsst
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,4,1,5,4,5,3,5,5,2,4,...,3,3,2.0,4.0,2.0,4.0,4.0,1.0,4.0,4.0
VP_02,4,2,4,4,2,2,3,3,1,4,...,3,3,3.0,4.0,2.0,4.0,4.0,2.0,3.0,3.0
VP_03,5,4,3,3,4,4,3,4,1,1,...,3,3,1.0,2.0,1.0,3.0,3.0,3.0,3.0,4.0
VP_04,4,4,3,3,4,4,1,5,0,1,...,1,1,1.0,1.0,1.0,4.0,4.0,3.0,4.0,3.0
VP_05,2,2,2,3,1,2,3,3,4,3,...,2,2,2.0,3.0,3.0,3.0,2.0,3.0,3.0,2.0


In [91]:
pasa_data.to_csv(base_path.joinpath("data_tabular/questionnaires/final/pasa.csv"))

## Add Handedness

In [None]:
handedness = pd.read_excel(quest_path_in.joinpath("handedness.xlsx"))
handedness = handedness.set_index("subject")["handedness"]

quest_data_out.insert(1, "handedness", handedness)
quest_data_out.head()

## Export

In [None]:
quest_data_out.to_excel(quest_path_out.joinpath("questionnaire_data.xlsx"))