# Merge Questionnaire Data

In [169]:
import json
import re
from pathlib import Path

import pandas as pd
import numpy as np
import pingouin as pg

import matplotlib.pyplot as plt
import seaborn as sns

from fau_colors import cmaps
import biopsykit as bp

%load_ext autoreload
%autoreload 2
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [170]:
plt.close("all")

palette = sns.color_palette(cmaps.faculties)
sns.set_theme(context="notebook", style="ticks", palette=palette)

plt.rcParams["figure.figsize"] = (10, 5)
plt.rcParams["pdf.fonttype"] = 42
plt.rcParams["mathtext.default"] = "regular"

palette

In [171]:
deploy_type = "local"

In [172]:
config_dict = json.load(Path("../config.json").open(encoding="utf-8"))

base_path = Path(config_dict[deploy_type]["base_path"])
base_path

PosixPath('/Volumes/luca_ssd/Study_Data/2022_05_AP01_Macro')

In [173]:
code_mapping = pd.read_csv(base_path.joinpath("data_tabular/extras/code_to_number_mapping.csv"))
code_mapping = code_mapping.set_index("Code")
code_mapping.head()

Unnamed: 0_level_0,subject
Code,Unnamed: 1_level_1
BG05W,VP_01
HZ03B,VP_02
NW15N,VP_03
KS08F,VP_04
FA01B,VP_05


## Load Questionnaire Data and Map Code to VP-ID

In [199]:
quest_data = pd.read_excel(base_path.joinpath("data_tabular/questionnaires/cleaned/unipark_screening.xlsx"))
quest_data = quest_data.rename(columns={"VPN_Kennung": "Code"})
quest_data = quest_data.set_index("Code")

quest_data = quest_data.join(code_mapping)

quest_data = quest_data.dropna(subset=["subject"]).set_index("subject")

quest_data

Unnamed: 0_level_0,Unnamed: 0,lfdn,lastpage,Einverstaendnis_1,Einverstaendnis_2,Geschlecht,Alter,Familienstand,Bildungsabschluss,Beschaeftigungsverhaeltnis,...,TSGS_10,TSGS_11,TSGS_12,TSGS_13,TSGS_14,TSGS_15,Kontaktinformationen,session_id,datetime,date_of_last_access
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_09,0,306,7033731,1,1,1,25,1,5,3,...,3,1,1,4,1,1,leonieanmar@gmail.com,0a2f69be14fce3ce02678c38b781952b,2022-03-08 18:23:55,2022-03-08 18:42:51
VP_11,3,374,7033731,1,1,2,24,1,6,3,...,3,1,2,4,1,2,felix.landwehr@fau.de,97401b7bd3b86f0db28f78b37c3420fe,2022-03-11 08:01:42,2022-03-11 09:04:50
VP_25,5,554,7033731,1,1,2,23,1,5,3,...,3,1,2,3,1,2,julianweber75@googlemail.com,1259d0f4eea79e5686dca33cf22e08b7,2022-04-09 19:53:51,2022-04-09 20:13:05
VP_27,7,543,7033731,1,1,2,26,1,5,3,...,2,3,4,2,5,5,j.eisenbraun@protonmail.com +4915224167781,8f601e66a81898c702fa3e37e5df2cb0,2022-04-04 18:21:11,2022-04-04 19:00:43
VP_15,27,113,7033731,1,1,1,20,1,5,3,...,3,1,1,4,2,2,schwinghammerju@gmail.com,9bd439d112a15d0a0c314833c85fd3c7,2022-03-07 16:30:00,2022-03-07 17:07:33
VP_01,34,57,7033731,1,1,1,28,2,5,3,...,2,1,1,4,4,2,sophia.nike.eleutheria@gmail.com,25251c895624f0302f37239429715da1,2022-03-05 15:19:23,2022-03-05 15:56:28
VP_41,39,814,7033731,1,1,1,23,1,6,3,...,4,1,2,4,1,2,Lianyas@gmx.de,5ec44ce8db425c1d49008286736094c2,2022-05-01 09:15:02,2022-05-01 10:38:08
VP_21,42,339,7033731,1,1,2,18,1,5,3,...,2,2,1,4,2,3,corbinianplank@gmail.com,7d8b110850c4f8cf282df606e762d8f6,2022-03-09 18:08:58,2022-03-16 17:47:56
VP_33,46,712,7033731,1,1,2,31,1,5,3,...,4,1,2,4,1,2,eggers.patrick@gmx.de,c32b1e0a813569dfcbc930847ab894fa,2022-04-20 08:49:34,2022-04-20 09:37:03
VP_32,48,562,7033731,1,1,1,23,1,5,3,...,2,1,3,4,1,2,aitanasekora@yahoo.de,b5e0b4aa31ba0bbb25692ee888627b4f,2022-04-11 17:56:51,2022-04-11 19:02:32


In [200]:
# keep last entry for each subject
quest_data = quest_data.groupby("subject").last()

In [201]:
quest_data.replace(-77, np.nan, inplace=True)

In [202]:
quest_data

Unnamed: 0_level_0,Unnamed: 0,lfdn,lastpage,Einverstaendnis_1,Einverstaendnis_2,Geschlecht,Alter,Familienstand,Bildungsabschluss,Beschaeftigungsverhaeltnis,...,TSGS_10,TSGS_11,TSGS_12,TSGS_13,TSGS_14,TSGS_15,Kontaktinformationen,session_id,datetime,date_of_last_access
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,34,57,7033731,1,1,1,28,2,5,3,...,2,1,1,4,4,2,sophia.nike.eleutheria@gmail.com,25251c895624f0302f37239429715da1,2022-03-05 15:19:23,2022-03-05 15:56:28
VP_02,144,129,7033731,1,1,1,29,1,6,3,...,4,1,4,3,1,1,Wenjia.qian@web.de,4d23d83bb9769fcc258b122f6a83c970,2022-03-06 10:56:04,2022-03-06 11:23:07
VP_03,249,135,7033731,1,1,2,23,1,6,3,...,2,1,2,4,1,2,Dan.wagner@fau.de,923de6d24b854bcfd57098711e456b41,2022-03-06 11:48:48,2022-03-06 12:31:33
VP_04,179,177,7033731,1,1,2,24,1,5,3,...,3,2,2,4,2,1,tillschuerrle@web.de,e6777ca634c746e4771b82ac328543b8,2022-03-07 09:01:45,2022-03-07 09:42:25
VP_05,107,79,7033731,1,1,2,23,1,5,3,...,2,1,3,3,2,1,AminAyub@outlook.com 017663025646,41ff5d6932940deb599b80dde638ca7e,2022-03-05 18:58:29,2022-03-05 19:27:05
VP_06,240,308,7033731,1,1,2,19,1,5,3,...,3,1,1,4,1,2,robin.hoepp@gmail.com,37299e05cdf0972120d48b12f57a4d66,2022-03-08 18:43:42,2022-03-08 19:17:01
VP_07,274,328,7033731,1,1,1,23,2,4,3,...,4,1,1,5,1,2,h.froemmel@gmx.net,e577f7ec754e6d37f7e521520c58e4a0,2022-03-09 09:41:51,2022-03-09 10:22:48
VP_08,116,382,7033731,1,1,2,22,1,5,3,...,2,1,1,4,1,1,lucas.sedran@fau.de,25d5f53e05fd7e429a6224c8fb034860,2022-03-14 09:10:11,2022-03-14 09:42:53
VP_09,0,306,7033731,1,1,1,25,1,5,3,...,3,1,1,4,1,1,leonieanmar@gmail.com,0a2f69be14fce3ce02678c38b781952b,2022-03-08 18:23:55,2022-03-08 18:42:51
VP_10,90,371,7033731,1,1,2,26,1,4,3,...,2,3,3,3,4,4,fabian-gradl@gmx.de,68659b3a3484cc661a0c494b2761b1fa,2022-03-10 19:36:17,2022-03-10 20:26:11


In [203]:
# load renaming json
renaming_dict = json.load(Path("renaming.json").open(encoding="utf-8"))

renaming_dict

{'Geschlecht': 'Gender',
 'Alter': 'Age',
 'Geburtsdatum': 'Birth_Date',
 'Familienstand': 'Marital',
 'Bildungsabschluss': 'Education',
 'Beschaeftigungsverhaeltnis': 'Profession',
 'Beschaeftigungsverhaeltnis_Sonstige': 'Profession_Other',
 'Ethnische_Herkunft': 'Ethnicity',
 'Studiengang': 'Course_Study',
 'Berufsfeld': 'Professional_Field',
 'Muttersprache': 'Native_Language',
 'Koerpergroesse': 'Height',
 'Koerpergewicht': 'Weight',
 'Haendigkeit': 'Handedness',
 'Kontrazeptiva_Keine': 'Contraceptives_None',
 'Kontrazeptiva_Pille': 'Contraceptives_Pill',
 'Kontrazeptiva_hormonfrei': 'Contraceptives_Hormone_Free',
 'Kontrazeptiva_hormonell_andere': 'Contraceptives_Hormone_Other',
 'Menstruation_regelm_Zyklus': 'Menstruation_Regular_Cycle',
 'Menstruation_unregelm_Zyklus_Grund': 'Menstruation_Unregular_Cycle_Reason',
 'Menstruation_letzte_Regelblutung': 'Menstruation_Last_Period',
 'Zykluslaenge': 'Cycle_Length',
 'Menstruation_Zyklus_Schwankung': 'Menstruation_Cycle_Fluctuations',


In [204]:
quest_data = quest_data.rename(renaming_dict, axis=1)

# get only columns that  are in renaming dict and in the quest_data
columns = list(set(list(renaming_dict.values())) & set(quest_data.columns))

quest_data_filtered = quest_data[columns]

quest_data_filtered.head()

Unnamed: 0_level_0,Disease_Skeleton,Marital,Disease_Eye,Native_Language,Medication_Anti_Inflammatory,Education,Menstruation_Last_Period,Menstruation_Ovulation_Tracking,Menstruation_Cycle_Fluctuations,Date_Ovulation,...,Disease_Liver,Disease_Kidney,Disease_Acute_Symptoms,Profession_Other,Disease_Nervous_System,Disease_Cancer,Disease_Metabolism,Menstruation_Regular_Cycle,Medication_Painkiller,Age
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,2,2,2,1,2,5,2022-01-06,0.0,4.0,2022-02-16,...,2,2,2,-99,2,2,2,2.0,2,28
VP_02,2,1,2,1,2,6,2022-03-06,,1.0,-66,...,2,2,2,-99,2,2,2,1.0,2,29
VP_03,2,1,2,1,2,6,-66,,,-66,...,2,2,2,-99,2,2,2,,2,23
VP_04,2,1,2,1,2,5,-66,,,-66,...,2,2,2,-99,2,2,2,,2,24
VP_05,2,1,2,1,2,5,-66,,,-66,...,2,2,2,-99,2,2,2,,2,23


In [205]:
# replace all missing values
quest_data_filtered = quest_data_filtered.replace("-66", np.nan)
quest_data_filtered = quest_data_filtered.replace(-77, np.nan)

In [206]:
quest_data_filtered

Unnamed: 0_level_0,Disease_Skeleton,Marital,Disease_Eye,Native_Language,Medication_Anti_Inflammatory,Education,Menstruation_Last_Period,Menstruation_Ovulation_Tracking,Menstruation_Cycle_Fluctuations,Date_Ovulation,...,Disease_Liver,Disease_Kidney,Disease_Acute_Symptoms,Profession_Other,Disease_Nervous_System,Disease_Cancer,Disease_Metabolism,Menstruation_Regular_Cycle,Medication_Painkiller,Age
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,2,2,2,1,2,5,2022-01-06,0.0,4.0,2022-02-16,...,2,2,2,-99,2,2,2,2.0,2,28
VP_02,2,1,2,1,2,6,2022-03-06,,1.0,,...,2,2,2,-99,2,2,2,1.0,2,29
VP_03,2,1,2,1,2,6,,,,,...,2,2,2,-99,2,2,2,,2,23
VP_04,2,1,2,1,2,5,,,,,...,2,2,2,-99,2,2,2,,2,24
VP_05,2,1,2,1,2,5,,,,,...,2,2,2,-99,2,2,2,,2,23
VP_06,2,1,2,1,2,5,,,,,...,2,2,2,-99,2,2,2,,2,19
VP_07,2,2,2,1,2,4,2022-03-14,1.0,4.0,2022-03-23,...,2,2,2,-99,2,2,2,2.0,2,23
VP_08,2,1,2,1,2,5,,,,,...,2,2,2,-99,2,2,2,,2,22
VP_09,2,1,2,1,2,5,2022-03-01,,2.0,,...,2,2,2,-99,2,2,2,1.0,2,25
VP_10,2,1,2,1,2,4,,,,,...,2,2,2,-99,2,2,2,,2,26


## Add Condition Order

In [207]:
condition_order = pd.read_csv(base_path.joinpath("data_tabular/extras/condition_order.csv"))
condition_order = condition_order.set_index("subject")["condition_order"]
condition_order.head()

# change condtion order to codes
condition_order = condition_order.replace("tsst_first", 1)
condition_order = condition_order.replace("ftsst_first", 2)

quest_data_filtered.insert(1, "Condition_Order", condition_order)

In [208]:
quest_data_filtered

Unnamed: 0_level_0,Disease_Skeleton,Condition_Order,Marital,Disease_Eye,Native_Language,Medication_Anti_Inflammatory,Education,Menstruation_Last_Period,Menstruation_Ovulation_Tracking,Menstruation_Cycle_Fluctuations,...,Disease_Liver,Disease_Kidney,Disease_Acute_Symptoms,Profession_Other,Disease_Nervous_System,Disease_Cancer,Disease_Metabolism,Menstruation_Regular_Cycle,Medication_Painkiller,Age
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,2,1,2,2,1,2,5,2022-01-06,0.0,4.0,...,2,2,2,-99,2,2,2,2.0,2,28
VP_02,2,1,1,2,1,2,6,2022-03-06,,1.0,...,2,2,2,-99,2,2,2,1.0,2,29
VP_03,2,2,1,2,1,2,6,,,,...,2,2,2,-99,2,2,2,,2,23
VP_04,2,2,1,2,1,2,5,,,,...,2,2,2,-99,2,2,2,,2,24
VP_05,2,2,1,2,1,2,5,,,,...,2,2,2,-99,2,2,2,,2,23
VP_06,2,2,1,2,1,2,5,,,,...,2,2,2,-99,2,2,2,,2,19
VP_07,2,1,2,2,1,2,4,2022-03-14,1.0,4.0,...,2,2,2,-99,2,2,2,2.0,2,23
VP_08,2,1,1,2,1,2,5,,,,...,2,2,2,-99,2,2,2,,2,22
VP_09,2,1,1,2,1,2,5,2022-03-01,,2.0,...,2,2,2,-99,2,2,2,1.0,2,25
VP_10,2,1,1,2,1,2,4,,,,...,2,2,2,-99,2,2,2,,2,26


In [209]:
# add to codebook

codebook = pd.read_csv(base_path.joinpath("data_tabular/questionnaires/codebook.csv"))
codebook.set_index("variable", inplace=True)

In [210]:
codebook

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,11,12,13
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Gender,female,male,other,,,,,,,,,,
Education,kein Schulabschluss,Hauptschule,Mittlere Reife,Fachabitur,Abitur,Bachelor,Master/Diplom,Promotion,Habilitation,,,,
Marital,ledig,feste Partnerschaft,verheiratet,getrennt lebend,geschieden,verwitwet,,,,,,,
Profession,Arbeitslos/Arbeitssuchend,Auszubildende/r,Student/in,Hausfrau/mann,Angestellte/r,Beamte/in,Selbstständig,Elternzeit,Beurlaubt,Sabbatjahr,Berufsunfähig/Frührente,Ruhestand,sonstiges
Ethnicity,Asiatisch,Arabisch,Afrikanisch,Indigen,Weiß,Andere,Keine Antwort,,,,,,
Condition_Order,tsst_first,ftsst_first,,,,,,,,,,,
Handedness,right,left,,,,,,,,,,,


In [211]:
# add row for condition order with nan values
codebook.loc["Condition_Order"] = np.nan

codebook.loc["Condition_Order", "1"] = "tsst_first"
codebook.loc["Condition_Order", "2"] = "ftsst_first"

In [212]:
# export
codebook.to_csv(base_path.joinpath("data_tabular/questionnaires/codebook.csv"))

## Add Handedness

In [213]:
handedness = pd.read_excel(base_path.joinpath("data_tabular/extras/handedness.xlsx"))
handedness = handedness.set_index("subject")["handedness"]

# recode handedness
handedness = handedness.replace("right", 1)
handedness = handedness.replace("left", 2)

quest_data_filtered.insert(1, "Handedness", handedness)
quest_data_filtered.head()

Unnamed: 0_level_0,Disease_Skeleton,Handedness,Condition_Order,Marital,Disease_Eye,Native_Language,Medication_Anti_Inflammatory,Education,Menstruation_Last_Period,Menstruation_Ovulation_Tracking,...,Disease_Liver,Disease_Kidney,Disease_Acute_Symptoms,Profession_Other,Disease_Nervous_System,Disease_Cancer,Disease_Metabolism,Menstruation_Regular_Cycle,Medication_Painkiller,Age
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,2,1,1,2,2,1,2,5,2022-01-06,0.0,...,2,2,2,-99,2,2,2,2.0,2,28
VP_02,2,1,1,1,2,1,2,6,2022-03-06,,...,2,2,2,-99,2,2,2,1.0,2,29
VP_03,2,1,2,1,2,1,2,6,,,...,2,2,2,-99,2,2,2,,2,23
VP_04,2,2,2,1,2,1,2,5,,,...,2,2,2,-99,2,2,2,,2,24
VP_05,2,1,2,1,2,1,2,5,,,...,2,2,2,-99,2,2,2,,2,23


In [214]:
# add to codebook

# add row for handedness with nan values
codebook.loc["Handedness"] = np.nan

codebook.loc["Handedness", "1"] = "right"
codebook.loc["Handedness", "2"] = "left"

# export
codebook.to_csv(base_path.joinpath("data_tabular/questionnaires/codebook.csv"))

In [215]:
quest_data_filtered

Unnamed: 0_level_0,Disease_Skeleton,Handedness,Condition_Order,Marital,Disease_Eye,Native_Language,Medication_Anti_Inflammatory,Education,Menstruation_Last_Period,Menstruation_Ovulation_Tracking,...,Disease_Liver,Disease_Kidney,Disease_Acute_Symptoms,Profession_Other,Disease_Nervous_System,Disease_Cancer,Disease_Metabolism,Menstruation_Regular_Cycle,Medication_Painkiller,Age
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,2,1,1,2,2,1,2,5,2022-01-06,0.0,...,2,2,2,-99,2,2,2,2.0,2,28
VP_02,2,1,1,1,2,1,2,6,2022-03-06,,...,2,2,2,-99,2,2,2,1.0,2,29
VP_03,2,1,2,1,2,1,2,6,,,...,2,2,2,-99,2,2,2,,2,23
VP_04,2,2,2,1,2,1,2,5,,,...,2,2,2,-99,2,2,2,,2,24
VP_05,2,1,2,1,2,1,2,5,,,...,2,2,2,-99,2,2,2,,2,23
VP_06,2,1,2,1,2,1,2,5,,,...,2,2,2,-99,2,2,2,,2,19
VP_07,2,1,1,2,2,1,2,4,2022-03-14,1.0,...,2,2,2,-99,2,2,2,2.0,2,23
VP_08,2,1,1,1,2,1,2,5,,,...,2,2,2,-99,2,2,2,,2,22
VP_09,2,1,1,1,2,1,2,5,2022-03-01,,...,2,2,2,-99,2,2,2,1.0,2,25
VP_10,2,1,1,1,2,1,2,4,,,...,2,2,2,-99,2,2,2,,2,26


## Export Base

In [216]:
quest_data_filtered.sort_index(axis=1, inplace=True)
quest_data_filtered.to_csv(base_path.joinpath("data_tabular/questionnaires/final/base_data.csv"))

## PASA

In [217]:
pasa_data = pd.read_excel(base_path.joinpath("data_tabular/questionnaires/raw/pasa.xlsx"))
pasa_data = pasa_data.set_index(["subject", "condition"])

# add "PASA_" to column names 
pasa_data.columns = "PASA_" + pasa_data.columns.astype(str).str.zfill(2)
pasa_data = pasa_data.unstack("condition")

pasa_data = pasa_data.sort_index(axis=1, level="condition")

# flatten multiindex
pasa_data.columns = ["_".join(col).strip() for col in pasa_data.columns.values]

pasa_data.head()


Unnamed: 0_level_0,PASA_01_ftsst,PASA_02_ftsst,PASA_03_ftsst,PASA_04_ftsst,PASA_05_ftsst,PASA_06_ftsst,PASA_07_ftsst,PASA_08_ftsst,PASA_09_ftsst,PASA_10_ftsst,...,PASA_07_tsst,PASA_08_tsst,PASA_09_tsst,PASA_10_tsst,PASA_11_tsst,PASA_12_tsst,PASA_13_tsst,PASA_14_tsst,PASA_15_tsst,PASA_16_tsst
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VP_01,4,5,5,5,2,2,0,4.0,2,3,...,3,4.0,0,1,2,3,4.0,4.0,1.0,4.0
VP_02,4,4,2,3,1,2,3,2.0,3,3,...,4,1.0,2,1,0,3,4.0,4.0,2.0,3.0
VP_03,5,3,4,3,1,2,1,3.0,5,3,...,0,4.0,4,2,3,3,2.0,3.0,3.0,4.0
VP_04,4,3,4,1,0,4,0,4.0,5,5,...,0,3.0,3,2,4,1,1.0,4.0,3.0,3.0
VP_05,2,2,1,3,4,1,3,3.0,2,2,...,3,3.0,2,2,2,2,3.0,3.0,3.0,2.0


In [218]:
pasa_data.to_csv(base_path.joinpath("data_tabular/questionnaires/final/pasa.csv"))

## ADS-L

In [219]:
quest_data.filter(like="ADS").to_csv(base_path.joinpath("data_tabular/questionnaires/final/ads.csv"))

## STADI

In [220]:
quest_data.filter(like="STADI").to_csv(base_path.joinpath("data_tabular/questionnaires/final/stadi.csv"))

## Brief Cope

In [221]:
quest_data.filter(like="Brief").to_csv(base_path.joinpath("data_tabular/questionnaires/final/brief_cope.csv"))

## PSS

In [222]:
quest_data.filter(like="PSS").to_csv(base_path.joinpath("data_tabular/questionnaires/final/pss.csv"))

## BFIK

In [224]:
quest_data.filter(like="BFIK").to_csv(base_path.joinpath("data_tabular/questionnaires/final/bfik.csv"))

## RSE

In [226]:
quest_data.filter(like="RSE").to_csv(base_path.joinpath("data_tabular/questionnaires/final/rse.csv"))

## SCS

In [228]:
quest_data.filter(like="SCS").to_csv(base_path.joinpath("data_tabular/questionnaires/final/scs.csv"))

## RSQ

In [230]:
quest_data.filter(like="RSQ").to_csv(base_path.joinpath("data_tabular/questionnaires/final/rsq.csv"))

## BES

In [232]:
quest_data.filter(like="BES").to_csv(base_path.joinpath("data_tabular/questionnaires/final/bes.csv"))

## SOC

In [233]:
quest_data.filter(like="SOC").to_csv(base_path.joinpath("data_tabular/questionnaires/final/soc.csv"))

## TSGS

In [235]:
quest_data.filter(like="TSGS").to_csv(base_path.joinpath("data_tabular/questionnaires/final/tsgs.csv"))