This notebook generates the `all_questionnaires.csv` file by combining every subject's `S<X>_quest.csv` questionnaire file (for example `S2_quest.csv`).
Refer to the README for details.

In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyspark.sql import SparkSession
import os
import csv
import pickle

Matplotlib created a temporary cache directory at /scratch/tsanchez/job_39246932/matplotlib-rnn4x2ev because the default path (/home/jovyan/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
# Root folder of the raw WESAD data; each subject's questionnaire file is read
# from a per-subject sub-folder of this path.
base_path = "../ialtamirano/raw_data/WESAD"

In [5]:
# Inspect S2_quest.csv: print the order of its experimental conditions and the
# set of '#'-prefixed tags that appear after the '# ORDER' line.
quest_path = '../ialtamirano/raw_data/WESAD/S2/S2_quest.csv'

with open(quest_path) as quest_file:
    # Advance through the file until the '# ORDER' header (if there is one).
    order_header = next((row for row in quest_file if row.startswith('# ORDER')), None)

    if order_header is not None:
        # Fields 1..5 of the header are the five condition names; field 0 is the tag itself.
        conditions = order_header.strip().split(';')[1:6]
        for position, condition in enumerate(conditions, start=1):
            print(f'{position}: {condition}')

    # Whatever has not been consumed yet (everything after the header) is kept,
    # stripped, with blank lines dropped.
    raw_lines = [row.strip() for row in quest_file if row.strip()]

# Collect the distinct tags: the first ';'-separated field of every line starting with '#'.
tags = {row.split(';')[0] for row in raw_lines if row.startswith('#')}
print(f'Tags Present: {tags}')

1: Base
2: TSST
3: Medi 1
4: Fun
5: Medi 2
Tags Present: {'# DIM', '# PANAS', '# END', '# START', '# SSSQ', '# STAI'}


In [8]:
# Destination of the combined table holding all questionnaires.
QUESTIONNAIRE_CSV_PATH = './all_questionnaires.csv'

# Item names of each questionnaire, based on the dataset's PDF description.
# Names are lower-case; they are used as column-name suffixes
# (e.g. 'panas_active', 'stai_i_feel_at_ease').

# PANAS: 24 items
PANAS_QUESTIONS = (
    'active distressed interested inspired '
    'annoyed strong guilty scared hostile excited '
    'proud irritable enthusiastic ashamed alert nervous '
    'determined attentive jittery afraid stressed frustrated '
    'happy sad'
).split()

# STAI: 6 items
STAI_QUESTIONS = [
    'i_feel_at_ease',
    'i_feel_nervous',
    'i_am_jittery',
    'i_am_relaxed',
    'i_am_worried',
    'i_feel_pleasant',
]

# SAM: 2 items (stored in the '# DIM' lines of the questionnaire files)
SAM_QUESTIONS = ['valence', 'arousal']

# SSSQ: 6 items
SSSQ_QUESTIONS = [
    'committed_to_goals',
    'wanted_to_succeed',
    'motivated',
    'reflected_about_self',
    'worried_about_others',
    'concerned_about_impression',
]

# Map each of the 4 target conditions (by condition id) to the position of its
# answer row within a questionnaire. These positions correspond to the order
# Base, TSST, Medi 1, Fun, Medi 2.
condition_index_map = {
    1: 0,  # baseline
    2: 1,  # stress
    3: 3,  # amusement
    4: 2,  # meditation
}

# Human-readable name of each condition id.
condition_name_map = {
    1: 'baseline',
    2: 'stress',
    3: 'amusement',
    4: 'meditation',
}

# One flat record (dict) per (subject, condition) pair, collected across all subjects.
questionnaire_records = []

# Label that identifies each target condition on a subject's '# ORDER' line.
# NOTE(review): assumes the PANAS/STAI/DIM rows of a file are listed in that
# file's own '# ORDER' sequence -- confirm against the dataset readme.
CONDITION_ORDER_LABELS = {1: 'Base', 2: 'TSST', 3: 'Fun', 4: 'Medi 1'}


def split_answer_tokens(line):
    """Return the ';'-separated answer tokens of a questionnaire line, without the leading tag."""
    return line.split(';')[1:]


def _condition_row_indices(raw_lines, subject_id):
    """Map each condition id to the row position of its answers for one subject.

    The position is taken from the subject's own '# ORDER' line (first five
    condition names), so subjects whose conditions were run in a different
    order are still labelled correctly. If the line is missing or does not
    name every target condition, the fixed `condition_index_map` is used and a
    warning is printed.
    """
    order_line = next((ln for ln in raw_lines if ln.startswith('# ORDER')), '')
    labels = [token.strip() for token in order_line.split(';')[1:6]]
    try:
        return {cid: labels.index(label) for cid, label in CONDITION_ORDER_LABELS.items()}
    except ValueError:
        print(f'Warning: could not read the condition order for {subject_id}; '
              f'using the default order')
        return dict(condition_index_map)


def _answers_to_fields(prefix, question_names, answer_line):
    """Convert one answer line into {'<prefix>_<question>': int or pd.NA}.

    Answers are matched to question names purely by position. A token that is
    not an integer (e.g. empty) or that is missing because the line is too
    short becomes pd.NA.
    NOTE(review): confirm that every row of a questionnaire has the same item
    layout for all conditions, otherwise positional matching would misalign.
    """
    tokens = split_answer_tokens(answer_line)
    fields = {}
    for position, name in enumerate(question_names):
        try:
            fields[f'{prefix}_{name}'] = int(tokens[position])
        except (ValueError, IndexError):
            fields[f'{prefix}_{name}'] = pd.NA
    return fields


# Process each subject's questionnaire CSV (skipping the excluded subject ids).
for subject_id in sorted(os.listdir(base_path)):
    if not subject_id.startswith('S') or subject_id in {'S1', 'S12'}:
        continue

    # Read all non-empty lines of the subject's questionnaire file.
    csv_path = os.path.join(base_path, subject_id, f'{subject_id}_quest.csv')
    with open(csv_path) as f:
        raw_line = [line.strip() for line in f if line.strip()]

    # Group the answer lines by questionnaire tag (SAM answers use the '# DIM' tag).
    panas_lines = [line for line in raw_line if line.startswith('# PANAS')]
    stais_lines = [line for line in raw_line if line.startswith('# STAI')]
    sams_lines = [line for line in raw_line if line.startswith('# DIM')]
    sssq_lines = [line for line in raw_line if line.startswith('# SSSQ')]

    # Resolve, for THIS subject, which row belongs to which condition.
    row_index_by_condition = _condition_row_indices(raw_line, subject_id)

    # One record per target condition.
    for condition_id in range(1, 5):
        record = {
            'subject_id': subject_id,
            'condition_id': condition_id,
            'condition_name': condition_name_map[condition_id]}

        idx = row_index_by_condition[condition_id]

        record.update(_answers_to_fields('panas', PANAS_QUESTIONS, panas_lines[idx]))
        record.update(_answers_to_fields('stai', STAI_QUESTIONS, stais_lines[idx]))
        record.update(_answers_to_fields('sam', SAM_QUESTIONS, sams_lines[idx]))

        # SSSQ is only read for the stress condition (condition 2), from the
        # first '# SSSQ' line; every other condition gets NA (not applicable).
        if condition_id == 2:
            record.update(_answers_to_fields('sssq', SSSQ_QUESTIONS, sssq_lines[0]))
        else:
            record.update({f'sssq_{name}': pd.NA for name in SSSQ_QUESTIONS})

        questionnaire_records.append(record)


# Combine all records into a DataFrame and save it.
df_all = pd.DataFrame(questionnaire_records)
df_all.to_csv(QUESTIONNAIRE_CSV_PATH, index=False)

print(f'Saved all questionnaires to {QUESTIONNAIRE_CSV_PATH}')

Saved all questionnaires to ./all_questionnaires.csv
