In [None]:
# Goldberger, A., Amaral, L., Glass, L., Hausdorff, J., Ivanov, P. C., Mark, R., ... & Stanley, H. E. (2000). PhysioBank, PhysioToolkit, and PhysioNet: Components of a new research resource for complex physiologic signals. Circulation [Online]. 101 (23), pp. e215â€“e220. RRID:SCR_007345.
# https://doi.org/10.13026/m0w9-zx22

In [2]:
# run following python code in util directory
from utils.constants import questionnaire_dir
from utils.data_handling import load_all_files
import pandas as pd

# create csv files for questionnaire (store in preprocessed directory)
data_path = '../preprocessed/'
df = pd.concat(load_all_files(questionnaire_dir))
df.to_csv(f'{data_path}questionnaire_file_list.csv', index=False, sep=',')

OSError: Cannot save file into a non-existent directory: 'preprocessed'

In [1]:
import pandas as pd
# combine questionnaire and patient information csv files
# shorten csv files: keep relevant columns
quest_old_df = pd.read_csv('questionnaire_file_list.csv')
quest_relevant_columns = ['subject_id', 'link_id', 'answer']
quest_df = quest_old_df[quest_relevant_columns]

info_old_df = pd.read_csv('patient_info.csv')
info_relevant_columns = ['id', 'condition', 'disease_comment', 'age_at_diagnosis', 'age', 'height', 'weight', 'gender', 'label']
# use relevant columns and rename 'id' to 'subject_id' to match questionnaire
info_df = info_old_df[info_relevant_columns].rename(columns = {'id': 'subject_id'})

# use a LEFT JOIN merge since questionnaire uses 'subject_id' multiple times
combined_df = pd.merge(quest_df, info_df, on = 'subject_id', how = 'left')

# rename 'link_id' to 'question_id' for specificity
combined_df = combined_df.rename(columns = {'link_id': 'question_id'})

# filter only the "true" answers
true_responses = combined_df[combined_df['answer'] == True]

# group by question and condition with a total_count for "true" answers
summary_df = true_responses.groupby(['question_id', 'label']).size().reset_index(name='total_count')

In [2]:
# for interactive function of switching between question categories, map question_id with category based on the dataset website
category_map = {1:'gastrointestinal_tract',
                3:'gastrointestinal_tract',
                4:'gastrointestinal_tract',
                5:'gastrointestinal_tract',
                6:'gastrointestinal_tract',
                7:'gastrointestinal_tract',
                8:'urinal_tract',
                9:'urinal_tract',
                10:'pain',
                11:'miscellaneous',
                28:'miscellaneous',
                29:'miscellaneous',
                12:'apathy/attention/memory',
                13:'apathy/attention/memory',
                15:'apathy/attention/memory',
                2:'distortion_of_perception',
                14:'distortion_of_perception',
                30:'distortion_of_perception',
                16:'depression/anxiety',
                17:'depression/anxiety',
                18:'sexual_function',
                19:'sexual_function',
                20:'cardiovascular',
                21:'cardiovascular',
                27:'cardiovascular',
                22:'sleep/fatigue',
                23:'sleep/fatigue',
                24:'sleep/fatigue',
                25:'sleep/fatigue',
                26:'sleep/fatigue'}
# add 'category' to the summary_df
summary_df['category'] = summary_df['question_id'].map(category_map)

# specify the label column with condition name rather than number
label_map = {0:'Healthy',
             1:'Parkinson\'s',
             2:'Other motor disease'}
summary_df['label'] = summary_df['label'].map(label_map)
print(summary_df.head())

# create new column with the specific question asked
question_map = {1:'Dribbling of saliva during the daytime',
                2:'Loss or change in your ability to taste or smell',
                3:'Difficulty swallowing food or drink or problems with choking',
                4:'Vomiting or feelings of sickness (nausea)',
                5:'Constipation (less than 3 bowel movements a week) or having to strain to pass a stool (faeces)',
                6:'Bowel (fecal) incontinence',
                7:'Feeling that your bowel emptying is incomplete after having been to the toilet',
                8:'A sense of urgency to pass urine makes you rush to the toilet',
                9:'Getting up regularly at night to pass urine',
                10:'Unexplained pains (not due to known conditions such as arthritis)',
                11:'Unexplained change in weight (not due to change in diet)',
                12:'Problems remembering things that have happened recently or forgetting to do things',
                13:'Loss of interest in what is happening around you or doing things',
                14:'Seeing or hearing things that you know or are told are not there',
                15:'Difficulty concentrating or staying focussed',
                16:'\"Feeling sad, low or blue\"',
                17:'\"Feeling anxious, frightened or panicky\"',
                18:'Feeling less interested in sex or more interested in sex',
                19:'Finding it difficult to have sex when you try',
                20:'\"Feeling light headed, dizzy or weak standing from sitting or lying\"',
                21:'Falling',
                22:'\"Finding it difficult to stay awake during activities such as working, driving or eating\"',
                23:'Difficulty getting to sleep at night or staying asleep at night',
                24:'\"Intense, vivid dreams or frightening dreams\"',
                25:'Talking or moving about in your sleep as if you are acting out a dream',
                26:'\"Unpleasant sensations in your legs at night or while resting, and a feeling that you need to move\"',
                27:'Swelling of your legs',
                28:'Excessive sweating',
                29:'Double vision',
                30:'Believing things are happening to you that other people say are not true'}
summary_df['question'] = summary_df['question_id'].map(question_map)

   question_id                label  total_count                  category
0            1              Healthy            2    gastrointestinal_tract
1            1          Parkinson's          110    gastrointestinal_tract
2            1  Other motor disease           22    gastrointestinal_tract
3            2              Healthy            5  distortion_of_perception
4            2          Parkinson's          138  distortion_of_perception


In [3]:
import altair as alt

input_select = alt.binding_select(
   options = ['gastrointestinal_tract',
              'urinal_tract',
              'pain',
              'miscellaneous',
              'apathy/attention/memory',
              'distortion_of_perception',
              'depression/anxiety',
              'sexual_function',
              'cardiovascular',
              'sleep/fatigue'],
   labels = ['Gastrointestinal Tract',
             'Urinal Tract',
             'Pain',
             'Miscellaneous',
             'Apathy / Attention / Memory',
             'Distortion of Perception',
             'Depression / Anxiety',
             'Sexual Function',
             'Cardiovascular',
             'Sleep / Fatigue',],
   name = 'Select Questionnaire Category: '
)

selection = alt.selection_point(
    fields = ['category'],
    bind = input_select,
    value = 'gastrointestinal_tract'
)

visual = (alt.Chart(summary_df)
    .mark_bar()
    .encode(
        x = alt.X('question:N', title = 'Question Asked'),
        y = alt.Y('total_count:Q', title = 'Total Subjects Answering True'),
        color = alt.Color('label:N', title = 'Patient Group'),
        xOffset = 'label:N',
        tooltip = [
            alt.Tooltip('question:N', title = 'Question Asked'),
            alt.Tooltip('total_count:Q', title = 'Total Subjects Answering True'),
            alt.Tooltip('label:N', title = 'Patient Health')
        ]
    )
    .properties(
        width = 500,
        height = 500,
        title = "Questionnaire Responses by Category"
    )
    .add_params(selection)
    .transform_filter(selection)
)

visual.save('questionnaire.html')