In [4]:
#pip install ucimlrepo
# Little, M. (2007). Parkinsons [Dataset]. UCI Machine Learning Repository. https://doi.org/10.24432/C59C74.

In [None]:
# fetch dataset from ucimlrepo
from ucimlrepo import fetch_ucirepo
import pandas as pd

# download repository entry 174 (the Parkinsons dataset cited in the first cell)
parkinsonData = fetch_ucirepo(id=174)

# feature table and target table, kept as two separate objects at this point
X, y = parkinsonData.data.features, parkinsonData.data.targets

# make the duplicated jitter / shimmer column names unique (no columns are dropped)
# a numeric suffix is appended to each repeated column name so the later rename step can tell them apart
def duplicate_remover(df):
    """Make repeated column names unique by appending an occurrence index.

    The first occurrence of a name is left untouched; its n-th repeat
    (n = 1, 2, ...) becomes ``f"{name}{n}"``, e.g. ``['a', 'a', 'a']`` ->
    ``['a', 'a1', 'a2']``.  No separator is inserted, so the generated names
    line up with the keys ``rename_columns`` looks for
    (``'MDVP:Jitter1'``, ``'MDVP:Shimmer1'``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels may contain repeats.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; its ``columns`` attribute is
        reassigned in place, no copy is made and no column is dropped.
    """
    repeat_counts = {}
    unique_names = []
    for name in df.columns:
        # 0 for the first sighting of a name, 1 for its first repeat, and so on
        occurrence = repeat_counts.get(name, -1) + 1
        repeat_counts[name] = occurrence
        unique_names.append(name if occurrence == 0 else f"{name}{occurrence}")
    df.columns = unique_names
    return df

# swap the dataset's terse column labels for descriptive snake_case names
def rename_columns(df):
    """Return ``df.rename(columns=...)`` using a fixed label -> readable-name table.

    Labels that are not in the table are left as they are.  The result of
    ``DataFrame.rename`` is returned; ``df`` itself is not reassigned here.
    """
    frequency_names = {
        'MDVP:Fo': 'avg_frequency_hz',
        'MDVP:Fhi': 'max_frequency_hz',
        'MDVP:Flo': 'min_frequency_hz',
    }
    jitter_names = {
        'MDVP:Jitter': 'jitter_percent',
        'MDVP:Jitter1': 'jitter_abs',
        'MDVP:RAP': 'jitter_rap',
        'MDVP:PPQ': 'jitter_ppq',
        'Jitter:DDP': 'jitter_ddp',
    }
    shimmer_names = {
        'MDVP:Shimmer': 'shimmer_percent',
        'MDVP:Shimmer1': 'shimmer_abs',
        'MDVP:APQ': 'shimmer_apq',
        'Shimmer:DDA': 'shimmer_dda',
    }
    noise_names = {
        'NHR': 'noise_to_harmonics_ratio',
        'HNR': 'harmonics_to_noise_ratio',
    }
    other_names = {
        'RPDE': 'recurrence_period_density_entropy',
        'DFA': 'detrended_fluctuation_analysis',
        'spread1': 'frequency_variation_1',
        'spread2': 'frequency_variation_2',
        'D2': 'correlation_dimension',
        'PPE': 'pitch_period_entropy',
    }
    # the groups share no keys, so merging them yields one flat lookup table
    return df.rename(columns={
        **frequency_names,
        **jitter_names,
        **shimmer_names,
        **noise_names,
        **other_names,
    })

# join the feature and target tables column-wise so patient status sits next to the measurements,
# then make repeated column names unique and swap in the descriptive names
df_combined = (
    pd.concat([X, y], axis=1)
    .pipe(duplicate_remover)
    .pipe(rename_columns)
)

In [3]:
import altair as alt

# Interactive scatterplot of a vocal-frequency column (x) against a symptom column (y).
# - two dropdowns choose which frequency column and which symptom column are plotted
# - a radio group highlights patients by `status` (0 is labelled "Healthy", 1 "Parkinson's");
#   its None option is labelled "Both Displayed"
input_radio = alt.binding_radio(
    name = "Patient Health: ",
    labels = ["Both Displayed", "Healthy", "Parkinson's"],
    options = [None, 0, 1]
)
radio_selection = alt.selection_point(value = None, bind = input_radio, fields = ['status'])

# dropdown widgets: each option is a column name of df_combined, each label its display text
frequency_dropdown = alt.binding_select(
    name = "Select Frequency Type: ",
    labels = ["Average", "Maximum", "Minimum"],
    options = ["avg_frequency_hz", "max_frequency_hz", "min_frequency_hz"]
)
symptom_dropdown = alt.binding_select(
    name = "Select Symptom: ",
    labels = ["Jitter", "Shimmer"],
    options = ["jitter_percent", "shimmer_percent"]
)

# named params holding the currently chosen column names
xcol_param = alt.param(name = 'SelectFrequency', value = 'avg_frequency_hz', bind = frequency_dropdown)
ycol_param = alt.param(name = 'SelectSymptom', value = 'jitter_percent', bind = symptom_dropdown)

# points matching the radio selection keep their status colour at full opacity;
# all other points are drawn light gray and almost transparent
status_color = alt.condition(
    radio_selection,
    alt.Color("status:N", legend = None),
    alt.value('lightgray')
)
status_opacity = alt.condition(radio_selection, alt.value(1), alt.value(0.1))

# hover tooltip entries as (field shorthand, display title) pairs
tooltip_fields = [
    ("avg_frequency_hz:Q", "Average Frequency (Hz)"),
    ("max_frequency_hz:Q", "Maximum Frequency (Hz)"),
    ("min_frequency_hz:Q", "Minimum Frequency (Hz)"),
    ("jitter_percent:Q", "Jitter Percentage"),
    ("shimmer_percent:Q", "Shimmer Percentage"),
]
hover_tooltip = [alt.Tooltip(shorthand, title = title) for shorthand, title in tooltip_fields]

visual = (
    alt.Chart(df_combined)
    # dynamic_x / dynamic_y read, for every row, the column whose name the matching param holds
    .transform_calculate(
        dynamic_x = f'datum[{xcol_param.name}]',
        dynamic_y = f'datum[{ycol_param.name}]'
    )
    .mark_circle(size = 100, opacity = 0.8)
    .encode(
        x = alt.X('dynamic_x:Q', title = 'Selected Frequency (Hz)'),
        y = alt.Y("dynamic_y:Q", title = "Selected Symptom"),
        color = status_color,
        opacity = status_opacity,
        tooltip = hover_tooltip
    )
    .properties(
        title = "Vocal Frequency vs Symptom Percentage",
        width = 500,
        height = 400
    )
    .add_params(radio_selection, xcol_param, ycol_param)
)

# write the finished chart to audioRecording.html
visual.save("audioRecording.html")