In [2]:
import pandas as pd
import numpy as np
import json
import calculations
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
import statsmodels.api as sm
import warnings
from scipy.stats import iqr, skew, kurtosis, ttest_ind

In [3]:
# Disable runtime warnings due to datasets containing NaNs after skew/kurtosis calcs
warnings.filterwarnings('ignore', category=RuntimeWarning)

# Process data

stimuli = []
outliers = []
participants = []
scale_type = {
    "gesture_pitch_number": 100,
    "gesture_roll_number": 100,
    "slider_number": 100,
    "gesture_pitch_greyscale": 49,
    "gesture_roll_greyscale": 49,
    "slider_greyscale": 49,
}

outlier_cut_off = {
    "gesture_pitch_number": 50,
    "gesture_roll_number": 50,
    "slider_number": 50,
    "gesture_pitch_greyscale": 25,
    "gesture_roll_greyscale": 25,
    "slider_greyscale": 25,
}

truth_value_batches_100 = [
    [i for i in range(0, 10)],
    [i for i in range(10, 20)],
    [i for i in range(20, 30)],
    [i for i in range(30, 40)],
    [i for i in range(40, 50)],
    [i for i in range(50, 60)],
    [i for i in range(60, 70)],
    [i for i in range(70, 80)],
    [i for i in range(80, 90)],
    [i for i in range(90, 101)]
]

truth_value_batches_49 = [
    [i for i in range(0, 10)],
    [i for i in range(10, 20)],
    [i for i in range(20, 30)],
    [i for i in range(30, 40)],
    [i for i in range(40, 50)]
]

experiment_truth_type = {
    "gesture_pitch_number": {
        "truth": "pitch_truth",
        "truth_diff": "pitch_truth_diff",
        "label": "Pitch Truth",
        "input_type": "device"
    },
    "gesture_roll_number": {
        "truth": "roll_truth",
        "truth_diff": "roll_truth_diff",
        "label": "Roll Truth",
        "input_type": "device"
    },
     "slider_number": {
        "truth": "value",
        "truth_diff": "slider_diff",
        "label": "Slider Value",
        "input_type": "slider"
    },
    "gesture_pitch_greyscale": {
        "truth": "pitch_truth",
        "truth_diff": "pitch_truth_diff",
        "label": "Pitch Truth",
        "input_type": "device"
    },
    "gesture_roll_greyscale": {
        "truth": "roll_truth",
        "truth_diff": "roll_truth_diff",
        "label": "Roll Truth",
        "input_type": "device"
    },
    "slider_greyscale": {
        "truth": "value",
        "truth_diff": "slider_diff",
        "label": "Slider Value",
        "input_type": "slider"
    },
}

def ensure_not_outlier(value, experiment_type):
    cut_off = outlier_cut_off[experiment_type]
    return value <= cut_off

with open('experiment_data/combined_participant_data.json', 'r') as file:
    data = json.load(file)

    if data:
        for participant in data:
            participant_id = participant.get("id")
            participants.append(participant)
            for experiment in participant["completedExperiments"]:
                experiment_type = experiment.get("experimentType")
                started_date = experiment.get("startedDate")
                ended_date = experiment.get("endedDate")
                for stimulus in experiment["successfulStimuli"]:
                    sensor_reading = stimulus.get("sensorReading", {})
                    quaternion = (
                        sensor_reading.get("w"),
                        sensor_reading.get("x"),
                        sensor_reading.get("y"),
                        sensor_reading.get("z"),
                    )
                    x, y, z, w = quaternion
                    _, roll, pitch = calculations.quaternion_to_euler(x, y, z, w)
                    
                    sensor_reading["roll"] = roll
                    sensor_reading["pitch"] = pitch

                    pitch_scaled = calculations.scale_pitch(pitch, scale_type[experiment_type])
                    roll_scaled = calculations.scale_roll(roll, scale_type[experiment_type])
                    truth_value = stimulus.get("truth")
                    slider_value = stimulus.get("value")
                    
                    pitch_difference = abs(truth_value - pitch_scaled)
                    roll_difference = abs(truth_value - roll_scaled)

                    stimulus["participant_id"] = participant_id
                    stimulus["pitch"] = pitch
                    stimulus["pitch_truth"] = pitch_scaled
                    stimulus["pitch_truth_diff"] = pitch_difference
                
                    stimulus["roll"] = roll
                    stimulus["roll_truth"] = roll_scaled
                    stimulus["roll_truth_diff"] = roll_difference
                    
                    stimulus["slider_diff"] = abs(truth_value - slider_value)
                    
                    if truth_value == 0:
                        stimulus["pitch_truth_diff_percentage"] = (pitch_scaled / scale_type[experiment_type]) * 100
                        stimulus["roll_truth_diff_percentage"] = (roll_scaled / scale_type[experiment_type]) * 100
                    else:
                        stimulus["pitch_truth_diff_percentage"] = (pitch_difference / truth_value) * 100
                        stimulus["roll_truth_diff_percentage"] = (roll_difference / truth_value) * 100
                        
                    stimulus["experiment_type"] = experiment_type
                    stimulus["experiment_time_spent"] = ended_date - started_date
                    stimulus["calibration"] = sensor_reading.get("calibration_status")
                            
                    diff = stimulus[experiment_truth_type[experiment_type]["truth_diff"]]
                    if ensure_not_outlier(diff, experiment_type):
                        stimuli.append(stimulus)
                    else:
                        outliers.append(stimulus)

df = pd.DataFrame(data=stimuli)
df['participant_id'] = df['participant_id'].astype(int)
o_df = pd.DataFrame(data=outliers)
p_df = pd.DataFrame(data=participants)
p_df['age'] = p_df['age'].astype(int)

In [53]:
print(f'Valid stimuli: {len(df)}')
print(f'Outliers: {len(o_df)}')
print(f'Percentage of outliers: {len(o_df) / len(df) * 100}')
#display(df)
#display(o_df)
#display(p_df)

Valid stimuli: 4155
Outliers: 645
Percentage of outliers: 15.523465703971121


In [54]:
unique_ages = sorted(p_df['age'].unique())

fig = px.histogram(
    p_df, 
    x='age', 
    color='genderIdentity', 
    title='Participant Age and Gender Distribution',
    labels={
        'age': 'Age',
        'genderIdentity': 'Gender Identity'
    },
    category_orders={'age': unique_ages},
    barmode='group'
)
fig.update_xaxes(type='category')
fig.update_layout(width=600, height=500)
fig.show()

print(f'Female participants: {len(p_df[(p_df["genderIdentity"] == "female")])}')
print(f'Male participants: {len(p_df[(p_df["genderIdentity"] == "male")])}')
print(f'Avg age: {p_df['age'].mean()}')
print(f'Avg age: {p_df['age'].median()}')
print(f'Avg age: {p_df['age'].std()}')

Female participants: 13
Male participants: 27
Avg age: 30.95
Avg age: 30.0
Avg age: 6.702276689343169


In [55]:
g_pitch_greyscale_df = df[(df["experiment_type"] == "gesture_pitch_greyscale")]
g_pitch_number_df = df[(df["experiment_type"] == "gesture_pitch_number")]
g_roll_greyscale_df = df[(df["experiment_type"] == "gesture_roll_greyscale")]
g_roll_number_df = df[(df["experiment_type"] == "gesture_roll_number")]
g_slider_greyscale_df = df[(df["experiment_type"] == "slider_greyscale")]
g_slider_number_df = df[(df["experiment_type"] == "slider_number")]

fig = sp.make_subplots(rows=6, cols=2, subplot_titles=(
    'Pitch - Number (Scaled 0-100)',
    'Pitch Error - Number (Scaled 0-100)',
    'Roll - Number (Scaled 0-100)',
    'Roll Error - Number (Scaled 0-100)',
    'Slider - Number (Scaled 0-100)',
    'Slider Error - Number (Scaled 0-100)',
    'Pitch - Greyscale (Scaled 0-49)',
    'Pitch Error - Greyscale (Scaled 0-49)',
    'Roll - Greyscale (Scaled 0-49)',
    'Roll Error - Greyscale (Scaled 0-49)',
    'Slider - Greyscale (Scaled 0-49)',
    'Slider Error - Greyscale (Scaled 0-49)'
))

def add_scatter_with_equal_axes(df, truth_info, error_y, scale, row, col, fig):
    legend_repeated = row < 2 and col < 2
    trace = px.scatter(df, x="truth", y=truth_info['truth'], error_y=error_y, color_discrete_sequence=[px.colors.qualitative.Plotly[row - 1]]).data[0]
    fig.add_trace(trace, row=row, col=col)
    fig.update_xaxes(tickmode='linear', dtick=10, row=row, col=col)
    fig.update_yaxes(tickmode='linear', dtick=10, row=row, col=col)
    fig.add_shape(
        type="line",
        x0=0, y0=0, x1=scale, y1=scale,
        line=dict(color="Black", width=1, dash="dot"),
        row=row, col=col,
        showlegend=legend_repeated,
        name="Linear Truth")

add_scatter_with_equal_axes(g_pitch_number_df, experiment_truth_type["gesture_pitch_number"], None, scale_type["gesture_pitch_number"], 1, 1, fig)
add_scatter_with_equal_axes(g_pitch_number_df, experiment_truth_type["gesture_pitch_number"], experiment_truth_type["gesture_pitch_number"]["truth_diff"], scale_type["gesture_pitch_number"], 1, 2, fig)

add_scatter_with_equal_axes(g_roll_number_df, experiment_truth_type["gesture_roll_number"], None, scale_type["gesture_roll_number"], 2, 1, fig)
add_scatter_with_equal_axes(g_roll_number_df, experiment_truth_type["gesture_roll_number"], experiment_truth_type["gesture_roll_number"]["truth_diff"], scale_type["gesture_roll_number"], 2, 2, fig)

add_scatter_with_equal_axes(g_slider_number_df, experiment_truth_type["slider_number"], None, scale_type["slider_number"], 3, 1, fig)
add_scatter_with_equal_axes(g_slider_number_df, experiment_truth_type["slider_number"], experiment_truth_type["slider_number"]["truth_diff"], scale_type["slider_number"], 3, 2, fig)

add_scatter_with_equal_axes(g_pitch_greyscale_df, experiment_truth_type["gesture_pitch_greyscale"], None, scale_type["gesture_pitch_greyscale"], 4, 1, fig)
add_scatter_with_equal_axes(g_pitch_greyscale_df, experiment_truth_type["gesture_pitch_greyscale"], experiment_truth_type["gesture_pitch_greyscale"]["truth_diff"], scale_type["gesture_pitch_greyscale"], 4, 2, fig)

add_scatter_with_equal_axes(g_roll_greyscale_df, experiment_truth_type["gesture_roll_greyscale"], None, scale_type["gesture_roll_greyscale"], 5, 1, fig)
add_scatter_with_equal_axes(g_roll_greyscale_df, experiment_truth_type["gesture_roll_greyscale"], experiment_truth_type["gesture_roll_greyscale"]["truth_diff"], scale_type["gesture_roll_greyscale"], 5, 2, fig)

add_scatter_with_equal_axes(g_slider_greyscale_df, experiment_truth_type["slider_greyscale"], None, scale_type["slider_greyscale"], 6, 1, fig)
add_scatter_with_equal_axes(g_slider_greyscale_df, experiment_truth_type["slider_greyscale"], experiment_truth_type["slider_greyscale"]["truth_diff"], scale_type["slider_greyscale"], 6, 2, fig)

fig.update_layout(height=6*400, width=2*400, title_text="Sensor Readings Scatter Plots")
fig.update_xaxes(title_text="Truth Values")
fig.update_yaxes(title_text="Sensor Readings")

fig['layout']['yaxis1'].update(title='Pitch Sensor Readings')
fig['layout']['yaxis2'].update(title='Pitch Sensor Readings')
fig['layout']['yaxis3'].update(title='Roll Sensor Readings')
fig['layout']['yaxis4'].update(title='Roll Sensor Readings')
fig['layout']['yaxis5'].update(title='Slider Readings')
fig['layout']['yaxis6'].update(title='Slider Readings')
fig['layout']['yaxis7'].update(title='Roll Sensor Readings')
fig['layout']['yaxis8'].update(title='Roll Sensor Readings')
fig['layout']['yaxis9'].update(title='Pitch Sensor Readings')
fig['layout']['yaxis10'].update(title='Pitch Sensor Readings')
fig['layout']['yaxis11'].update(title='Slider Readings')
fig['layout']['yaxis12'].update(title='Slider Readings')

fig.show()

In [30]:
color_index = [
    "gesture_pitch_number",
    "gesture_roll_number",
    "slider_number",
    "gesture_pitch_greyscale",
    "gesture_roll_greyscale",
    "slider_greyscale"
]
colors = px.colors.qualitative.Plotly

def create_deviation_plots(truth_values, experiment_type, truth_type):
    filtered_df = df[(df['experiment_type'] == experiment_type) & 
                     (df['truth'].isin(truth_values)) & 
                     (df['inputType'] == truth_type["input_type"])]

    mean_truth = filtered_df.groupby("truth")[truth_type["truth"]].mean().reset_index()
    std_truth = filtered_df.groupby("truth")[truth_type["truth"]].std().reset_index()

    mean_std_df = pd.merge(mean_truth, std_truth, on="truth", suffixes=('_mean', '_std'))
    
    # Have to fill NaNs with zeroes, needed if one or more truths only have one reading
    mean_std_df[f'{truth_type["truth"]}_std'] = mean_std_df[f'{truth_type["truth"]}_std'].fillna(0)
    mean_std_df[f'{truth_type["truth"]}_std'] = mean_std_df[f'{truth_type["truth"]}_std'].abs()
    
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=mean_std_df["truth"], y=mean_std_df[f'{truth_type["truth"]}_mean'],
        mode='lines', line=dict(color=colors[color_index.index(experiment_type)]), name=f'Mean {truth_type["label"]}'
    ))

    upper_bound = mean_std_df[f'{truth_type["truth"]}_mean'] + mean_std_df[f'{truth_type["truth"]}_std']
    lower_bound = mean_std_df[f'{truth_type["truth"]}_mean'] - mean_std_df[f'{truth_type["truth"]}_std']
    
    fig.add_trace(go.Scatter(
        x=mean_std_df["truth"], y=upper_bound,
        fill=None, mode='lines', line=dict(color='lightblue'), name='Upper Deviation Range', showlegend=False
    ))
    fig.add_trace(go.Scatter(
        x=mean_std_df["truth"], y=lower_bound,
        fill='tonexty', mode='lines', line=dict(color='lightblue'), name='Deviation Range'
    ))
    
    scale_min = min(truth_values)
    scale_max = max(truth_values)
    
    fig.add_shape(
        type="line",
        x0=scale_min, y0=scale_min, x1=scale_max, y1=scale_max,
        line=dict(color="Black", width=1, dash="dot"),
        showlegend=True,
        name="Linear Truth"
    )

    fig.update_layout(
        title=f"Deviation Plot for {truth_type['label']} - {experiment_type}",
        xaxis_title="Truth Values",
        yaxis_title=f"{truth_type['label']} Values",
        hovermode="x",
        width=600,
        height=500
    )
    
    if len(truth_values) > 10:
        fig.update_xaxes(tickmode='linear', dtick=10)
        fig.update_yaxes(tickmode='linear', dtick=10)
    else:
        fig.update_yaxes(range=[0, scale_type[experiment_type]])

    fig.show()
    
create_deviation_plots([i for i in range(0, 101)], "gesture_pitch_number", experiment_truth_type["gesture_pitch_number"])
    
for batch in truth_value_batches_100:
    create_deviation_plots(batch, "gesture_pitch_number", experiment_truth_type["gesture_pitch_number"])

create_deviation_plots([i for i in range(0, 101)], "gesture_roll_number", experiment_truth_type["gesture_roll_number"])

for batch in truth_value_batches_100:
    create_deviation_plots(batch, "gesture_roll_number", experiment_truth_type["gesture_roll_number"])
    
create_deviation_plots([i for i in range(0, 101)], "slider_number", experiment_truth_type["slider_number"])
    
for batch in truth_value_batches_100:
    create_deviation_plots(batch, "slider_number", experiment_truth_type["slider_number"])
    
create_deviation_plots([i for i in range(0, 50)], "gesture_pitch_greyscale", experiment_truth_type["gesture_pitch_greyscale"])

for batch in truth_value_batches_49:
    create_deviation_plots(batch, "gesture_pitch_greyscale", experiment_truth_type["gesture_pitch_greyscale"])

create_deviation_plots([i for i in range(0, 50)], "gesture_roll_greyscale", experiment_truth_type["gesture_roll_greyscale"])

for batch in truth_value_batches_49:
    create_deviation_plots(batch, "gesture_roll_greyscale", experiment_truth_type["gesture_roll_greyscale"])

create_deviation_plots([i for i in range(0, 50)], "slider_greyscale", experiment_truth_type["slider_greyscale"])

for batch in truth_value_batches_49:
    create_deviation_plots(batch, "slider_greyscale", experiment_truth_type["slider_greyscale"])

['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A', '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']


In [31]:
def create_histogram_truth_plots(experiment_type, truth_type):
    filtered_df = df[(df['experiment_type'] == experiment_type) & 
                     (df['inputType'] == truth_type["input_type"])]

    fig = px.histogram(filtered_df, x=truth_type["truth_diff"],
                    title=f'{truth_type['label']} Differences - {experiment_type}',
                    labels={f'{truth_type["truth_diff"]}': f'{truth_type["label"]} Difference'},
                    color_discrete_sequence=[colors[color_index.index(experiment_type)]]
                    )
    fig.update_layout(width=500, height=500)
    fig.show()
    
for experiment_type, truth_type in experiment_truth_type.items():
    create_histogram_truth_plots(experiment_type, truth_type)

In [71]:
def calculate_summary_statistics(experiment_type, truth_type):
    filtered_df = df[df['experiment_type'] == experiment_type]
    grouped = filtered_df.groupby('truth')[truth_type].agg(['mean', 'std', 'min', 'max', 'count', 'median'])
    grouped['iqr'] = filtered_df.groupby('truth')[truth_type].apply(iqr)
    grouped['skew'] = filtered_df.groupby('truth')[truth_type].apply(skew)
    grouped['kurtosis'] = filtered_df.groupby('truth')[truth_type].apply(kurtosis)
    
    # Kurtosis and skew calculate NaNs on certain truths (0 and 100), but it's not an issue. Filling NaNs with zeroes
    grouped = grouped.fillna(0)
    return grouped.reset_index()

def generate_summary_tables():
    summary_tables = {}
    for experiment_type, truth_info in experiment_truth_type.items():
        summary_table = calculate_summary_statistics(experiment_type, truth_info['truth'])
        summary_table['experiment_type'] = experiment_type
        summary_tables[experiment_type] = summary_table
    return summary_tables

def calculate_truth_diff_percentages(experiment_type, truth_diff_column):
    filtered_df = df[df['experiment_type'] == experiment_type]
    scale = scale_type[experiment_type]
    scale_5 = scale * 0.05
    scale_10 = scale * 0.10
    scale_15 = scale * 0.15
    scale_20 = scale * 0.20
    scale_25 = scale * 0.25
    count = filtered_df.shape[0]
    within_5 = (filtered_df[truth_diff_column].abs() <= scale_5).sum() / count * 100
    within_10 = (filtered_df[truth_diff_column].abs() <= scale_10).sum() / count * 100
    within_15 = (filtered_df[truth_diff_column].abs() <= scale_15).sum() / count * 100
    within_20 = (filtered_df[truth_diff_column].abs() <= scale_20).sum() / count * 100
    within_25 = (filtered_df[truth_diff_column].abs() <= scale_25).sum() / count * 100
    return within_5, within_10, within_15, within_20, within_25

def calculate_averages_per_experiment_type(summary_tables):
    averages = []
    for experiment_type, summary_table in summary_tables.items():
        within_5, within_10, within_15, within_20, within_25 = calculate_truth_diff_percentages(experiment_type, experiment_truth_type[experiment_type]['truth_diff'])
        
        time_spent_df = df[df['experiment_type'] == experiment_type].drop_duplicates(subset=['participant_id', 'experiment_type'])
        time_spent_avg = time_spent_df['experiment_time_spent'].abs().mean()
        time_spent_min = time_spent_df['experiment_time_spent'].abs().min()
        time_spent_max = time_spent_df['experiment_time_spent'].abs().max()
        
        averages.append({
            'experiment_type': experiment_type,
            'time_spent_avg': time_spent_avg,
            'time_spent_min': time_spent_min,
            'time_spent_max': time_spent_max,
            'mean': summary_table['mean'].mean(),
            'std': summary_table['std'].mean(),
            'min': summary_table['min'].min(),
            'max': summary_table['max'].max(),
            'count': summary_table['count'].count(),
            'median': summary_table['median'].median(),
            'iqr': summary_table['iqr'].mean(),
            'skew': summary_table['skew'].mean(),
            'kurtosis': summary_table['kurtosis'].mean(),
            'within_5_percent': within_5,
            'within_10_percent': within_10,
            'within_15_percent': within_15,
            'within_20_percent': within_20,
            'within_25_percent': within_25
        })
    return pd.DataFrame(averages)

summary_tables = generate_summary_tables()
averages_df = calculate_averages_per_experiment_type(summary_tables)

display(averages_df)

for _, summary_table in summary_tables.items():
    display(summary_table)
    
total_time_spent_per_participant = df.drop_duplicates(subset=['participant_id', 'experiment_type']).groupby('participant_id')['experiment_time_spent'].sum().reset_index()
total_time_spent_per_participant.columns = ['participant_id', 'total_time_spent']
total_time_spent_per_participant = total_time_spent_per_participant.sort_values(by=['participant_id'])

total_time_min = total_time_spent_per_participant['total_time_spent'].min()
total_time_mean = total_time_spent_per_participant['total_time_spent'].mean()
total_time_max = total_time_spent_per_participant['total_time_spent'].max()

display(total_time_spent_per_participant)

total_time_statistics = pd.DataFrame({
    'Statistic': ['Min', 'Mean', 'Max'],
    'Total Time Spent (Seconds)': [total_time_min, total_time_mean, total_time_max],
    'Total Time Spent (Minutes)': [total_time_min / 60, total_time_mean / 60, total_time_max / 60],
})

display(total_time_statistics)

Unnamed: 0,experiment_type,time_spent_avg,time_spent_min,time_spent_max,mean,std,min,max,count,median,iqr,skew,kurtosis,within_5_percent,within_10_percent,within_15_percent,within_20_percent,within_25_percent
0,gesture_pitch_number,142.462478,73.162797,399.222038,43.955382,9.8535,0.0,100.0,101,45.0,10.133663,0.225807,-0.564453,36.641221,62.468193,78.498728,88.167939,94.147583
1,gesture_roll_number,147.745097,80.542917,437.24336,52.601621,12.083263,0.0,98.0,101,54.0,13.284653,-0.199837,-0.600441,27.889447,48.241206,66.457286,78.140704,86.683417
2,slider_number,156.41774,65.096681,313.937267,48.825613,3.64819,0.0,100.0,101,50.086716,3.995099,-0.102858,-0.485317,74.185464,94.110276,98.370927,99.373434,99.749373
3,gesture_pitch_greyscale,154.363283,100.113338,303.560128,22.163599,11.516437,0.0,49.0,50,20.0,17.96,0.150403,-1.092592,13.139932,21.16041,33.959044,41.12628,53.071672
4,gesture_roll_greyscale,178.676615,103.378948,501.023651,25.062057,9.874656,0.0,48.0,50,25.75,14.36,-0.084693,-0.857869,14.009662,21.256039,34.460548,45.088567,58.454106
5,slider_greyscale,156.690465,93.029034,246.268256,23.826014,11.83534,0.0,49.0,50,24.789451,18.394495,0.027139,-1.02282,13.908451,25.176056,35.56338,47.007042,55.457746


Unnamed: 0,truth,mean,std,min,max,count,median,iqr,skew,kurtosis,experiment_type
0,0,4.200000,4.237400,0,15,10,3.0,1.75,1.710159,2.455360,gesture_pitch_number
1,1,3.200000,3.962323,0,10,5,2.0,2.00,1.209900,-0.124001,gesture_pitch_number
2,2,5.200000,1.923538,2,7,5,6.0,1.00,-1.017952,-0.348064,gesture_pitch_number
3,3,3.000000,2.449490,1,6,4,2.5,3.50,0.314270,-1.592593,gesture_pitch_number
4,4,7.285714,5.075258,0,15,14,5.5,8.50,0.273959,-1.397457,gesture_pitch_number
...,...,...,...,...,...,...,...,...,...,...,...
96,96,88.333333,7.549834,78,100,9,87.0,8.00,0.538742,-0.825081,gesture_pitch_number
97,97,87.000000,10.198039,73,100,5,88.0,10.00,-0.143892,-1.028731,gesture_pitch_number
98,98,86.600000,9.240010,72,100,10,86.5,12.25,-0.206554,-1.017168,gesture_pitch_number
99,99,86.777778,8.422853,74,96,9,91.0,13.00,-0.303865,-1.576782,gesture_pitch_number


Unnamed: 0,truth,mean,std,min,max,count,median,iqr,skew,kurtosis,experiment_type
0,0,13.000000,15.684387,1,36,4,7.5,11.00,1.000767,-0.772975,gesture_roll_number
1,1,13.333333,9.136009,1,25,6,13.0,12.00,-0.009066,-1.270703,gesture_roll_number
2,2,17.400000,9.008638,3,28,10,21.0,12.75,-0.545496,-1.121655,gesture_roll_number
3,3,22.600000,14.120120,2,44,10,21.0,20.25,0.150598,-1.131065,gesture_roll_number
4,4,27.285714,9.724784,12,39,7,24.0,13.00,-0.197019,-1.139066,gesture_roll_number
...,...,...,...,...,...,...,...,...,...,...,...
96,96,84.866667,14.744813,47,98,15,90.0,14.00,-1.422648,1.005490,gesture_roll_number
97,97,84.000000,15.964246,54,97,8,92.0,12.75,-1.125867,-0.425798,gesture_roll_number
98,98,81.500000,14.611639,60,98,6,82.5,19.75,-0.287593,-1.291791,gesture_roll_number
99,99,87.333333,12.878923,62,98,6,91.0,5.00,-1.500216,0.727784,gesture_roll_number


Unnamed: 0,truth,mean,std,min,max,count,median,iqr,skew,kurtosis,experiment_type
0,0,0.000000,0.000000,0.000000,0.000000,9,0.000000,0.000000,0.000000,0.000000,slider_number
1,1,0.279079,0.291243,0.000000,0.952432,10,0.247583,0.248597,1.256456,0.881797,slider_number
2,2,0.511521,0.474622,0.062684,1.238188,5,0.538476,0.498784,0.599621,-0.871930,slider_number
3,3,1.727256,1.256311,0.121591,3.960529,9,1.375663,1.324886,0.633870,-0.704310,slider_number
4,4,1.916536,1.174061,0.939227,4.802597,9,1.428166,0.822436,1.826212,2.248111,slider_number
...,...,...,...,...,...,...,...,...,...,...,...
96,96,97.479623,2.045820,94.162639,99.486481,5,97.768463,1.487763,-0.862639,-0.506769,slider_number
97,97,98.535673,0.705188,97.511674,99.248752,8,98.877689,1.096923,-0.537688,-1.415678,slider_number
98,98,99.326813,0.231465,99.078677,99.536891,3,99.364870,0.229107,-0.293893,-1.500000,slider_number
99,99,99.677867,0.253713,99.313720,99.953773,5,99.665208,0.294972,-0.349900,-1.114389,slider_number


Unnamed: 0,truth,mean,std,min,max,count,median,iqr,skew,kurtosis,experiment_type
0,0,13.125,9.037663,0,24,24,15.5,18.5,-0.236782,-1.55586,gesture_pitch_greyscale
1,1,12.75,7.664855,1,26,12,14.0,8.25,0.061122,-0.767839,gesture_pitch_greyscale
2,2,10.888889,10.325266,0,25,9,11.0,19.0,0.200334,-1.610406,gesture_pitch_greyscale
3,3,8.857143,8.970852,1,26,7,5.0,10.0,1.026841,-0.162633,gesture_pitch_greyscale
4,4,13.9,10.764654,0,29,10,17.0,17.5,-0.05015,-1.406065,gesture_pitch_greyscale
5,5,13.818182,9.857162,0,30,11,12.0,12.0,0.416569,-0.894117,gesture_pitch_greyscale
6,6,9.8,5.069517,3,15,5,12.0,7.0,-0.397488,-1.483088,gesture_pitch_greyscale
7,7,16.222222,10.121484,0,28,9,21.0,17.0,-0.572898,-1.276946,gesture_pitch_greyscale
8,8,10.846154,10.22942,1,29,13,6.0,17.0,0.576254,-1.174862,gesture_pitch_greyscale
9,9,17.416667,10.334922,6,34,12,12.5,17.25,0.484571,-1.34278,gesture_pitch_greyscale


Unnamed: 0,truth,mean,std,min,max,count,median,iqr,skew,kurtosis,experiment_type
0,0,11.65,7.4075,1,25,20,13.0,13.0,0.084831,-1.291665,gesture_roll_greyscale
1,1,17.222222,7.870479,6,25,9,19.0,13.0,-0.508667,-1.387468,gesture_roll_greyscale
2,2,14.75,7.086204,4,25,8,15.5,9.0,-0.106548,-1.178202,gesture_roll_greyscale
3,3,20.6,6.310485,9,28,10,23.5,8.75,-0.6708,-0.901385,gesture_roll_greyscale
4,4,21.2,11.497826,1,29,5,26.0,4.0,-1.37111,0.073195,gesture_roll_greyscale
5,5,17.25,7.237842,2,27,12,17.5,9.5,-0.463627,-0.237055,gesture_roll_greyscale
6,6,17.315789,9.632426,2,30,19,14.0,15.5,-0.021382,-1.411723,gesture_roll_greyscale
7,7,19.3,5.498485,10,29,10,18.5,6.75,0.188705,-0.519048,gesture_roll_greyscale
8,8,17.5,10.672394,4,33,6,17.5,13.0,0.149233,-1.108514,gesture_roll_greyscale
9,9,22.235294,7.89406,4,34,17,24.0,11.0,-0.627571,0.006547,gesture_roll_greyscale


Unnamed: 0,truth,mean,std,min,max,count,median,iqr,skew,kurtosis,experiment_type
0,0,5.962904,8.863585,0.0,23.779242,9,0.95398,9.932147,1.128204,-0.265037,slider_greyscale
1,1,10.78731,10.321972,0.0,25.85638,8,6.894465,16.753436,0.428695,-1.457268,slider_greyscale
2,2,12.006344,8.761011,0.0,24.858178,11,15.035556,13.181805,-0.223034,-1.285385,slider_greyscale
3,3,5.68655,4.357942,1.275259,13.260913,6,5.88265,3.694565,0.728339,-0.351563,slider_greyscale
4,4,7.967513,9.588557,0.0,27.240539,17,2.470672,16.420128,0.779486,-0.974873,slider_greyscale
5,5,6.820443,9.101008,0.0,28.889381,13,2.198336,9.690809,1.297674,0.622771,slider_greyscale
6,6,20.531072,8.514144,5.369027,30.598727,9,22.420165,6.029935,-0.572572,-0.717548,slider_greyscale
7,7,8.612276,8.681202,0.0,22.779536,13,6.40862,15.902264,0.566701,-1.287191,slider_greyscale
8,8,8.150485,9.380019,0.0,25.443989,15,7.119905,12.112739,0.709362,-0.866924,slider_greyscale
9,9,10.263405,11.566068,0.0,30.606294,12,3.935996,17.849345,0.741621,-1.074912,slider_greyscale


Unnamed: 0,participant_id,total_time_spent
0,1,887.257005
1,2,882.93046
2,3,773.93381
3,4,1058.721711
4,5,948.011288
5,6,618.425564
6,7,693.265254
7,8,770.342037
8,9,998.765029
9,10,602.65093


Unnamed: 0,Statistic,Total Time Spent (Seconds),Total Time Spent (Minutes)
0,Min,576.042849,9.600714
1,Mean,936.355678,15.605928
2,Max,1947.956974,32.46595


In [69]:
for experiment_type, truth_info in experiment_truth_type.items():
    filtered_df = df[df['experiment_type'] == experiment_type]
    response_column = truth_info['truth_diff']
    for comparison_experiment_type, comparison_truth_info in experiment_truth_type.items():
        comparison_filtered_df = df[df['experiment_type'] == comparison_experiment_type]
        comparison_response_column = comparison_truth_info['truth_diff']

        t_stat, p_value = ttest_ind(filtered_df[response_column], comparison_filtered_df[comparison_response_column], equal_var=False)
        print(f"t-statistic for {experiment_type} vs {comparison_experiment_type}: {t_stat}, p-value: {p_value}\n")

t-statistic for gesture_pitch_number vs gesture_pitch_number: 0.0, p-value: 1.0

t-statistic for gesture_pitch_number vs gesture_roll_number: -6.667846008118237, p-value: 3.607530364007293e-11

t-statistic for gesture_pitch_number vs slider_number: 18.82498339113322, p-value: 1.6140050123472025e-68

t-statistic for gesture_pitch_number vs gesture_pitch_greyscale: -4.286567426781089, p-value: 1.9448590708442383e-05

t-statistic for gesture_pitch_number vs gesture_roll_greyscale: -2.849464353979786, p-value: 0.004443328796570874

t-statistic for gesture_pitch_number vs slider_greyscale: -2.7756445953636244, p-value: 0.0055870051921530555

t-statistic for gesture_roll_number vs gesture_pitch_number: 6.667846008118237, p-value: 3.607530364007293e-11

t-statistic for gesture_roll_number vs gesture_roll_number: 0.0, p-value: 1.0

t-statistic for gesture_roll_number vs slider_number: 24.436500323212133, p-value: 7.173099210186617e-104

t-statistic for gesture_roll_number vs gesture_pitch_grey

In [35]:
all_valid_stimuli = []
all_outliers = []

for experiment_type, truth_info in experiment_truth_type.items():
    valid_stimuli = df[df['experiment_type'] == experiment_type].copy()
    valid_stimuli.loc[:, 'status'] = 'Valid'
    
    outliers = o_df[o_df['experiment_type'] == experiment_type].copy()
    outliers.loc[:, 'status'] = 'Outlier'
    
    # Transform into a standardized column, because each experiment type has its' own calculated truth value (e.g. pitch_truth_diff, roll_truth_diff, slider_diff)
    valid_stimuli.loc[:, 'standard_truth_diff'] = valid_stimuli[truth_info['truth_diff']]
    valid_stimuli.loc[:, 'standard_truth'] = valid_stimuli[truth_info['truth']]
    
    outliers.loc[:, 'standard_truth_diff'] = outliers[truth_info['truth_diff']]
    outliers.loc[:, 'standard_truth'] = outliers[truth_info['truth']]
    
    all_valid_stimuli.append(valid_stimuli.reset_index(drop=True))
    all_outliers.append(outliers.reset_index(drop=True))

combined_valid_stimuli = pd.concat(all_valid_stimuli, ignore_index=True)
combined_outliers = pd.concat(all_outliers, ignore_index=True)
combined_df = pd.concat([combined_valid_stimuli, combined_outliers], ignore_index=True)

fig_histogram = px.histogram(combined_outliers, x="standard_truth_diff", color='experiment_type', title="Histogram of Outlier Values")
fig_histogram.update_layout(xaxis_title="Truth Difference", width=700, height=500)
fig_histogram.update_xaxes(tickmode='linear', dtick=10)
fig_histogram.update_yaxes(tickmode='linear', dtick=10)
fig_histogram.show()

fig_box = px.box(combined_df, x='status', y='standard_truth_diff', color='experiment_type', points="all", title="Box Plot Comparing Valid Stimuli and Outliers")
fig_box.update_layout(xaxis_title="Status", yaxis_title="Truth Difference", width=1100, height=500)
fig_box.update_yaxes(tickmode='linear', dtick=10)
fig_box.show()

fig_scatter = px.scatter(combined_outliers, x="standard_truth", y="standard_truth_diff", color='experiment_type', title="Scatter Plot of Outliers")
fig_scatter.update_layout(xaxis_title="Truth", yaxis_title="Truth Difference", width=700, height=500)
fig_scatter.update_xaxes(tickmode='linear', dtick=10)
fig_scatter.update_yaxes(tickmode='linear', dtick=10)
fig_scatter.show()

counts_df = combined_df.groupby(['experiment_type', 'status']).size().reset_index(name='count')
fig_bar = px.bar(counts_df, x='experiment_type', y='count', color='status', barmode='group', title="Count of Valid Stimuli and Outliers per Experiment Type")
fig_bar.update_layout(xaxis_title="Experiment Type", width=700, height=500)
fig_bar.show()

In [4]:
combined_data = []

for experiment_type, truth_info in experiment_truth_type.items():
    standardized = df[df['experiment_type'] == experiment_type].copy()
    
    # Transform into a standardized column, because each experiment type has its' own calculated truth value (e.g. pitch_truth_diff, roll_truth_diff, slider_diff)
    standardized.loc[:, 'standard_truth_diff'] = standardized[truth_info['truth_diff']]
    standardized.loc[:, 'standard_truth'] = standardized[truth_info['truth']]

    standardized['is_last_five'] = standardized.groupby('participant_id').cumcount(ascending=False) < 5
    
    combined_data.append(standardized)

combined_df = pd.concat(combined_data, ignore_index=True)

last_five_accuracy = combined_df[combined_df['is_last_five']].groupby('experiment_type')['standard_truth_diff'].mean().reset_index()
last_five_accuracy.columns = ['experiment_type', 'last_five_mean_accuracy']

previous_accuracy = combined_df[~combined_df['is_last_five']].groupby('experiment_type')['standard_truth_diff'].mean().reset_index()
previous_accuracy.columns = ['experiment_type', 'previous_mean_accuracy']

accuracy_comparison = pd.merge(last_five_accuracy, previous_accuracy, on='experiment_type')

accuracy_comparison['comparison'] = accuracy_comparison.apply(
    lambda row: 'More Accurate' if row['last_five_mean_accuracy'] < row['previous_mean_accuracy'] else 'Less Accurate', axis=1
)

print(accuracy_comparison)

fig = px.bar(accuracy_comparison, x='experiment_type', y=['last_five_mean_accuracy', 'previous_mean_accuracy'], 
             barmode='group', title="Comparison of Last 5 Stimuli Truth Diff vs First 15 Stimuli", labels={'value': 'Average Truth Difference', 'variable': 'Stimuli Type'})
fig.update_layout(xaxis_title="Experiment Type", width=700, height=500, legend_title_text='Stimuli Type')
fig.for_each_trace(lambda t: t.update(name={
    'last_five_mean_accuracy': 'Last 5 Stimuli',
    'previous_mean_accuracy': 'First 15 Stimuli'
}[t.name]))
fig.show()

           experiment_type  last_five_mean_accuracy  previous_mean_accuracy  \
0  gesture_pitch_greyscale                12.730000               11.476684   
1     gesture_pitch_number                10.050000               10.040956   
2   gesture_roll_greyscale                11.670000               11.023753   
3      gesture_roll_number                13.690000               13.031879   
4         slider_greyscale                10.593752               11.583569   
5            slider_number                 3.575814                3.856925   

      comparison  
0  Less Accurate  
1  Less Accurate  
2  Less Accurate  
3  Less Accurate  
4  More Accurate  
5  More Accurate  


In [66]:
accuracy_threshold = {
    "gesture_pitch_number": 0.10,
    "gesture_roll_number": 0.10,
    "slider_number": 0.10,
    "gesture_pitch_greyscale": 0.15,
    "gesture_roll_greyscale": 0.15,
    "slider_greyscale": 0.15,
}

def get_valid_range(truth, original_scale, segments):
    segment_size = original_scale / segments
    segment_start = (truth // segment_size) * segment_size
    segment_end = segment_start + segment_size
    return segment_start, segment_end

def calculate_accuracy(df, experiment_type, truth_col, response_col, diff_col, original_scale, segments_list, accuracy_threshold):
    results = []
    original = []
    
    threshold = original_scale * accuracy_threshold
    df['within_threshold_current'] = (df[diff_col].abs() <= threshold).astype(int)
    
    current_accuracy = df['within_threshold_current'].mean() * 100
    original.append({
        'segments': scale_type[experiment_type],
        'accuracy': current_accuracy
    })
    
    total_stimuli = len(df)
    
    for segments in segments_list:
        valid_range = df.apply(lambda row: get_valid_range(row[truth_col], original_scale, segments), axis=1)
        df[f'segment_start_{segments}'] = valid_range.apply(lambda x: x[0])
        df[f'segment_end_{segments}'] = valid_range.apply(lambda x: x[1])
        
        df[f'within_segment_threshold_{segments}'] = (
            ((df[response_col] >= df[f'segment_start_{segments}']) & 
             (df[response_col] <= df[f'segment_end_{segments}'])) |
            (df[diff_col].abs() <= threshold)
        ).astype(int)
        
        accuracy_segment_threshold = df[f'within_segment_threshold_{segments}'].mean() * 100
        
        excluded_stimuli = total_stimuli - df[f'within_segment_threshold_{segments}'].sum()
        excluded_percentage = (excluded_stimuli / total_stimuli) * 100
        
        results.append({
            'segments': segments,
            'accuracy_segment_threshold': accuracy_segment_threshold,
            'excluded_percentage': excluded_percentage
        })
    
    return pd.DataFrame(results), pd.DataFrame(original)

combined_results = []
combined_original_results = []

for experiment_type, truth_info in experiment_truth_type.items():
    #if "greyscale" in experiment_type:
    #    continue
    
    df_experiment = df[df['experiment_type'] == experiment_type].copy()
    
    original_scale = scale_type[experiment_type]
    segments_list = [7, 5, 4, 3, 2]
    threshold = accuracy_threshold[experiment_type]
    
    accuracy_df, original_accuracy_df = calculate_accuracy(df_experiment, experiment_type, 'truth', truth_info['truth'], truth_info['truth_diff'], original_scale, segments_list, threshold)
    accuracy_df['experiment_type'] = experiment_type
    original_accuracy_df['experiment_type'] = experiment_type
    
    combined_results.append(accuracy_df)
    combined_original_results.append(original_accuracy_df)

combined_accuracy_df = pd.concat(combined_results, ignore_index=True)
combined_accuracy_original_df = pd.concat(combined_original_results, ignore_index=True)

fig_original_comparison = px.bar(
    combined_accuracy_original_df, 
    x='experiment_type', 
    y='accuracy', 
    color='experiment_type', 
    title="Accuracy on Original Scale",
    labels={
        'accuracy': 'Accuracy (%)',
        'experiment_type': 'Experiment Type'
    }
)
fig_original_comparison.update_layout(width=700, height=500, showlegend=False)
fig_original_comparison.show()

fig_segment_threshold_comparison = px.bar(
    combined_accuracy_df, 
    x='segments', 
    y='accuracy_segment_threshold', 
    color='experiment_type', 
    barmode='group', 
    title="Accuracy Comparison for Different Scale Segments",
    labels={
        'accuracy_segment_threshold': 'Accuracy (%)',
        'segments': 'Number of Segments'
    }
)
fig_segment_threshold_comparison.update_layout(width=700, height=500)
fig_segment_threshold_comparison.show()

fig_excluded_percentage_comparison = px.bar(
    combined_accuracy_df, 
    x='segments', 
    y='excluded_percentage', 
    color='experiment_type', 
    barmode='group', 
    title="Percentage of Excluded Stimuli for Different Scale Segments",
    labels={
        'excluded_percentage': 'Excluded Stimuli (%)',
        'segments': 'Number of Segments'
    }
)
fig_excluded_percentage_comparison.update_layout(width=700, height=500)
fig_excluded_percentage_comparison.show()

In [67]:
def get_segment_color(experiment_type, row, criteria):
    if row[criteria] == 1:
        return colors[color_index.index(experiment_type)]
    else:
        return '#7F7F7F' # Grey out excluded stimuli

def add_scatter_plot(df, experiment_type, truth_info, truth_col, response_col, diff_col, scale, segment, accuracy_threshold):
    threshold = scale * accuracy_threshold
    df[f'segment_start_{segment}'], df[f'segment_end_{segment}'] = zip(*df.apply(lambda row: get_valid_range(row[truth_col], scale, segment), axis=1))

    df[f'within_segment_threshold_{segment}'] = (
        ((df[response_col] >= df[f'segment_start_{segment}']) & 
         (df[response_col] <= df[f'segment_end_{segment}'])) |
        (df[diff_col].abs() <= threshold)
    ).astype(int)

    color_col = f'color_within_segment_threshold_{segment}'
    df[color_col] = df.apply(lambda row: get_segment_color(experiment_type, row, f'within_segment_threshold_{segment}'), axis=1)

    fig = go.Figure()

    scatter = go.Scatter(
        x=df[truth_col],
        y=df[response_col],
        mode='markers',
        marker=dict(color=df[color_col]),
        name=f'Within Range of Segment {segment} + Threshold ({accuracy_threshold})',
        legendgroup=f'within_segment_threshold_{segment}',
        showlegend=True
    )

    fig.add_trace(scatter)

    fig.update_xaxes(tickmode='linear', dtick=10)
    fig.update_yaxes(tickmode='linear', dtick=10)
    fig.add_shape(
        type="line",
        x0=0, y0=0, x1=scale, y1=scale,
        line=dict(color="Black", width=1, dash="dot"),
        showlegend=True,
        name="Linear Truth"
    )

    fig.update_layout(
        title=f"Scatter Plot for {truth_info['label']} (Segment Size: {segment}) - {experiment_type}",
        xaxis_title='Truth',
        yaxis_title=truth_info['label'],
        height=600,
        width=800
    )

    fig.show()

segments_list = [7, 5, 4, 3, 2]

for experiment_type, truth_info in experiment_truth_type.items():
    if "greyscale" in experiment_type:
        continue
    
    for segment in segments_list:
        df_experiment = df[df['experiment_type'] == experiment_type].copy()
        original_scale = scale_type[experiment_type]
        threshold = accuracy_threshold[experiment_type]
        add_scatter_plot(df_experiment, experiment_type, truth_info, 'truth', truth_info['truth'], truth_info['truth_diff'], original_scale, segment, threshold)

In [68]:
def calculate_accuracy_per_participant(experiment_type, truth_diff_column, threshold_percentage):
    filtered_df = df[df['experiment_type'] == experiment_type]
    scale = scale_type[experiment_type]
    threshold = scale * threshold_percentage
    
    accuracy_df = filtered_df.groupby('participant_id').apply(
        lambda x: (x[truth_diff_column].abs() <= threshold).sum() / len(x) * 100
    , include_groups=False).reset_index()
    
    accuracy_df.columns = ['participant_id', 'accuracy']
    return accuracy_df

all_accuracy_dfs = []

for experiment_type, truth_info in experiment_truth_type.items():
    accuracy_df = calculate_accuracy_per_participant(experiment_type, truth_info['truth_diff'], accuracy_threshold[experiment_type])
    accuracy_df['experiment_type'] = experiment_type
    all_accuracy_dfs.append(accuracy_df)
    display(accuracy_df)

combined_accuracy_df = pd.concat(all_accuracy_dfs)

overall_accuracy_df = combined_accuracy_df.groupby('participant_id')['accuracy'].mean().reset_index()
overall_accuracy_df.columns = ['participant_id', 'overall_accuracy']

number_accuracy_df = combined_accuracy_df[combined_accuracy_df['experiment_type'].str.contains('_number')]
number_accuracy_df = number_accuracy_df.groupby('participant_id')['accuracy'].mean().reset_index()
number_accuracy_df.columns = ['participant_id', 'number_accuracy']

greyscale_accuracy_df = combined_accuracy_df[combined_accuracy_df['experiment_type'].str.contains('_greyscale')]
greyscale_accuracy_df = greyscale_accuracy_df.groupby('participant_id')['accuracy'].mean().reset_index()
greyscale_accuracy_df.columns = ['participant_id', 'greyscale_accuracy']

final_accuracy_df = overall_accuracy_df.merge(number_accuracy_df, on='participant_id', how='left')
final_accuracy_df = final_accuracy_df.merge(greyscale_accuracy_df, on='participant_id', how='left')

final_accuracy_df = final_accuracy_df.sort_values(by='overall_accuracy', ascending=False)
display(final_accuracy_df)

final_accuracy_df = final_accuracy_df.sort_values(by='number_accuracy', ascending=False)
display(final_accuracy_df)

final_accuracy_df = final_accuracy_df.sort_values(by='greyscale_accuracy', ascending=False)
display(final_accuracy_df)

print(f"Overall Mean: {final_accuracy_df['overall_accuracy'].mean()}")
print(f"Overall SD: {final_accuracy_df['overall_accuracy'].std()}")
print(f"Overall Min: {final_accuracy_df['overall_accuracy'].min()}")
print(f"Overall Max: {final_accuracy_df['overall_accuracy'].max()}")

print(f"Number Mean: {final_accuracy_df['number_accuracy'].mean()}")
print(f"Number SD: {final_accuracy_df['number_accuracy'].std()}")
print(f"Number Min: {final_accuracy_df['number_accuracy'].min()}")
print(f"Number Max: {final_accuracy_df['number_accuracy'].max()}")

print(f"Greyscale Mean: {final_accuracy_df['greyscale_accuracy'].mean()}")
print(f"Greyscale SD: {final_accuracy_df['greyscale_accuracy'].std()}")
print(f"Greyscale Min: {final_accuracy_df['greyscale_accuracy'].min()}")
print(f"Greyscale Max: {final_accuracy_df['greyscale_accuracy'].max()}")

Unnamed: 0,participant_id,accuracy,experiment_type
0,1,10.0,gesture_pitch_number
1,2,28.571429,gesture_pitch_number
2,3,75.0,gesture_pitch_number
3,4,70.0,gesture_pitch_number
4,5,40.0,gesture_pitch_number
5,6,65.0,gesture_pitch_number
6,7,65.0,gesture_pitch_number
7,8,50.0,gesture_pitch_number
8,9,16.666667,gesture_pitch_number
9,10,40.0,gesture_pitch_number


Unnamed: 0,participant_id,accuracy,experiment_type
0,1,65.0,gesture_roll_number
1,2,40.0,gesture_roll_number
2,3,40.0,gesture_roll_number
3,4,50.0,gesture_roll_number
4,5,22.222222,gesture_roll_number
5,6,55.0,gesture_roll_number
6,7,5.0,gesture_roll_number
7,8,45.0,gesture_roll_number
8,9,85.0,gesture_roll_number
9,10,75.0,gesture_roll_number


Unnamed: 0,participant_id,accuracy,experiment_type
0,1,80.0,slider_number
1,2,100.0,slider_number
2,3,100.0,slider_number
3,4,100.0,slider_number
4,5,80.0,slider_number
5,6,90.0,slider_number
6,7,85.0,slider_number
7,8,95.0,slider_number
8,9,100.0,slider_number
9,10,95.0,slider_number


Unnamed: 0,participant_id,accuracy,experiment_type
0,1,27.777778,gesture_pitch_greyscale
1,2,25.0,gesture_pitch_greyscale
2,3,18.75,gesture_pitch_greyscale
3,4,46.666667,gesture_pitch_greyscale
4,5,40.0,gesture_pitch_greyscale
5,6,50.0,gesture_pitch_greyscale
6,7,28.571429,gesture_pitch_greyscale
7,8,66.666667,gesture_pitch_greyscale
8,9,11.111111,gesture_pitch_greyscale
9,10,25.0,gesture_pitch_greyscale


Unnamed: 0,participant_id,accuracy,experiment_type
0,1,31.25,gesture_roll_greyscale
1,2,33.333333,gesture_roll_greyscale
2,3,31.25,gesture_roll_greyscale
3,4,13.333333,gesture_roll_greyscale
4,5,33.333333,gesture_roll_greyscale
5,6,50.0,gesture_roll_greyscale
6,7,30.0,gesture_roll_greyscale
7,8,20.0,gesture_roll_greyscale
8,9,40.0,gesture_roll_greyscale
9,10,52.941176,gesture_roll_greyscale


Unnamed: 0,participant_id,accuracy,experiment_type
0,1,50.0,slider_greyscale
1,2,23.529412,slider_greyscale
2,3,50.0,slider_greyscale
3,4,60.0,slider_greyscale
4,5,46.153846,slider_greyscale
5,6,57.142857,slider_greyscale
6,7,27.272727,slider_greyscale
7,8,18.75,slider_greyscale
8,9,38.888889,slider_greyscale
9,10,54.545455,slider_greyscale


Unnamed: 0,participant_id,overall_accuracy,number_accuracy,greyscale_accuracy
21,22,62.209653,80.0,44.419306
24,25,61.748366,81.666667,41.830065
10,11,61.672772,80.0,43.345543
5,6,61.190476,70.0,52.380952
36,37,59.754902,85.0,34.509804
12,13,58.142283,76.666667,39.617898
29,30,57.81746,81.666667,33.968254
9,10,57.081105,70.0,44.16221
3,4,56.666667,73.333333,40.0
19,20,55.991285,75.0,36.982571


Unnamed: 0,participant_id,overall_accuracy,number_accuracy,greyscale_accuracy
36,37,59.754902,85.0,34.509804
24,25,61.748366,81.666667,41.830065
29,30,57.81746,81.666667,33.968254
21,22,62.209653,80.0,44.419306
10,11,61.672772,80.0,43.345543
12,13,58.142283,76.666667,39.617898
16,17,46.346154,76.666667,16.025641
27,28,54.603175,76.666667,32.539683
23,24,51.915954,75.0,28.831909
19,20,55.991285,75.0,36.982571


Unnamed: 0,participant_id,overall_accuracy,number_accuracy,greyscale_accuracy
5,6,61.190476,70.0,52.380952
17,18,54.583333,63.333333,45.833333
39,40,52.287582,60.0,44.575163
21,22,62.209653,80.0,44.419306
9,10,57.081105,70.0,44.16221
10,11,61.672772,80.0,43.345543
24,25,61.748366,81.666667,41.830065
34,35,53.279915,65.0,41.559829
28,29,54.370791,68.703704,40.037879
3,4,56.666667,73.333333,40.0


Overall Mean: 51.33593992973831
Overall SD: 6.6007190951451795
Overall Min: 33.773148148148145
Overall Max: 62.20965309200603
Number Mean: 67.98809523809526
Number SD: 10.375778241472288
Number Min: 31.666666666666668
Number Max: 85.0
Greyscale Mean: 34.68378462138137
Greyscale SD: 7.889513880872119
Greyscale Min: 14.160401002506264
Greyscale Max: 52.38095238095238


In [40]:
final_accuracy_df = overall_accuracy_df.merge(number_accuracy_df, on='participant_id', how='left')
final_accuracy_df = final_accuracy_df.merge(greyscale_accuracy_df, on='participant_id', how='left')

final_accuracy_df = final_accuracy_df.sort_values(by='participant_id')

melted_df = final_accuracy_df.melt(id_vars='participant_id', value_vars=['overall_accuracy', 'number_accuracy', 'greyscale_accuracy'], var_name='accuracy_type', value_name='accuracy')

fig = px.line(
    melted_df, 
    x='participant_id', 
    y='accuracy', 
    color='accuracy_type', 
    title='Participant Accuracy Comparison',
    labels={
        'participant_id': 'Participant ID',
        'accuracy': 'Accuracy (%)',
        'accuracy_type': 'Accuracy Type',
        'overall_accuracy': 'Overall'
    }
)

fig.update_xaxes(tickmode='linear', dtick=1)
fig.for_each_trace(lambda t: t.update(name={
    'overall_accuracy': 'Overall Accuracy',
    'number_accuracy': 'Number Accuracy',
    'greyscale_accuracy': 'Greyscale Accuracy'
}[t.name]))
fig.show()

display(p_df.loc[:, p_df.columns != 'completedExperiments'])

Unnamed: 0,id,age,genderIdentity
0,1,27,female
1,2,45,male
2,3,31,male
3,4,27,female
4,5,26,female
5,6,34,male
6,7,19,male
7,8,24,female
8,9,29,male
9,10,28,male
