## Imports

In [53]:
import pyxdf 
import numpy as np
import pandas as pd
from lmfit.models import Model
from os import listdir, getcwd
from os.path import isfile, join
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go

## Function Definitions


Task evoked pupillary response is calculated after correcting for luminance-induced pupil dilation: $𝑇𝐸𝑃𝑅 = 𝑑_m − 𝑑(𝑌)$, where $d_m$ is the measured pupil dilation, and $d(Y)$ is the predicted pupil dilation for the given luminance level. 

Predicted pupil dilation is calculated from a calibration sequence that produces and individual mapping model for each participant. The calibration sequence consists of 8 solid gray colors with varying luminance levels displayed in a psuedo-random order for 6 seconds each. The luminance levels span the range from 0.0 to 0.78, and for each calibration level, the first 0.5s of data is discarded to account for the initial pupillary response to the change in luminance, which can take a maximum of 0.5s. . The individual mapping model is calculated using a non-linear least squares regression to fit the equation $𝑑(𝑌) = 𝑎 · 𝑒^{−𝑏·𝑌} + c$ to the measured pupil dilation data for each participant. 

Pupil dilation data and the average luminance data were collected at 90 Hz, the display rate of the HMD.

See: Eckert, M., Robotham, T., Habets, E. A. P., and Rummukainen, O. S. (2022). Pupillary Light Reflex Correction for Robust Pupillometry in Virtual Reality. Proc. ACM Comput. Graph. Interact. Tech. 5, 1–16. doi: 10.1145/3530798

In [54]:
def pupil_func(x, a, b, c):
    return a * np.exp(-b * x) + c

In [55]:
def import_data(file):
    streams, header = pyxdf.load_xdf(file)
    dfs = {}
    for stream in streams:
        stream_name = stream['info']['name'][0]
        stream_channels = {channel['label'][0]: i for i, channel in enumerate(stream['info']['desc'][0]['channels'][0]['channel'])}
        stream_data = stream['time_series']
        data_dict = {key: np.array(stream_data)[:, index] for key, index in stream_channels.items()}
        data_dict['time'] = np.round(np.array(stream['time_stamps']), decimals=4)
        dfs[stream_name] = pd.DataFrame(data_dict).drop_duplicates(subset=['time']).reset_index(drop=True)
    return dfs

In [56]:
accom_time = pd.to_timedelta(0.5, unit='s')

In [57]:
def process_gaze_luminance_data(stream_df):
    pupil = stream_df['GazeStream'].loc[(stream_df['GazeStream']['LeftEyeIsBlinking'] == 0) & (stream_df['GazeStream']['RightEyeIsBlinking'] == 0) & (stream_df['GazeStream']['LeftPupilDiameter'] > 0) & (stream_df['GazeStream']['RightPupilDiameter'] > 0), ['time', 'MethodID', 'ModelID', 'LeftPupilDiameter', 'RightPupilDiameter']]
    pupil['time'] = pd.to_timedelta(pupil['time'], unit='s')

    lum = stream_df['LuminanceStream'].loc[:, ['time', 'MethodID', 'ModelID', 'Luminance']]
    lum['time'] = pd.to_timedelta(lum['time'], unit='s')

    # Intersection of time stamps
    pupil_lum_time_intersection = np.intersect1d(pupil['time'], lum['time'])

    # Filter pupil and luminance data by intersection
    pupil = pupil[pupil['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)
    lum = lum[lum['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)

    # Combined DataFrame for pupil and luminance
    pupil_lum = pd.DataFrame({
        'time': pd.to_timedelta(lum['time'], unit='s'),
        'luminance': lum['Luminance'],
        'pupilDiameter': 0.5 * (pupil['LeftPupilDiameter'] + pupil['RightPupilDiameter']),
        'methodID': pupil['MethodID'],
        'modelID': pupil['ModelID']
    }).resample('0.1s', on='time').mean()

    pupil_lum['time'] = pupil_lum.index

    return pupil_lum

In [58]:
def process_calibration_data(pupil_lum_df, stream_df):
    calibration_events = stream_df['ExperimentStream'].loc[(stream_df['ExperimentStream']['EventType'] == 'CalibrationColorChange') | (stream_df['ExperimentStream']['SceneEvent'] == 'Calibration') | (stream_df['ExperimentStream']['SceneEvent'] == 'CalibrationComplete'), ['time','SceneEvent', 'EventType']]
    calibration_events['time'] = pd.to_timedelta(calibration_events['time'], unit='s')
    c_start_times = calibration_events[:8]['time']
    c_end_times = calibration_events[1:]['time']
    c_start_times.reset_index(drop=True, inplace=True)
    c_end_times.reset_index(drop=True, inplace=True)

    calib_data = {}
    for i in range(8):
        calib_data[i] = pupil_lum_df.loc[(pupil_lum_df['time'] >= c_start_times[i]) & (pupil_lum_df['time'] <= c_end_times[i]), ['time','luminance', 'pupilDiameter']]
        calib_data[i]['time'] -= calib_data[i]['time'].iloc[0]
        calib_data[i] = calib_data[i].loc[(calib_data[i]['time'] >= accom_time), ['luminance', 'pupilDiameter']]

    calibration_data = pd.concat(calib_data).groupby(level=0).mean().sort_values(by=['luminance']).reset_index(drop=True)
    return calibration_data

In [59]:
def process_navigation_data(pupil_lum_df, stream_df, a, b, c):
    navigation_events = stream_df['ExperimentStream'].loc[(stream_df['ExperimentStream']['SceneEvent'] == 'NavigationComplete') | (stream_df['ExperimentStream']['SceneEvent'] == 'Navigation_Trial'), ['time','SceneEvent', 'EventType', 'ModelID', 'MethodID']]
    navigation_events['time'] = pd.to_timedelta(navigation_events['time'], unit='s')
    nav_start_times = navigation_events.loc[navigation_events['SceneEvent'] == 'Navigation_Trial', 'time']
    nav_end_times = navigation_events.loc[navigation_events['SceneEvent'] == 'NavigationComplete', 'time']

    nav_start_times.reset_index(drop=True, inplace=True)
    nav_end_times.reset_index(drop=True, inplace=True)

    #Correct for occasions when Unity emitted multiple SceneLoaded events for a single trial
    if len(nav_start_times) > 8:
        nav_diff = nav_start_times.diff().dt.total_seconds()
        nav_start_times = nav_start_times.loc[(nav_diff.isnull()) | (nav_diff > 3)]

    nav_start_times.reset_index(drop=True, inplace=True)
    nav_end_times.reset_index(drop=True, inplace=True)

    nav_data = {}
    for i in range(8):
        nav_data[i] = pupil_lum_df.loc[(pupil_lum_df['time']>=nav_start_times.loc[i]) & (pupil_lum_df['time']<=nav_end_times.loc[i]), ['time', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        nav_data[i].set_index('time', inplace=True, drop=False)

    navigation_data = pd.concat(nav_data, names=['trial'])
    navigation_data = navigation_data.groupby(level=0).resample('0.5s', on='time', ).mean()
    navigation_data['plr'] = pupil_func(navigation_data['luminance'], a, b, c)
    navigation_data['tepr'] = navigation_data['pupilDiameter'] - navigation_data['plr']
    
    return navigation_data

In [77]:
def process_creation_data(pupil_lum_df, stream_df, a, b, c):
    creation_events = stream_df['ExperimentStream'].loc[(stream_df['ExperimentStream']['SceneEvent'] == 'Creation_Trial') | (stream_df['ExperimentStream']['SceneEvent'] == 'CreationComplete'), ['time','SceneEvent', 'EventType', 'ModelID', 'MethodID']]
    creation_events['time'] = pd.to_timedelta(creation_events['time'], unit='s')
    crt_start_times = creation_events.loc[creation_events['SceneEvent'] == 'Creation_Trial', 'time']
    crt_end_times = creation_events.loc[creation_events['SceneEvent'] == 'CreationComplete', 'time']

    crt_start_times.reset_index(drop=True, inplace=True)
    crt_end_times.reset_index(drop=True, inplace=True)

    #Correct for occasions when Unity emitted multiple SceneLoaded events for a single trial
    if len(crt_start_times) > 8:
        crt_diff = crt_start_times.diff().dt.total_seconds()
        crt_start_times = crt_start_times.loc[(crt_diff.isnull()) | (crt_diff > 3)]

    crt_start_times.reset_index(drop=True, inplace=True)
    crt_end_times.reset_index(drop=True, inplace=True)

    crt_data = {}
    for i in range(8):
        crt_data[i] = pupil_lum_df.loc[(pupil_lum_df['time']>=crt_start_times.loc[i]) & (pupil_lum_df['time']<=crt_end_times.loc[i]), ['time', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        crt_data[i].set_index('time', inplace=True, drop=False)

    creation_data = pd.concat(crt_data, names=['trial'])
    creation_data = creation_data.groupby(level=0).resample('0.5s', on='time', ).mean()
    creation_data['plr'] = pupil_func(creation_data['luminance'], a, b, c)
    creation_data['tepr'] = creation_data['pupilDiameter'] - creation_data['plr']
    
    return creation_data

In [61]:
def process_discomfort_data(stream_df):
    discomfort_values = stream_df['SurveyStream'].loc[stream_df['SurveyStream']['SurveyType'] == 'Discomfort', ['time', 'Value', 'ModelID', 'MethodID']]
    discomfort_values['time'] = pd.to_timedelta(discomfort_values['time'], unit='s')
    discomfort_values.reset_index(drop=True, inplace=True)
    return discomfort_values

In [62]:
def process_seq_data(stream_df):
    seq_values = stream_df['SurveyStream'].loc[stream_df['SurveyStream']['SurveyType'] == 'SEQ', ['time', 'Value', 'ModelID', 'MethodID']]
    seq_values['time'] = pd.to_timedelta(seq_values['time'], unit='s')
    seq_values.reset_index(drop=True, inplace=True)
    return seq_values

## Import Data

In [63]:
data_dir = join(getcwd(),'Path_Data')
data_files = [join(data_dir, f) for f in listdir(data_dir) if isfile(join(data_dir, f))]

In [64]:
dfs = []
for file in data_files:
    dfs.append(import_data(file))

## Process Data

In [65]:
user_ids = []
user_nav_data = []
user_crt_data = []
user_models_nav = []
user_methods_nav = []
user_params = []
user_calibration = []
user_models_crt = []
user_methods_crt = []
user_seq = []
user_discomfort = []

for df in dfs:
    user_ids.append(df['ExperimentStream']['UserID'][0]) 

    pupil_lum_df = process_gaze_luminance_data(df)
    calibration_data = process_calibration_data(pupil_lum_df, df)

    user_calibration.append(calibration_data)

    # Fit pupil response to luminance
    x_data = calibration_data['luminance']
    y_data = calibration_data['pupilDiameter']
    exp_mod = Model(pupil_func)
    params = exp_mod.make_params(a=1, b=4, c=0)
    result = exp_mod.fit(y_data, params, x=x_data)
    a = result.params['a'].value
    b = result.params['b'].value
    c = result.params['c'].value

    user_params.append(pd.DataFrame({'params': [a, b, c]}, index=['a', 'b', 'c']))

    navigation_data = process_navigation_data(pupil_lum_df, df, a, b, c)
    navigation_avg = navigation_data.groupby(level=0).mean()

    user_nav_data.append(navigation_avg)

    models = navigation_avg.reset_index(drop=True)
    model_avg = models.groupby(['modelID']).mean()
    model_avg.drop(columns=['methodID'], inplace=True)
    methods = navigation_avg.reset_index(drop=True)
    method_avg = methods.groupby(['methodID']).mean()
    method_avg.drop(columns=['modelID'], inplace=True)

    user_models_nav.append(model_avg)
    user_methods_nav.append(method_avg)

    creation_data = process_creation_data(pupil_lum_df, df, a, b, c)
    creation_avg = creation_data.groupby(level=0).mean()
    creation_avg.drop(columns=['luminance'], inplace=True)

    user_crt_data.append(creation_avg)

    models = creation_avg.reset_index(drop=True)
    model_avg = models.groupby(['modelID']).mean()
    model_avg.drop(columns=['methodID'], inplace=True)
    methods = creation_avg.reset_index(drop=True)
    method_avg = methods.groupby(['methodID']).mean()
    method_avg.drop(columns=['modelID'], inplace=True)

    user_models_crt.append(model_avg)
    user_methods_crt.append(method_avg)
 
    user_discomfort.append(process_discomfort_data(df))
    user_seq.append(process_seq_data(df))

params = pd.concat(user_params, keys=user_ids, names=['UserID'])
model_data_nav = pd.concat(user_models_nav, keys=user_ids, names=['UserID'])
method_data_nav = pd.concat(user_methods_nav, keys=user_ids, names=['UserID'])
model_data_crt = pd.concat(user_models_crt, keys=user_ids, names=['UserID'])
method_data_crt = pd.concat(user_methods_crt, keys=user_ids, names=['UserID'])
nav_data = pd.concat(user_nav_data, keys=user_ids, names=['UserID'])
crt_data = pd.concat(user_crt_data, keys=user_ids, names=['UserID'])
seq_data = pd.concat(user_seq, keys=user_ids, names=['UserID'])
discomfort_data = pd.concat(user_discomfort, keys=user_ids, names=['UserID'])
calibration_data = pd.concat(user_calibration, keys=user_ids, names=['UserID'])

## Statistical Analysis

### Navigation vs Baseline Workload

Calculated from TEPR, which was calculated from the difference between the measured pupil dilation and the predicted pupil dilation for the given luminance value. 

In [66]:
method_nav_tepr = method_data_nav.loc[(slice(None), slice(None)), ('pupilDiameter', 'plr', 'tepr')]

method_tepr_2 = method_data_nav.loc[(slice(None), 2), 'tepr']
method_tepr_3 = method_data_nav.loc[(slice(None), 3), 'tepr']
method_tepr_2.reset_index(drop=True, inplace=True)
method_tepr_3.reset_index(drop=True, inplace=True)

#shapiro-wilk test
stat, p = stats.shapiro(method_tepr_2)
print('Shapiro-Wilk M2 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(method_tepr_3)
print('Shapiro-Wilk M3 = %.3f, p = %.3f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_2)
print('Wilcoxon M2 = %.3f, p = %.5f' % (stat, p))

stat, p = stats.wilcoxon(method_tepr_2)
print('Wilcoxon M3 = %.3f, p = %.5f' % (stat, p))

#paired t-test
t_stat, p_val = stats.ttest_rel(method_data_nav.loc[(slice(None), 2), 'pupilDiameter'].reset_index(drop=True), method_data_nav.loc[(slice(None), 2), 'plr'].reset_index(drop=True))
print('Paired t-test = %.3f, p = %.6f' % (t_stat, p_val))
t_stat, p_val = stats.ttest_rel(method_data_nav.loc[(slice(None), 3), 'pupilDiameter'].reset_index(drop=True), method_data_nav.loc[(slice(None), 3), 'plr'].reset_index(drop=True))
print('Paired t-test = %.3f, p = %.6f' % (t_stat, p_val))

Shapiro-Wilk M2 = 0.940, p = 0.287
Shapiro-Wilk M3 = 0.898, p = 0.052
Wilcoxon M2 = 0.000, p = 0.00001
Wilcoxon M3 = 0.000, p = 0.00001
Paired t-test = 7.496, p = 0.000001
Paired t-test = 7.738, p = 0.000001


### Navigation Workload by Method

In [67]:
# //Method Map:
# //Bulldozer: 0
# //Spatula: 1
# //FourDoF: 2
# //SixDoF: 3

#shapiro-wilk test
statt, pt = stats.shapiro(method_nav_tepr)
stat0, p0 = stats.shapiro(method_tepr_2)
stat1, p1 = stats.shapiro(method_tepr_3)

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_2, method_tepr_3)

#paired t-test
t_stat, p_val = stats.ttest_rel(method_tepr_2, method_tepr_3)

#descriptive stats (avg, median, std)
print('Method 2')
print('Mean = %.3f' % method_tepr_2.mean())
print('Median = %.3f' % method_tepr_2.median())
print('Std = %.3f' % method_tepr_2.std())

print('Method 3')
print('Mean = %.3f' % method_tepr_3.mean())
print('Median = %.3f' % method_tepr_3.median())
print('Std = %.3f' % method_tepr_3.std())

print('Shapiro-Wilk T = %.3f, p = %.3f' % (statt, pt))
print('Shapiro-Wilk M2 = %.3f, p = %.3f' % (stat0, p0))
print('Shapiro-Wilk M3 = %.3f, p = %.3f' % (stat1, p1))
print('Wilcoxon = %.3f, p = %.3f' % (stat, p))
print('Paired t-test = %.3f, p = %.3f' % (t_stat, p_val))

# values = [['4 DoF', '6 DoF'], ['%.3f ± %.3f' % (method_tepr_2.mean(),  method_tepr_2.std()), '%.3f ± %.3f' % (method_tepr_3.mean(),  method_tepr_3.std())]]
# fig = go.Figure(data=[go.Table(
#     columnwidth = [1,2],
#     header=dict(
#         values=['Method', 'Mean TEPR'],
#         align=['center', 'center'],
#         ),
#     cells=dict(
#         values= values,
#         align=['center', 'center'],
#         ))])
# fig.update_layout(width=600, height=300)
# fig.show()

Method 2
Mean = 0.497
Median = 0.464
Std = 0.281
Method 3
Mean = 0.506
Median = 0.458
Std = 0.277
Shapiro-Wilk T = 0.862, p = 0.000
Shapiro-Wilk M2 = 0.940, p = 0.287
Shapiro-Wilk M3 = 0.898, p = 0.052
Wilcoxon = 81.000, p = 0.865
Paired t-test = -0.265, p = 0.794


In [68]:
df = method_data_nav.loc[(slice(None), slice(None)), 'tepr'].reset_index('methodID')
nav_tepr_fig = px.box(df, x='methodID', y ='tepr', color='methodID', notched=True, title='TEPR by Navigation Method', color_discrete_sequence=px.colors.qualitative.D3)
nav_tepr_fig.update_yaxes(range=[0,1.0])
nav_tepr_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='TEPR',
    width=600,
)
nav_tepr_fig.show()

In [69]:
model_nav_tepr = model_data_nav.loc[(slice(None), slice(None)), ('pupilDiameter', 'plr', 'tepr')]

model_tepr_0 = model_nav_tepr.loc[(slice(None), 0), 'tepr']
model_tepr_1 = model_nav_tepr.loc[(slice(None), 1), 'tepr']
model_tepr_2 = model_nav_tepr.loc[(slice(None), 2), 'tepr']
model_tepr_3 = model_nav_tepr.loc[(slice(None), 3), 'tepr']

#shapiro-wilk test
stat, p = stats.shapiro(model_nav_tepr['tepr'])
print('Shapiro-Wilk total = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(model_tepr_0)
print('Shapiro-Wilk M0 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(model_tepr_1)
print('Shapiro-Wilk M1 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(model_tepr_2)
print('Shapiro-Wilk M2 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(model_tepr_3)
print('Shapiro-Wilk M3 = %.3f, p = %.3f' % (stat, p))

#Friedman test
stat, p = stats.friedmanchisquare(model_tepr_0, model_tepr_1, model_tepr_2, model_tepr_3)
print('Friedman = %.3f, p = %.6f' % (stat, p))


Shapiro-Wilk total = 0.934, p = 0.001
Shapiro-Wilk M0 = 0.911, p = 0.091
Shapiro-Wilk M1 = 0.877, p = 0.023
Shapiro-Wilk M2 = 0.941, p = 0.303
Shapiro-Wilk M3 = 0.917, p = 0.114
Friedman = 4.600, p = 0.203542


### Creation vs Baseline Workload

Calculated from TEPR, which was calculated from the difference between the measured pupil dilation and the predicted pupil dilation for the given luminance value. 


In [79]:
model_crt_tepr = model_data_crt.loc[(slice(None), slice(None)), ('pupilDiameter', 'plr', 'tepr')]
method_crt_tepr = method_data_crt.loc[(slice(None), slice(None)), ('pupilDiameter', 'plr', 'tepr')]

method_tepr_0 = method_data_crt.loc[(slice(None), 0), 'tepr']
method_tepr_1 = method_data_crt.loc[(slice(None), 1), 'tepr']
method_tepr_0.reset_index(drop=True, inplace=True)
method_tepr_1.reset_index(drop=True, inplace=True)

#shapiro-wilk test
stat0, p0 = stats.shapiro(method_tepr_0)
stat1, p1 = stats.shapiro(method_tepr_1)

#shapiro-wilk test
stat, p = stats.shapiro(method_tepr_0)
print('Shapiro-Wilk M0 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(method_tepr_1)
print('Shapiro-Wilk M1 = %.3f, p = %.3f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_0)
print('Wilcoxon M0 = %.3f, p = %.5f' % (stat, p))

stat, p = stats.wilcoxon(method_tepr_1)
print('Wilcoxon M1 = %.3f, p = %.5f' % (stat, p))

#paired t-test
t_stat, p_val = stats.ttest_rel(method_data_crt.loc[(slice(None), 0), 'pupilDiameter'].reset_index(drop=True), method_data_crt.loc[(slice(None), 0), 'plr'].reset_index(drop=True))
print('Paired t-test = %.3f, p = %.6f' % (t_stat, p_val))
t_stat, p_val = stats.ttest_rel(method_data_crt.loc[(slice(None), 1), 'pupilDiameter'].reset_index(drop=True), method_data_crt.loc[(slice(None), 1), 'plr'].reset_index(drop=True))
print('Paired t-test = %.3f, p = %.6f' % (t_stat, p_val))

Shapiro-Wilk M0 = 0.986, p = 0.995
Shapiro-Wilk M1 = 0.953, p = 0.508
Wilcoxon M0 = 0.000, p = 0.00003
Wilcoxon M1 = 0.000, p = 0.00002
Paired t-test = 8.572, p = 0.000000
Paired t-test = 7.649, p = 0.000001


### Creation Workload by Method

In [80]:
#descriptive stats (avg, median, std)
print('Method 0')
print('Mean = %.3f' % method_tepr_0.mean())
print('Median = %.3f' % method_tepr_0.median())
print('Std = %.3f' % method_tepr_0.std())

print('Method 1')
print('Mean = %.3f' % method_tepr_1.mean())
print('Median = %.3f' % method_tepr_1.median())
print('Std = %.3f' % method_tepr_1.std())

print(method_crt_tepr)
print(method_tepr_0)
print(method_tepr_1)

#shapiro-wilk test
stat, p = stats.shapiro(method_crt_tepr)
print('Shapiro-Wilk total = %.3f, p = %.5f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_0, method_tepr_1)
print('Wilcoxon = %.3f, p = %.5f' % (stat, p))

#paired t-test
t_stat, p_val = stats.ttest_rel(method_tepr_0, method_tepr_1)
print('Paired t-test = %.3f, p = %.5f' % (t_stat, p_val))


Method 0
Mean = 0.685
Median = 0.649
Std = 0.320
Method 1
Mean = 0.480
Median = 0.494
Std = 0.259
                 pupilDiameter       plr      tepr
UserID methodID                                   
108    0.0            5.117929  3.794466  1.323463
       1.0            4.357677  3.718661  0.639016
109    0.0            3.521332  2.822759  0.698573
       1.0            3.363638  2.796737  0.566901
110    0.0            3.692744  2.685511  1.007233
       1.0            3.587292  2.628947  0.958344
111    0.0            3.866648  2.923340  0.943308
       1.0            3.824324  2.840332  0.983992
112    0.0            3.644294  2.988367  0.655927
       1.0            3.198256  2.958644  0.239612
114    0.0            4.012074  3.897999  0.114075
       1.0            3.763912  3.738576  0.025336
116    0.0            3.815321  3.173613  0.641708
       1.0            3.604076  3.124624  0.479453
117    0.0            4.630815  4.264829  0.365986
       1.0            4.392104  4.2

ValueError: The samples x and y must have the same length.

In [None]:
df = method_data_crt.loc[(slice(None), slice(None)), 'tepr'].reset_index('methodID')
crt_tepr_fig = px.box(df, x='methodID', y ='tepr', color='methodID', notched=True, title='TEPR by Creation Method', color_discrete_sequence=px.colors.qualitative.D3)
crt_tepr_fig.update_yaxes(range=[0,1.3])
crt_tepr_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='TEPR',
    width=600,
)
crt_tepr_fig.show()


In [None]:
model_crt_tepr = model_data_crt.loc[(slice(None), slice(None)), ('pupilDiameter', 'plr', 'tepr')]

model_tepr_0 = model_crt_tepr.loc[(slice(None), 0), 'tepr']
model_tepr_1 = model_crt_tepr.loc[(slice(None), 1), 'tepr']
model_tepr_2 = model_crt_tepr.loc[(slice(None), 2), 'tepr']
model_tepr_3 = model_crt_tepr.loc[(slice(None), 3), 'tepr']

#shapiro-wilk test
stat, p = stats.shapiro(model_crt_tepr['tepr'])
print('Shapiro-Wilk total = %.3f, p = %.3f' % (stat, p))

#Friedman test
stat, p = stats.friedmanchisquare(model_tepr_0, model_tepr_1, model_tepr_2, model_tepr_3)
print('Friedman = %.3f, p = %.6f' % (stat, p))

Shapiro-Wilk total = 0.967, p = 0.059
Friedman = 3.133, p = 0.371521


### Discomfort Scores

In [None]:
discomfort_method = discomfort_data.loc[(slice(None), slice(None)), ['Value', 'MethodID', 'ModelID']]
discomfort_method['Value'] = discomfort_method['Value'].astype(float)
discomfort_method['MethodID'] = discomfort_method['MethodID'].astype(float)
discomfort_method['ModelID'] = discomfort_method['ModelID'].astype(float)

# find the average for each user for each method
discomfort_avg = discomfort_method.groupby(['UserID', 'MethodID']).mean()
discomfort_method_2 = discomfort_avg.loc[(slice(None), 2), 'Value']
discomfort_method_3 = discomfort_avg.loc[(slice(None), 3), 'Value']

print('Method 2')
print('Mean = %.3f' % discomfort_method_2.mean())
print('Median = %.3f' % discomfort_method_2.median())
print('Std = %.3f' % discomfort_method_2.std())

print('Method 3')
print('Mean = %.3f' % discomfort_method_3.mean())
print('Median = %.3f' % discomfort_method_3.median())
print('Std = %.3f' % discomfort_method_3.std())

#shapiro-wilk test
stat, p = stats.shapiro(discomfort_avg['Value'])
print('Shapiro-Wilk total = %.3f, p = %.5f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(discomfort_method_2, discomfort_method_3, zero_method='zsplit')
print('Wilcoxon = %.3f, p = %.5f' % (stat, p))

#paired t-test
t_stat, p_val = stats.ttest_rel(discomfort_method_2, discomfort_method_3)
print('Paired t-test = %.3f, p = %.5f' % (t_stat, p_val))




Method 2
Mean = 1.844
Median = 0.900
Std = 1.904
Method 3
Mean = 2.367
Median = 2.000
Std = 2.327
Shapiro-Wilk total = 0.864, p = 0.00041
Wilcoxon = 36.000, p = 0.03096
Paired t-test = -2.645, p = 0.01703



Exact p-value calculation does not work if there are zeros. Switching to normal approximation.



In [None]:
discomfort_fig = px.box(discomfort_method, x='MethodID', y='Value', color='MethodID', title='Discomfort Ratings by Navigation Method', notched=True, color_discrete_sequence=px.colors.qualitative.D3)
discomfort_fig.update_yaxes(range=[0,10])
discomfort_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='Discomfort Rating',
    width=600,
)
discomfort_fig.show()

In [None]:
model_discomfort = discomfort_method.groupby(['UserID', 'ModelID']).mean()

discomfort_model_0 = model_discomfort.loc[(slice(None), 0), 'Value']
discomfort_model_1 = model_discomfort.loc[(slice(None), 1), 'Value']
discomfort_model_2 = model_discomfort.loc[(slice(None), 2), 'Value']
discomfort_model_3 = model_discomfort.loc[(slice(None), 3), 'Value']

#shapiro-wilk test
stat, p = stats.shapiro(model_discomfort['Value'])
print('Shapiro-Wilk total = %.3f, p = %.6f' % (stat, p))

#Friedman test
stat, p = stats.friedmanchisquare(discomfort_model_0, discomfort_model_1, discomfort_model_2, discomfort_model_3)
print('Friedman = %.3f, p = %.6f' % (stat, p))

Shapiro-Wilk total = 0.865, p = 0.000000
Friedman = 3.984, p = 0.263240


In [None]:
seq_method = seq_data.loc[(slice(None), slice(None)), ['Value', 'MethodID', 'ModelID']]
seq_method['Value'] = seq_method['Value'].astype(float)
seq_method['MethodID'] = seq_method['MethodID'].astype(float)
seq_method['ModelID'] = seq_method['ModelID'].astype(float)

# find the average for each user for each method
seq_avg = seq_method.groupby(['UserID', 'MethodID']).mean()
seq_method_0 = seq_avg.loc[(slice(None), 0), 'Value']
seq_method_1 = seq_avg.loc[(slice(None), 1), 'Value']
seq_method_2 = seq_avg.loc[(slice(None), 2), 'Value']
seq_method_3 = seq_avg.loc[(slice(None), 3), 'Value']

#shapiro-wilk test
statt01, pt01 = stats.shapiro(seq_avg.loc[(slice(None), [0,1]), 'Value'])
statt23, pt23 = stats.shapiro(seq_avg.loc[(slice(None), [2,3]), 'Value'])

#wilcoxon test
statw_01, p_w_01 = stats.wilcoxon(seq_method_0, seq_method_1, zero_method='pratt')
statw_23, p_w_23 = stats.wilcoxon(seq_method_2, seq_method_3, zero_method='pratt')

#paired t-test
t_stat, p_val = stats.ttest_rel(seq_method_0, seq_method_1)
t_stat2, p_val2 = stats.ttest_rel(seq_method_2, seq_method_3)

#descriptive stats (avg, median, std)
print('Method 0')
print('Mean = %.3f' % seq_method_0.mean())
print('Median = %.3f' % seq_method_0.median())
print('Std = %.3f' % seq_method_0.std())

print('Method 1')
print('Mean = %.3f' % seq_method_1.mean())
print('Median = %.3f' % seq_method_1.median())
print('Std = %.3f' % seq_method_1.std())

print('Method 2')
print('Mean = %.3f' % seq_method_2.mean())
print('Median = %.3f' % seq_method_2.median())
print('Std = %.3f' % seq_method_2.std())

print('Method 3')
print('Mean = %.3f' % seq_method_3.mean())
print('Median = %.3f' % seq_method_3.median())
print('Std = %.3f' % seq_method_3.std())

print('Shapiro-Wilk 01 = %.3f, p = %.5f' % (statt01, pt01))
print('Shapiro-Wilk 23 = %.3f, p = %.5f' % (statt23, pt23))

print('Wilcoxon 01 = %.3f, p = %.5f' % (statw_01, p_w_01))
print('Wilcoxon 23 = %.3f, p = %.5f' % (statw_23, p_w_23))

Method 0
Mean = 0.639
Median = 0.500
Std = 0.708
Method 1
Mean = 0.986
Median = 0.750
Std = 1.086
Method 2
Mean = 0.431
Median = 0.125
Std = 0.593
Method 3
Mean = 0.847
Median = 0.500
Std = 0.904
Shapiro-Wilk 01 = 0.835, p = 0.00009
Shapiro-Wilk 23 = 0.806, p = 0.00002
Wilcoxon 01 = 26.500, p = 0.08727
Wilcoxon 23 = 44.000, p = 0.10681



Exact p-value calculation does not work if there are zeros. Switching to normal approximation.



In [None]:
seq_fig = px.box(seq_method, x='MethodID', y='Value', color='MethodID', title='SEQ Ratings by Method', notched=True, color_discrete_sequence=px.colors.qualitative.D3)
seq_fig.update_yaxes(range=[0,6])
seq_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='SEQ Rating',
    width=600,
)
seq_fig.show()