## Imports

In [34]:
import pyxdf 
import numpy as np
import pandas as pd
from lmfit.models import Model
from os import listdir, getcwd
from os.path import isfile, join
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
import pywt
import math

## Function Definitions


Task evoked pupillary response is calculated after correcting for luminance-induced pupil dilation: $𝑇𝐸𝑃𝑅 = 𝑑_m − 𝑑(𝑌)$, where $d_m$ is the measured pupil dilation, and $d(Y)$ is the predicted pupil dilation for the given luminance level. 

Predicted pupil dilation is calculated from a calibration sequence that produces and individual mapping model for each participant. The calibration sequence consists of 8 solid gray colors with varying luminance levels displayed in a psuedo-random order for 6 seconds each. The luminance levels span the range from 0.0 to 0.78, and for each calibration level, the first 0.5s of data is discarded to account for the initial pupillary response to the change in luminance, which can take a maximum of 0.5s. . The individual mapping model is calculated using a non-linear least squares regression to fit the equation $𝑑(𝑌) = 𝑎 · 𝑒^{−𝑏·𝑌} + c$ to the measured pupil dilation data for each participant. 

Pupil dilation data and the average luminance data were collected at 90 Hz, the display rate of the HMD.

See: Eckert, M., Robotham, T., Habets, E. A. P., and Rummukainen, O. S. (2022). Pupillary Light Reflex Correction for Robust Pupillometry in Virtual Reality. Proc. ACM Comput. Graph. Interact. Tech. 5, 1–16. doi: 10.1145/3530798

In [35]:
def pupil_func(x, a, b, c):
    return a * np.exp(-b * x) + c

In [36]:
def modmax(d):
    # compute signal modulus
    m = [0.0]*len(d)
    for i in range(len(d)):
        m[i] = math.fabs(d[i])
    # if value is larger than both neighbours , and strictly larger than either , then it is a local maximum
    t = [0.0]*len(d)
    for i in range(len(d)):
        ll = m[i -1] if i >= 1 else m[i]
        oo = m[i]
        rr = m[i+1] if i < len(d)-2 else m[i]
        if (ll <= oo and oo >= rr) and (ll < oo or oo > rr):
        # compute magnitude
            t[i] = math.sqrt(d[i]**2)
        else:
            t[i] = 0.0
    return t

In [37]:
def ipa_func(d):
    # obtain 2-level DWT of pupil diameter signal d
    try:
        (cA2 ,cD2 ,cD1) = pywt.wavedec(d,'sym16', 'per', level=2)
    except ValueError :
        return
    # get signal duration (in seconds)
    tt = d.index[-1] - d.index[0]
    # normalize by 1/2 j , j = 2 for 2-level DWT
    cA2 [:] = [x / math.sqrt (4.0) for x in cA2]
    cD1 [:] = [x / math.sqrt (2.0) for x in cD1]
    cD2 [:] = [x / math.sqrt (4.0) for x in cD2]

    # detect modulus maxima , see Listing 2
    cD2m = modmax(cD2)
    # threshold using universal threshold λuniv = σˆp(2logn)
    # where σˆ is the standard deviation of the noise
    λuniv = np.std(cD2m) * math.sqrt (2.0* np.log2(len(cD2m )))
    cD2t = pywt. threshold (cD2m ,λuniv,mode="hard")
    # compute IPA
    ctr = 0
    for i in range(len(cD2t )):
        if math.fabs(cD2t[i]) > 0: ctr += 1
    IPA = float(ctr)/tt.total_seconds()

    return IPA

In [38]:
def import_data(file):
    streams, header = pyxdf.load_xdf(file)
    dfs = {}
    for stream in streams:
        stream_name = stream['info']['name'][0]
        stream_channels = {channel['label'][0]: i for i, channel in enumerate(stream['info']['desc'][0]['channels'][0]['channel'])}
        stream_data = stream['time_series']
        data_dict = {key: np.array(stream_data)[:, index] for key, index in stream_channels.items()}
        data_dict['time'] = np.round(np.array(stream['time_stamps']), decimals=4)
        dfs[stream_name] = pd.DataFrame(data_dict).drop_duplicates(subset=['time']).reset_index(drop=True)
    return dfs

In [39]:
accom_time = pd.to_timedelta(0.5, unit='s')

In [40]:
def process_gaze_luminance_data(stream_df):
    pupil = stream_df['GazeStream'].loc[(stream_df['GazeStream']['LeftEyeIsBlinking'] == 0) 
                                        & (stream_df['GazeStream']['RightEyeIsBlinking'] == 0) 
                                        & (stream_df['GazeStream']['LeftPupilDiameter'] > 0) 
                                        & (stream_df['GazeStream']['RightPupilDiameter'] > 0), 
                                        ['time', 'MethodID', 'ModelID', 'LeftPupilDiameter', 'RightPupilDiameter']]
    pupil['time'] = pd.to_timedelta(pupil['time'], unit='s')

    lum = stream_df['LuminanceStream'].loc[:, ['time', 'MethodID', 'ModelID', 'Luminance']]
    lum['time'] = pd.to_timedelta(lum['time'], unit='s')

    # Intersection of time stamps
    pupil_lum_time_intersection = np.intersect1d(pupil['time'], lum['time'])

    # Filter pupil and luminance data by intersection
    pupil = pupil[pupil['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)
    lum = lum[lum['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)

    # Combined DataFrame for pupil and luminance
    pupil_lum = pd.DataFrame({
        'time': pd.to_timedelta(pupil_lum_time_intersection, unit='s'),
        'luminance': lum['Luminance'],
        'pupilDiameter': 0.5 * (pupil['LeftPupilDiameter'] + pupil['RightPupilDiameter']),
        'methodID': pupil['MethodID'],
        'modelID': pupil['ModelID']
    })

    return pupil_lum

In [41]:
def process_calibration_data(pupil_lum_df, stream_df):
    calibration_events = stream_df['ExperimentStream'].loc[(stream_df['ExperimentStream']['EventType'] == 'CalibrationColorChange') | 
                                                           (stream_df['ExperimentStream']['SceneEvent'] == 'Calibration') | 
                                                           (stream_df['ExperimentStream']['SceneEvent'] == 'CalibrationComplete'), 
                                                           ['time','SceneEvent', 'EventType']]
    calibration_events['time'] = pd.to_timedelta(calibration_events['time'], unit='s')
    c_start_times = calibration_events[:8]['time']
    c_end_times = calibration_events[1:]['time']
    c_start_times.reset_index(drop=True, inplace=True)
    c_end_times.reset_index(drop=True, inplace=True)

    calib_data = {}
    for i in range(8):
        calib_data[i] = pupil_lum_df.loc[(pupil_lum_df['time'] >= c_start_times[i]) & (pupil_lum_df['time'] <= c_end_times[i]), ['time','luminance', 'pupilDiameter']]
        calib_data[i]['time'] -= calib_data[i]['time'].iloc[0]
        calib_data[i] = calib_data[i].loc[(calib_data[i]['time'] >= accom_time), ['luminance', 'pupilDiameter']]

    calibration_data = pd.concat(calib_data).groupby(level=0).mean().sort_values(by=['luminance']).reset_index(drop=True)
    return calibration_data

In [42]:
def process_navigation_data(pupil_lum_df, stream_df, a, b, c):
    nav_start_times = pd.to_timedelta(stream_df['NavigationStream'].groupby(['ModelID', 'MethodID']).first()['time'], unit='s')
    nav_start_times.reset_index(drop=True, inplace=True)
    
    stream_df['SurveyStream']['ModelID'] = stream_df['SurveyStream']['ModelID'].astype(float)
    nav_end_times = stream_df['SurveyStream'].loc[
        (stream_df['SurveyStream']['SurveyType'] == 'Discomfort') & 
        (stream_df['SurveyStream']['ModelID'] < 4), 
        ['time', 'ModelID', 'MethodID']]
    nav_end_times = pd.to_timedelta(nav_end_times.groupby(['ModelID', 'MethodID']).first()['time'], unit='s') - pd.offsets.Second(2)
    nav_end_times.reset_index(drop=True, inplace=True)
    
    nav_data = {}
    for i in range(8):
        nav_data[i] = pupil_lum_df.loc[
            (pupil_lum_df['luminance'] >0) & 
            (pupil_lum_df['time']>nav_start_times.loc[i]) & 
            (pupil_lum_df['time']<nav_end_times.loc[i]), 
            ['time', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        nav_data[i].reset_index(drop=True, inplace=True)

    navigation_data = pd.concat(nav_data, names=['trial'])
    navigation_data['pupil_lum_base'] = pupil_func(navigation_data['luminance'], a, b, c)
    navigation_data['adj_pupil'] = navigation_data['pupilDiameter'] - navigation_data['pupil_lum_base']
    
    return navigation_data

In [43]:
def process_creation_data(pupil_lum_df, stream_df, a, b, c):
    
    crt_start_times = stream_df['CreationStream'].loc[
    (stream_df['CreationStream']['EventName'] == 'StartPointRegistered'), 
    ['time', 'ModelID', 'MethodID']]
    crt_start_times = pd.to_timedelta(crt_start_times.groupby(['ModelID', 'MethodID']).first()['time'], unit='s')
    crt_start_times.reset_index(drop=True, inplace=True)

    crt_end_times = stream_df['CreationStream'].loc[
    (stream_df['CreationStream']['EventName'] == 'FinishPath'), 
    ['time', 'ModelID', 'MethodID']]
    crt_end_times = pd.to_timedelta(crt_end_times.groupby(['ModelID', 'MethodID']).first()['time'], unit='s')
    crt_end_times.reset_index(drop=True, inplace=True)
    
    crt_data = {}
    for i in range(8):
        crt_data[i] = pupil_lum_df.loc[
            (pupil_lum_df['time'] > crt_start_times.loc[i]) & 
            (pupil_lum_df['time'] < crt_end_times.loc[i]), 
            ['time', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        crt_data[i].reset_index(drop=True, inplace=True)

    creation_data = pd.concat(crt_data, names=['trial'])
    creation_data['pupil_lum_base'] = pupil_func(creation_data['luminance'], a, b, c)
    creation_data['adj_pupil'] = creation_data['pupilDiameter'] - creation_data['pupil_lum_base']
    
    return creation_data

In [44]:
def process_discomfort_data(stream_df):
    discomfort_values = stream_df['SurveyStream'].loc[stream_df['SurveyStream']['SurveyType'] == 'Discomfort', ['time', 'Value', 'ModelID', 'MethodID']]
    discomfort_values['time'] = pd.to_timedelta(discomfort_values['time'], unit='s')
    discomfort_values.reset_index(drop=True, inplace=True)
    return discomfort_values

In [45]:
def process_seq_data(stream_df):
    seq_values = stream_df['SurveyStream'].loc[stream_df['SurveyStream']['SurveyType'] == 'SEQ', ['time', 'Value', 'ModelID', 'MethodID']]
    seq_values['time'] = pd.to_timedelta(seq_values['time'], unit='s')
    seq_values.reset_index(drop=True, inplace=True)
    return seq_values

In [46]:
def process_ipa_calc(data):
    methods = []
    ipa = []
    for i in range(8):
        methods.append(data.loc[i]['methodID'].iloc[i])
        pupil = data.loc[i]['pupilDiameter']
        pupil.index = data.loc[i]['time']
        ipa.append(ipa_func(pupil))
        
    return pd.DataFrame({'methodID': methods, 'IPA': ipa})

## Import Data

In [47]:
data_dir = join(getcwd(),'Path_Data')
data_files = [join(data_dir, f) for f in listdir(data_dir) if isfile(join(data_dir, f))]

In [48]:
dfs = []
for file in data_files:
    dfs.append(import_data(file))

## Process Data

In [49]:
user_ids = []
user_nav_data = []
user_crt_data = []
user_models_nav = []
user_methods_nav = []
user_params = []
user_calibration = []
user_models_crt = []
user_methods_crt = []
user_seq = []
user_discomfort = []
user_ipa_nav = []
user_ipa_crt = []

user_pupil_data = []

for df in dfs:
    user_ids.append(df['ExperimentStream']['UserID'][0]) 

    # Remove final empty row from survey data
    df['SurveyStream'] = df['SurveyStream'].replace(r'^\s*$', np.nan, regex=True).dropna()
    
    pupil_lum_df = process_gaze_luminance_data(df)
    calibration_data = process_calibration_data(pupil_lum_df, df)

    user_calibration.append(calibration_data)

    # Fit pupil response to luminance
    x_data = calibration_data['luminance']
    y_data = calibration_data['pupilDiameter']
    exp_mod = Model(pupil_func)
    params = exp_mod.make_params(a=1, b=4, c=0)
    result = exp_mod.fit(y_data, params, x=x_data)
    a = result.params['a'].value
    b = result.params['b'].value
    c = result.params['c'].value

    user_params.append(pd.DataFrame({'params': [a, b, c]}, index=['a', 'b', 'c']))

    pupil_data = pupil_lum_df.loc[(pupil_lum_df['methodID'] < 5), ['time', 'luminance', 'pupilDiameter']]
    pupil_data.reset_index(drop=True, inplace=True)
    pupil_data['time'] = pupil_data['time'] - pupil_data['time'][0]
    pupil_data['pupil_lum_base'] = pupil_func(pupil_data['luminance'], a, b, c)
    pupil_data['adj_pupil'] = pupil_data['pupilDiameter'] - pupil_data['pupil_lum_base']

    user_pupil_data.append(pupil_data)

    navigation_data = process_navigation_data(pupil_lum_df, df, a, b, c)

    user_nav_data.append(navigation_data)

    ipa_calc = process_ipa_calc(navigation_data)
    ipa_avg = ipa_calc.groupby(['methodID']).mean()
    
    user_ipa_nav.append(ipa_avg)

    navigation_avg = navigation_data.groupby(level=0).mean()

    models = navigation_avg.reset_index(drop=True)
    model_avg = models.groupby(['modelID']).mean()
    model_avg.drop(columns=['methodID'], inplace=True)
    methods = navigation_avg.reset_index(drop=True)
    method_avg = methods.groupby(['methodID']).mean()
    method_avg.drop(columns=['modelID'], inplace=True)

    user_models_nav.append(model_avg)
    user_methods_nav.append(method_avg)

    creation_data = process_creation_data(pupil_lum_df, df, a, b, c)

    user_crt_data.append(creation_data)

    ipa_calc = process_ipa_calc(creation_data)
    ipa_avg = ipa_calc.groupby(['methodID']).mean()

    user_ipa_crt.append(ipa_avg)

    creation_avg = creation_data.groupby(level=0).mean()

    models = creation_avg.reset_index(drop=True)
    model_avg = models.groupby(['modelID']).mean()
    model_avg.drop(columns=['methodID'], inplace=True)
    methods = creation_avg.reset_index(drop=True)
    method_avg = methods.groupby(['methodID']).mean()
    method_avg.drop(columns=['modelID'], inplace=True)

    user_models_crt.append(model_avg)
    user_methods_crt.append(method_avg)
    
    user_discomfort.append(process_discomfort_data(df))
    user_seq.append(process_seq_data(df))

params = pd.concat(user_params, keys=user_ids, names=['UserID'])
model_data_nav = pd.concat(user_models_nav, keys=user_ids, names=['UserID'])
method_data_nav = pd.concat(user_methods_nav, keys=user_ids, names=['UserID'])
model_data_crt = pd.concat(user_models_crt, keys=user_ids, names=['UserID'])
method_data_crt = pd.concat(user_methods_crt, keys=user_ids, names=['UserID'])
nav_data = pd.concat(user_nav_data, keys=user_ids, names=['UserID'])
crt_data = pd.concat(user_crt_data, keys=user_ids, names=['UserID'])
seq_data = pd.concat(user_seq, keys=user_ids, names=['UserID'])
discomfort_data = pd.concat(user_discomfort, keys=user_ids, names=['UserID'])
calibration_data = pd.concat(user_calibration, keys=user_ids, names=['UserID'])
ipa_data_nav = pd.concat(user_ipa_nav, keys=user_ids, names=['UserID'])
ipa_data_crt = pd.concat(user_ipa_crt, keys=user_ids, names=['UserID'])
pupil_data = pd.concat(user_pupil_data, keys=user_ids, names=['UserID'])

### Draw Traces for all Eye Diameter Data

Uncomment to view the traces of the eye diameter data for all participants.

In [50]:
#draw traces for each user
# fig = go.Figure()
# pupil_data.dropna(inplace=True)
# for user in user_ids:
#     fig.add_trace(go.Scatter(x=pupil_data.loc[user]['time'].dt.total_seconds(), y=pupil_data.loc[user]['pupilDiameter'], mode='lines', name=user))
# fig.update_layout(title='Pupil Diameter for each User', xaxis_title='Time', yaxis_title='Pupil Diameter (mm)')
# fig.update_xaxes(range=[60,240])
# fig.show()


## Statistical Analysis

Method 0: One-handed drawing interface (path creation)

Method 1: Two-handed point-placement interface (path creation)

Method 2: 4DOF locomotion (navigation)

Method 3: 6DOF locomotion (navigation)

### Navigation vs Baseline Workload

Calculated from TEPR, which was calculated from the difference between the measured pupil dilation and the predicted pupil dilation for the given luminance value. 

In [68]:
method_nav_tepr = method_data_nav.loc[(slice(None), slice(None)), ('pupilDiameter', 'pupil_lum_base', 'adj_pupil')]

method_tepr_2 = method_data_nav.loc[(slice(None), 2), 'adj_pupil']
method_tepr_3 = method_data_nav.loc[(slice(None), 3), 'adj_pupil']
method_tepr_2.reset_index(drop=True, inplace=True)
method_tepr_3.reset_index(drop=True, inplace=True)

#shapiro-wilk test
stat, p = stats.shapiro(method_tepr_2)
print('Shapiro-Wilk M2 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(method_tepr_3)
print('Shapiro-Wilk M3 = %.3f, p = %.3f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_2)
print('Wilcoxon M2 = %.3f, p = %.5f' % (stat, p))

stat, p = stats.wilcoxon(method_tepr_2)
print('Wilcoxon M3 = %.3f, p = %.5f' % (stat, p))

#paired t-test
t_stat, p_val = stats.ttest_rel(method_data_nav.loc[(slice(None), 2), 'adj_pupil'].reset_index(drop=True), method_data_nav.loc[(slice(None), 2), 'pupil_lum_base'].reset_index(drop=True))
print('Paired t-test M2 = %.3f, p = %.6f' % (t_stat, p_val))
t_stat, p_val = stats.ttest_rel(method_data_nav.loc[(slice(None), 3), 'adj_pupil'].reset_index(drop=True), method_data_nav.loc[(slice(None), 3), 'pupil_lum_base'].reset_index(drop=True))
print('Paired t-test M3 = %.3f, p = %.6f' % (t_stat, p_val))

Shapiro-Wilk M2 = 0.942, p = 0.215
Shapiro-Wilk M3 = 0.911, p = 0.050
Wilcoxon M2 = 0.000, p = 0.00000
Wilcoxon M3 = 0.000, p = 0.00000
Paired t-test M2 = -18.946, p = 0.000000
Paired t-test M3 = -18.246, p = 0.000000


### Navigation Workload by Method

#### IPA Evaluation

In [52]:
ipa_method_2 = ipa_data_nav.loc[(slice(None), 2), 'IPA']
ipa_method_3 = ipa_data_nav.loc[(slice(None), 3), 'IPA']

ipa_method_2.reset_index(drop=True, inplace=True)
ipa_method_3.reset_index(drop=True, inplace=True)

print('Method 2')
print('Mean = %.3f' % ipa_method_2.mean())
print('Median = %.3f' % ipa_method_2.median())
print('Std = %.3f' % ipa_method_2.std())

print('Method 3')
print('Mean = %.3f' % ipa_method_3.mean())
print('Median = %.3f' % ipa_method_3.median())
print('Std = %.3f' % ipa_method_3.std())


#shapiro-wilk test
stat, p = stats.shapiro(ipa_data_nav['IPA'])
print('Shapiro-Wilk = %.3f, p = %.3f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(ipa_method_2, ipa_method_3)
print('Wilcoxon = %.3f, p = %.5f' % (stat, p))

#t-test
stat, p = stats.ttest_rel(ipa_method_2, ipa_method_3)
print('T-test = %.3f, p = %.5f' % (stat, p))



Method 2
Mean = 0.183
Median = 0.177
Std = 0.036
Method 3
Mean = 0.163
Median = 0.157
Std = 0.041
Shapiro-Wilk = 0.958, p = 0.112
Wilcoxon = 41.000, p = 0.00415
T-test = 3.498, p = 0.00214


In [53]:
df = ipa_data_nav.reset_index(level=1)
nav_ipa_fig = px.box(df, x='methodID', y ='IPA', color='methodID', notched=True, title='IPA by Navigation Method', color_discrete_sequence=px.colors.qualitative.D3)
nav_ipa_fig.update_yaxes(range=[0,0.3])
nav_ipa_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='IPA',
    width=600,
)
nav_ipa_fig.show()





#### TEPR Evaluation

In [54]:
# //Method Map:
# //Bulldozer: 0
# //Spatula: 1
# //FourDoF: 2
# //SixDoF: 3

#shapiro-wilk test
statt, pt = stats.shapiro(method_nav_tepr)
stat0, p0 = stats.shapiro(method_tepr_2)
stat1, p1 = stats.shapiro(method_tepr_3)

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_2, method_tepr_3)

#paired t-test
t_stat, p_val = stats.ttest_rel(method_tepr_2, method_tepr_3)

#descriptive stats (avg, median, std)
print('Method 2')
print('Mean = %.3f' % method_tepr_2.mean())
print('Median = %.3f' % method_tepr_2.median())
print('Std = %.3f' % method_tepr_2.std())

print('Method 3')
print('Mean = %.3f' % method_tepr_3.mean())
print('Median = %.3f' % method_tepr_3.median())
print('Std = %.3f' % method_tepr_3.std())

print('Shapiro-Wilk T = %.3f, p = %.3f' % (statt, pt))
print('Wilcoxon = %.3f, p = %.3f' % (stat, p))
print('Paired t-test = %.3f, p = %.3f' % (t_stat, p_val))

Method 2
Mean = 0.505
Median = 0.483
Std = 0.282
Method 3
Mean = 0.517
Median = 0.469
Std = 0.265
Shapiro-Wilk T = 0.863, p = 0.000
Wilcoxon = 113.000, p = 0.679
Paired t-test = -0.374, p = 0.712


In [55]:
df = method_data_nav.loc[(slice(None), slice(None)), 'adj_pupil'].reset_index('methodID')
nav_tepr_fig = px.box(df, x='methodID', y ='adj_pupil', color='methodID', notched=True, title='TEPR by Navigation Method', color_discrete_sequence=px.colors.qualitative.D3)
nav_tepr_fig.update_yaxes(range=[0,1.0])
nav_tepr_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='TEPR',
    width=600,
)
nav_tepr_fig.show()





In [56]:
model_nav_tepr = model_data_nav.loc[(slice(None), slice(None)), ('pupilDiameter', 'pupil_lum_base', 'adj_pupil')]

model_tepr_0 = model_nav_tepr.loc[(slice(None), 0), 'adj_pupil']
model_tepr_1 = model_nav_tepr.loc[(slice(None), 1), 'adj_pupil']
model_tepr_2 = model_nav_tepr.loc[(slice(None), 2), 'adj_pupil']
model_tepr_3 = model_nav_tepr.loc[(slice(None), 3), 'adj_pupil']

#shapiro-wilk test
stat, p = stats.shapiro(model_nav_tepr['adj_pupil'])
print('Shapiro-Wilk total = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(model_tepr_0)
print('Shapiro-Wilk M0 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(model_tepr_1)
print('Shapiro-Wilk M1 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(model_tepr_2)
print('Shapiro-Wilk M2 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(model_tepr_3)
print('Shapiro-Wilk M3 = %.3f, p = %.3f' % (stat, p))

#Friedman test
stat, p = stats.friedmanchisquare(model_tepr_0, model_tepr_1, model_tepr_2, model_tepr_3)
print('Friedman = %.3f, p = %.6f' % (stat, p))


Shapiro-Wilk total = 0.940, p = 0.001
Shapiro-Wilk M0 = 0.928, p = 0.110
Shapiro-Wilk M1 = 0.911, p = 0.050
Shapiro-Wilk M2 = 0.936, p = 0.165
Shapiro-Wilk M3 = 0.926, p = 0.100
Friedman = 4.855, p = 0.182763


### Creation vs Baseline Workload

In [67]:
model_crt_tepr = model_data_crt.loc[(slice(None), slice(None)), ('pupilDiameter', 'pupil_lum_base', 'adj_pupil')]
method_crt_tepr = method_data_crt.loc[(slice(None), slice(None)), ('pupilDiameter', 'pupil_lum_base', 'adj_pupil')]

method_tepr_0 = method_data_crt.loc[(slice(None), 0), 'adj_pupil']
method_tepr_1 = method_data_crt.loc[(slice(None), 1), 'adj_pupil']
method_tepr_0.reset_index(drop=True, inplace=True)
method_tepr_1.reset_index(drop=True, inplace=True)

#shapiro-wilk test
stat0, p0 = stats.shapiro(method_tepr_0)
stat1, p1 = stats.shapiro(method_tepr_1)

#shapiro-wilk test
stat, p = stats.shapiro(method_tepr_0)
print('Shapiro-Wilk M0 = %.3f, p = %.3f' % (stat, p))

stat, p = stats.shapiro(method_tepr_1)
print('Shapiro-Wilk M1 = %.3f, p = %.3f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_0)
print('Wilcoxon M0 = %.3f, p = %.5f' % (stat, p))

stat, p = stats.wilcoxon(method_tepr_1)
print('Wilcoxon M1 = %.3f, p = %.5f' % (stat, p))

#paired t-test
t_stat, p_val = stats.ttest_rel(method_data_crt.loc[(slice(None), 0), 'adj_pupil'].reset_index(drop=True), method_data_crt.loc[(slice(None), 0), 'pupil_lum_base'].reset_index(drop=True))
print('Paired t-test = %.3f, p = %.6f' % (t_stat, p_val))
t_stat, p_val = stats.ttest_rel(method_data_crt.loc[(slice(None), 1), 'adj_pupil'].reset_index(drop=True), method_data_crt.loc[(slice(None), 1), 'pupil_lum_base'].reset_index(drop=True))
print('Paired t-test = %.3f, p = %.6f' % (t_stat, p_val))

box_fig = px.box(method_crt_tepr.reset_index('methodID'), x='methodID', y='adj_pupil', color='methodID', notched=True, title='TEPR by Navigation Model', color_discrete_sequence=px.colors.qualitative.D3)
box_fig.update_layout(width=600)
box_fig.show()

Shapiro-Wilk M0 = 0.962, p = 0.528
Shapiro-Wilk M1 = 0.942, p = 0.222
Wilcoxon M0 = 0.000, p = 0.00000
Wilcoxon M1 = 0.000, p = 0.00000
Paired t-test = -17.769, p = 0.000000
Paired t-test = -19.108, p = 0.000000






### Creation Workload by Method

#### IPA Evaluation

In [58]:
ipa_method_0 = ipa_data_crt.loc[(slice(None), 0), 'IPA']
ipa_method_1 = ipa_data_crt.loc[(slice(None), 1), 'IPA']

ipa_method_0.reset_index(drop=True, inplace=True)
ipa_method_1.reset_index(drop=True, inplace=True)

print('Method 0')
print('Mean = %.3f' % ipa_method_0.mean())
print('Median = %.3f' % ipa_method_0.median())
print('Std = %.3f' % ipa_method_0.std())

print('Method 1')
print('Mean = %.3f' % ipa_method_1.mean())
print('Median = %.3f' % ipa_method_1.median())
print('Std = %.3f' % ipa_method_1.std())

#shapiro-wilk test
stat, p = stats.shapiro(ipa_data_crt['IPA'])
print('Shapiro-Wilk = %.3f, p = %.3f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(ipa_method_0, ipa_method_1)
print('Wilcoxon = %.3f, p = %.5f' % (stat, p))

#t-test
stat, p = stats.ttest_rel(ipa_method_0, ipa_method_1)
print('T-test = %.3f, p = %.5f' % (stat, p))

Method 0
Mean = 0.186
Median = 0.182
Std = 0.043
Method 1
Mean = 0.155
Median = 0.154
Std = 0.040
Shapiro-Wilk = 0.979, p = 0.583
Wilcoxon = 40.000, p = 0.00367
T-test = 3.367, p = 0.00292


#### TEPR Evaluation

In [59]:
#descriptive stats (avg, median, std)
print('Method 0')
print('Mean = %.3f' % method_tepr_0.mean())
print('Median = %.3f' % method_tepr_0.median())
print('Std = %.3f' % method_tepr_0.std())

print('Method 1')
print('Mean = %.3f' % method_tepr_1.mean())
print('Median = %.3f' % method_tepr_1.median())
print('Std = %.3f' % method_tepr_1.std())

#shapiro-wilk test
stat, p = stats.shapiro(method_crt_tepr)
print('Shapiro-Wilk total = %.3f, p = %.5f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_0, method_tepr_1)
print('Wilcoxon = %.3f, p = %.5f' % (stat, p))

#paired t-test
t_stat, p_val = stats.ttest_rel(method_tepr_0, method_tepr_1)
print('Paired t-test = %.3f, p = %.5f' % (t_stat, p_val))


Method 0
Mean = 0.656
Median = 0.649
Std = 0.363
Method 1
Mean = 0.451
Median = 0.481
Std = 0.284
Shapiro-Wilk total = 0.888, p = 0.00000
Wilcoxon = 3.000, p = 0.00000
Paired t-test = 5.173, p = 0.00004


In [60]:
df = method_data_crt.loc[(slice(None), slice(None)), 'adj_pupil'].reset_index('methodID')
crt_tepr_fig = px.box(df, x='methodID', y ='adj_pupil', color='methodID', notched=True, title='TEPR by Creation Method', color_discrete_sequence=px.colors.qualitative.D3)
crt_tepr_fig.update_yaxes(range=[0,1.5])
crt_tepr_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='TEPR',
    width=600,
)
crt_tepr_fig.show()






In [61]:
model_crt_tepr = model_data_crt.loc[(slice(None), slice(None)), ('pupilDiameter', 'pupil_lum_base', 'adj_pupil')]

model_tepr_0 = model_crt_tepr.loc[(slice(None), 0), 'adj_pupil']
model_tepr_1 = model_crt_tepr.loc[(slice(None), 1), 'adj_pupil']
model_tepr_2 = model_crt_tepr.loc[(slice(None), 2), 'adj_pupil']
model_tepr_3 = model_crt_tepr.loc[(slice(None), 3), 'adj_pupil']

#shapiro-wilk test
stat, p = stats.shapiro(model_crt_tepr['adj_pupil'])
print('Shapiro-Wilk total = %.3f, p = %.3f' % (stat, p))

#Friedman test
stat, p = stats.friedmanchisquare(model_tepr_0, model_tepr_1, model_tepr_2, model_tepr_3)
print('Friedman = %.3f, p = %.6f' % (stat, p))

Shapiro-Wilk total = 0.970, p = 0.040
Friedman = 9.436, p = 0.024018


### Discomfort Scores

In [62]:
discomfort_method = discomfort_data.loc[(slice(None), slice(None)), ['Value', 'MethodID', 'ModelID']]
discomfort_method['Value'] = discomfort_method['Value'].astype(float)
discomfort_method['MethodID'] = discomfort_method['MethodID'].astype(float)
discomfort_method['ModelID'] = discomfort_method['ModelID'].astype(float)

# find the average for each user for each method
discomfort_avg = discomfort_method.groupby(['UserID', 'MethodID']).mean()
discomfort_method_2 = discomfort_avg.loc[(slice(None), 2), 'Value']
discomfort_method_3 = discomfort_avg.loc[(slice(None), 3), 'Value']

print('Method 2')
print('Mean = %.3f' % discomfort_method_2.mean())
print('Median = %.3f' % discomfort_method_2.median())
print('Std = %.3f' % discomfort_method_2.std())

print('Method 3')
print('Mean = %.3f' % discomfort_method_3.mean())
print('Median = %.3f' % discomfort_method_3.median())
print('Std = %.3f' % discomfort_method_3.std())

#shapiro-wilk test
stat, p = stats.shapiro(discomfort_avg['Value'])
print('Shapiro-Wilk total = %.3f, p = %.5f' % (stat, p))

#wilcoxon test
stat, p = stats.wilcoxon(discomfort_method_2, discomfort_method_3, zero_method='zsplit')
print('Wilcoxon = %.3f, p = %.5f' % (stat, p))

#paired t-test
t_stat, p_val = stats.ttest_rel(discomfort_method_2, discomfort_method_3)
print('Paired t-test = %.3f, p = %.5f' % (t_stat, p_val))





Exact p-value calculation does not work if there are zeros. Switching to normal approximation.



Method 2
Mean = 1.918
Median = 0.900
Std = 1.866
Method 3
Mean = 2.291
Median = 2.000
Std = 2.109
Shapiro-Wilk total = 0.881, p = 0.00029
Wilcoxon = 72.500, p = 0.07938
Paired t-test = -1.776, p = 0.09019


In [63]:
discomfort_fig = px.box(discomfort_method, x='MethodID', y='Value', color='MethodID', title='Discomfort Ratings by Navigation Method', notched=True, color_discrete_sequence=px.colors.qualitative.D3)
discomfort_fig.update_yaxes(range=[0,10])
discomfort_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='Discomfort Rating',
    width=600,
)
discomfort_fig.show()





In [64]:
model_discomfort = discomfort_method.groupby(['UserID', 'ModelID']).mean()

discomfort_model_0 = model_discomfort.loc[(slice(None), 0), 'Value']
discomfort_model_1 = model_discomfort.loc[(slice(None), 1), 'Value']
discomfort_model_2 = model_discomfort.loc[(slice(None), 2), 'Value']
discomfort_model_3 = model_discomfort.loc[(slice(None), 3), 'Value']

#shapiro-wilk test
stat, p = stats.shapiro(model_discomfort['Value'])
print('Shapiro-Wilk total = %.3f, p = %.6f' % (stat, p))

#Friedman test
stat, p = stats.friedmanchisquare(discomfort_model_0, discomfort_model_1, discomfort_model_2, discomfort_model_3)
print('Friedman = %.3f, p = %.6f' % (stat, p))

Shapiro-Wilk total = 0.886, p = 0.000000
Friedman = 1.761, p = 0.623395


In [65]:
seq_method = seq_data.loc[(slice(None), slice(None)), ['Value', 'MethodID', 'ModelID']]
seq_method['Value'] = seq_method['Value'].astype(float)
seq_method['MethodID'] = seq_method['MethodID'].astype(float)
seq_method['ModelID'] = seq_method['ModelID'].astype(float)

# find the average for each user for each method
seq_avg = seq_method.groupby(['UserID', 'MethodID']).mean()
seq_method_0 = seq_avg.loc[(slice(None), 0), 'Value']
seq_method_1 = seq_avg.loc[(slice(None), 1), 'Value']
seq_method_2 = seq_avg.loc[(slice(None), 2), 'Value']
seq_method_3 = seq_avg.loc[(slice(None), 3), 'Value']

#shapiro-wilk test
statt01, pt01 = stats.shapiro(seq_avg.loc[(slice(None), [0,1]), 'Value'])
statt23, pt23 = stats.shapiro(seq_avg.loc[(slice(None), [2,3]), 'Value'])

#wilcoxon test
statw_01, p_w_01 = stats.wilcoxon(seq_method_0, seq_method_1, zero_method='pratt')
statw_23, p_w_23 = stats.wilcoxon(seq_method_2, seq_method_3, zero_method='pratt')

#paired t-test
t_stat, p_val = stats.ttest_rel(seq_method_0, seq_method_1)
t_stat2, p_val2 = stats.ttest_rel(seq_method_2, seq_method_3)

#descriptive stats (avg, median, std)
print('Method 0')
print('Mean = %.3f' % seq_method_0.mean())
print('Median = %.3f' % seq_method_0.median())
print('Std = %.3f' % seq_method_0.std())

print('Method 1')
print('Mean = %.3f' % seq_method_1.mean())
print('Median = %.3f' % seq_method_1.median())
print('Std = %.3f' % seq_method_1.std())

print('Method 2')
print('Mean = %.3f' % seq_method_2.mean())
print('Median = %.3f' % seq_method_2.median())
print('Std = %.3f' % seq_method_2.std())

print('Method 3')
print('Mean = %.3f' % seq_method_3.mean())
print('Median = %.3f' % seq_method_3.median())
print('Std = %.3f' % seq_method_3.std())

print('Shapiro-Wilk 01 = %.3f, p = %.5f' % (statt01, pt01))
print('Shapiro-Wilk 23 = %.3f, p = %.5f' % (statt23, pt23))

print('Wilcoxon 01 = %.3f, p = %.5f' % (statw_01, p_w_01))
print('Wilcoxon 23 = %.3f, p = %.5f' % (statw_23, p_w_23))

Method 0
Mean = 0.750
Median = 0.750
Std = 0.760
Method 1
Mean = 0.989
Median = 0.875
Std = 1.016
Method 2
Mean = 0.409
Median = 0.000
Std = 0.585
Method 3
Mean = 1.000
Median = 1.125
Std = 0.900
Shapiro-Wilk 01 = 0.862, p = 0.00009
Shapiro-Wilk 23 = 0.823, p = 0.00001
Wilcoxon 01 = 64.000, p = 0.23036
Wilcoxon 23 = 49.000, p = 0.02147



Exact p-value calculation does not work if there are zeros. Switching to normal approximation.



In [66]:
seq_fig = px.box(seq_method, x='MethodID', y='Value', color='MethodID', title='SEQ Ratings by Method', notched=True, color_discrete_sequence=px.colors.qualitative.D3)
seq_fig.update_yaxes(range=[0,6])
seq_fig.update_layout(
    xaxis_title='Method',
    yaxis_title='SEQ Rating',
    width=600,
)
seq_fig.show()



