## Imports

In [1]:
import pyxdf 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from lmfit.models import Model
from os import listdir, getcwd
from os.path import isfile, join
from scipy import stats
from ydata_profiling import ProfileReport

## Function Definitions


Predicted pupil dilation, $d(Y)$, caused by luminance $Y$, is computed with the following equation: $𝑑(𝑌) = 𝑎 · 𝑒^{−𝑏·𝑌} + c$

In [2]:
def pupil_func(x, a, b, c):
    return a * np.exp(-b * x) + c

In [3]:
def import_data(file):
    streams, header = pyxdf.load_xdf(file)
    dfs = {}
    for stream in streams:
        stream_name = stream['info']['name'][0]
        stream_channels = {channel['label'][0]: i for i, channel in enumerate(stream['info']['desc'][0]['channels'][0]['channel'])}
        stream_data = stream['time_series']
        data_dict = {key: np.array(stream_data)[:, index] for key, index in stream_channels.items()}
        data_dict['time'] = np.round(np.array(stream['time_stamps']), decimals=4)
        dfs[stream_name] = pd.DataFrame(data_dict).drop_duplicates(subset=['time']).reset_index(drop=True)
    return dfs

In [4]:
accom_time = pd.to_timedelta(0.5, unit='s')

In [5]:
def process_gaze_luminance_data(stream_df):
    pupil = stream_df['GazeStream'].loc[(stream_df['GazeStream']['LeftEyeIsBlinking'] == 0) & (stream_df['GazeStream']['RightEyeIsBlinking'] == 0) & (stream_df['GazeStream']['LeftPupilDiameter'] > 0) & (stream_df['GazeStream']['RightPupilDiameter'] > 0), ['time', 'MethodID', 'ModelID', 'LeftPupilDiameter', 'RightPupilDiameter']]
    pupil['time'] = pd.to_timedelta(pupil['time'], unit='s')

    lum = stream_df['LuminanceStream'].loc[:, ['time', 'MethodID', 'ModelID', 'Luminance']]
    lum['time'] = pd.to_timedelta(lum['time'], unit='s')

    # Intersection of time stamps
    pupil_lum_time_intersection = np.intersect1d(pupil['time'], lum['time'])

    # Filter pupil and luminance data by intersection
    pupil = pupil[pupil['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)
    lum = lum[lum['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)

    # Combined DataFrame for pupil and luminance
    pupil_lum = pd.DataFrame({
        'time': pd.to_timedelta(lum['time'], unit='s'),
        'luminance': lum['Luminance'],
        'pupilDiameter': 0.5 * (pupil['LeftPupilDiameter'] + pupil['RightPupilDiameter']),
        'methodID': pupil['MethodID'],
        'modelID': pupil['ModelID']
    }).resample('0.1s', on='time').mean()

    pupil_lum['time'] = pupil_lum.index

    return pupil_lum

In [6]:
def process_calibration_data(pupil_lum_df, stream_df):
    calibration_events = stream_df['ExperimentStream'].loc[(stream_df['ExperimentStream']['EventType'] == 'CalibrationColorChange') | (stream_df['ExperimentStream']['SceneEvent'] == 'Calibration') | (stream_df['ExperimentStream']['SceneEvent'] == 'CalibrationComplete'), ['time','SceneEvent', 'EventType']]
    calibration_events['time'] = pd.to_timedelta(calibration_events['time'], unit='s')
    c_start_times = calibration_events[:8]['time']
    c_end_times = calibration_events[1:]['time']
    c_start_times.reset_index(drop=True, inplace=True)
    c_end_times.reset_index(drop=True, inplace=True)

    calib_data = {}
    for i in range(8):
        calib_data[i] = pupil_lum_df.loc[(pupil_lum_df['time'] >= c_start_times[i]) & (pupil_lum_df['time'] <= c_end_times[i]), ['time','luminance', 'pupilDiameter']]
        calib_data[i]['time'] -= calib_data[i]['time'].iloc[0]
        calib_data[i] = calib_data[i].loc[(calib_data[i]['time'] >= accom_time), ['luminance', 'pupilDiameter']]

    calibration_data = pd.concat(calib_data).groupby(level=0).mean().sort_values(by=['luminance']).reset_index(drop=True)
    return calibration_data

In [7]:
def process_navigation_data(pupil_lum_df, stream_df, a, b, c):
    navigation_events = stream_df['ExperimentStream'].loc[(stream_df['ExperimentStream']['SceneEvent'] == 'NavigationComplete') | (stream_df['ExperimentStream']['SceneEvent'] == 'Navigation_Trial'), ['time','SceneEvent', 'EventType', 'ModelID', 'MethodID']]
    navigation_events['time'] = pd.to_timedelta(navigation_events['time'], unit='s')
    nav_start_times = navigation_events.loc[navigation_events['SceneEvent'] == 'Navigation_Trial', 'time']
    nav_end_times = navigation_events.loc[navigation_events['SceneEvent'] == 'NavigationComplete', 'time']

    nav_start_times.reset_index(drop=True, inplace=True)
    nav_end_times.reset_index(drop=True, inplace=True)

    #Correct for occasions when Unity emitted multiple SceneLoaded events for a single trial
    if len(nav_start_times) > 8:
        nav_diff = nav_start_times.diff().dt.total_seconds()
        nav_start_times = nav_start_times.loc[(nav_diff.isnull()) | (nav_diff > 3)]

    nav_start_times.reset_index(drop=True, inplace=True)
    nav_end_times.reset_index(drop=True, inplace=True)

    nav_data = {}
    for i in range(8):
        nav_data[i] = pupil_lum_df.loc[(pupil_lum_df['time']>=nav_start_times.loc[i]) & (pupil_lum_df['time']<=nav_end_times.loc[i]), ['time', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        nav_data[i].set_index('time', inplace=True, drop=False)

    navigation_data = pd.concat(nav_data, names=['trial'])
    navigation_data = navigation_data.groupby(level=0).resample('0.5s', on='time', ).mean()
    navigation_data['plr'] = pupil_func(navigation_data['luminance'], a, b, c)
    navigation_data['tepr'] = navigation_data['pupilDiameter'] - navigation_data['plr']
    
    return navigation_data

In [8]:
def process_creation_data(pupil_lum_df, stream_df, a, b, c):
    creation_events = stream_df['ExperimentStream'].loc[(stream_df['ExperimentStream']['SceneEvent'] == 'Creation_Trial') | (stream_df['ExperimentStream']['SceneEvent'] == 'CreationComplete'), ['time','SceneEvent', 'EventType', 'ModelID', 'MethodID']]
    creation_events['time'] = pd.to_timedelta(creation_events['time'], unit='s')
    crt_start_times = creation_events.loc[creation_events['SceneEvent'] == 'Creation_Trial', 'time']
    crt_end_times = creation_events.loc[creation_events['SceneEvent'] == 'CreationComplete', 'time']

    crt_start_times.reset_index(drop=True, inplace=True)
    crt_end_times.reset_index(drop=True, inplace=True)

    #Correct for occasions when Unity emitted multiple SceneLoaded events for a single trial
    if len(crt_start_times) > 8:
        crt_diff = crt_start_times.diff().dt.total_seconds()
        crt_start_times = crt_start_times.loc[(crt_diff.isnull()) | (crt_diff > 3)]

    crt_start_times.reset_index(drop=True, inplace=True)
    crt_end_times.reset_index(drop=True, inplace=True)

    crt_data = {}
    for i in range(8):
        crt_data[i] = pupil_lum_df.loc[(pupil_lum_df['time']>=crt_start_times.loc[i]) & (pupil_lum_df['time']<=crt_end_times.loc[i]), ['time', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        crt_data[i].set_index('time', inplace=True, drop=False)

    creation_data = pd.concat(crt_data, names=['trial'])
    creation_data = creation_data.groupby(level=0).resample('0.5s', on='time', ).mean()
    creation_data['plr'] = pupil_func(creation_data['luminance'], a, b, c)
    creation_data['tepr'] = creation_data['pupilDiameter'] - creation_data['plr']
    
    return creation_data

In [9]:
def process_discomfort_data(stream_df):
    discomfort_values = stream_df['SurveyStream'].loc[stream_df['SurveyStream']['SurveyType'] == 'Discomfort', ['time', 'Value', 'ModelID', 'MethodID']]
    discomfort_values['time'] = pd.to_timedelta(discomfort_values['time'], unit='s')
    discomfort_values.reset_index(drop=True, inplace=True)
    return discomfort_values

In [10]:
def process_seq_data(stream_df):
    seq_values = stream_df['SurveyStream'].loc[stream_df['SurveyStream']['SurveyType'] == 'SEQ', ['time', 'Value', 'ModelID', 'MethodID']]
    seq_values['time'] = pd.to_timedelta(seq_values['time'], unit='s')
    seq_values.reset_index(drop=True, inplace=True)
    return seq_values

## Import Data

In [11]:
data_dir = join(getcwd(),'Path_Data')
data_files = [join(data_dir, f) for f in listdir(data_dir) if isfile(join(data_dir, f))]

In [12]:
dfs = []
for file in data_files:
    dfs.append(import_data(file))

## Process Data

In [13]:
user_ids = []
user_nav_data = []
user_crt_data = []
user_models_nav = []
user_methods_nav = []
user_params = []
user_calibration = []
user_models_crt = []
user_methods_crt = []
user_seq = []
user_discomfort = []

for df in dfs:
    user_ids.append(df['ExperimentStream']['UserID'][0]) 

    pupil_lum_df = process_gaze_luminance_data(df)
    calibration_data = process_calibration_data(pupil_lum_df, df)

    # Fit pupil response to luminance
    x_data = calibration_data['luminance']
    y_data = calibration_data['pupilDiameter']
    exp_mod = Model(pupil_func)
    params = exp_mod.make_params(a=1, b=4, c=0)
    result = exp_mod.fit(y_data, params, x=x_data)
    a = result.params['a'].value
    b = result.params['b'].value
    c = result.params['c'].value

    user_params.append(pd.DataFrame({'params': [a, b, c]}, index=['a', 'b', 'c']))

    navigation_data = process_navigation_data(pupil_lum_df, df, a, b, c)
    navigation_avg = navigation_data.groupby(level=0).mean()
    navigation_avg.drop(columns=['luminance'], inplace=True)

    user_nav_data.append(navigation_avg)

    models = navigation_avg.reset_index(drop=True)
    model_avg = models.groupby(['modelID']).mean()
    model_avg.drop(columns=['methodID'], inplace=True)
    methods = navigation_avg.reset_index(drop=True)
    method_avg = methods.groupby(['methodID']).mean()
    method_avg.drop(columns=['modelID'], inplace=True)

    user_models_nav.append(model_avg)
    user_methods_nav.append(method_avg)

    creation_data = process_creation_data(pupil_lum_df, df, a, b, c)
    creation_avg = creation_data.groupby(level=0).mean()
    creation_avg.drop(columns=['luminance'], inplace=True)

    user_crt_data.append(creation_avg)

    models = creation_avg.reset_index(drop=True)
    model_avg = models.groupby(['modelID']).mean()
    model_avg.drop(columns=['methodID'], inplace=True)
    methods = creation_avg.reset_index(drop=True)
    method_avg = methods.groupby(['methodID']).mean()
    method_avg.drop(columns=['modelID'], inplace=True)

    user_models_crt.append(model_avg)
    user_methods_crt.append(method_avg)
 
    user_discomfort.append(process_discomfort_data(df))
    user_seq.append(process_seq_data(df))

params = pd.concat(user_params, keys=user_ids, names=['UserID'])
model_data_nav = pd.concat(user_models_nav, keys=user_ids, names=['UserID'])
method_data_nav = pd.concat(user_methods_nav, keys=user_ids, names=['UserID'])
model_data_crt = pd.concat(user_models_crt, keys=user_ids, names=['UserID'])
method_data_crt = pd.concat(user_methods_crt, keys=user_ids, names=['UserID'])
nav_data = pd.concat(user_nav_data, keys=user_ids, names=['UserID'])
crt_data = pd.concat(user_crt_data, keys=user_ids, names=['UserID'])
seq_data = pd.concat(user_seq, keys=user_ids, names=['UserID'])
discomfort_data = pd.concat(user_discomfort, keys=user_ids, names=['UserID'])

## Statistical Analysis

### Navigation Workload

In [14]:
model_nav_tepr = model_data_nav.loc[(slice(None), slice(None)), 'tepr']
method_nav_tepr = method_data_nav.loc[(slice(None), slice(None)), 'tepr']

method_tepr_2 = method_data_nav.loc[(slice(None), 2), 'tepr']
method_tepr_3 = method_data_nav.loc[(slice(None), 3), 'tepr']
method_tepr_2.reset_index(drop=True, inplace=True)
method_tepr_3.reset_index(drop=True, inplace=True)

#shapiro-wilk test
statt, pt = stats.shapiro(method_nav_tepr)
stat0, p0 = stats.shapiro(method_tepr_2)
stat1, p1 = stats.shapiro(method_tepr_3)

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_2, method_tepr_3)

#paired t-test
t_stat, p_val = stats.ttest_rel(method_tepr_2, method_tepr_3)

#descriptive stats (avg, median, std)
print('Method 2')
print('Mean = %.3f' % method_tepr_2.mean())
print('Median = %.3f' % method_tepr_2.median())
print('Std = %.3f' % method_tepr_2.std())

print('Method 3')
print('Mean = %.3f' % method_tepr_3.mean())
print('Median = %.3f' % method_tepr_3.median())
print('Std = %.3f' % method_tepr_3.std())

print('Shapiro-Wilk T = %.3f, p = %.3f' % (statt, pt))
print('Shapiro-Wilk M2 = %.3f, p = %.3f' % (stat0, p0))
print('Shapiro-Wilk M3 = %.3f, p = %.3f' % (stat1, p1))
print('Wilcoxon = %.3f, p = %.3f' % (stat, p))
print('Paired t-test = %.3f, p = %.3f' % (t_stat, p_val))

Method 2
Mean = 0.491
Median = 0.464
Std = 0.275
Method 3
Mean = 0.490
Median = 0.431
Std = 0.286
Shapiro-Wilk T = 0.928, p = 0.034
Shapiro-Wilk M2 = 0.954, p = 0.548
Shapiro-Wilk M3 = 0.887, p = 0.051
Wilcoxon = 67.000, p = 0.980
Paired t-test = 0.021, p = 0.984


### Creation Workload

In [15]:
model_crt_tepr = model_data_crt.loc[(slice(None), slice(None)), 'tepr']
method_crt_tepr = method_data_crt.loc[(slice(None), slice(None)), 'tepr']

method_tepr_0 = method_data_crt.loc[(slice(None), 0), 'tepr']
method_tepr_1 = method_data_crt.loc[(slice(None), 1), 'tepr']
method_tepr_0.reset_index(drop=True, inplace=True)
method_tepr_1.reset_index(drop=True, inplace=True)

#shapiro-wilk test
statt, pt = stats.shapiro(method_crt_tepr)
stat0, p0 = stats.shapiro(method_tepr_0)
stat1, p1 = stats.shapiro(method_tepr_1)

#wilcoxon test
stat, p = stats.wilcoxon(method_tepr_0, method_tepr_1)

#paired t-test
t_stat, p_val = stats.ttest_rel(method_tepr_0, method_tepr_1)

#descriptive stats (avg, median, std)
print('Method 0')
print('Mean = %.3f' % method_tepr_0.mean())
print('Median = %.3f' % method_tepr_0.median())
print('Std = %.3f' % method_tepr_0.std())

print('Method 1')
print('Mean = %.3f' % method_tepr_1.mean())
print('Median = %.3f' % method_tepr_1.median())
print('Std = %.3f' % method_tepr_1.std())

print('Shapiro-Wilk T = %.3f, p = %.5f' % (statt, pt))
print('Shapiro-Wilk M2 = %.3f, p = %.5f' % (stat0, p0))
print('Shapiro-Wilk M3 = %.3f, p = %.5f' % (stat1, p1))
print('Wilcoxon = %.3f, p = %.5f' % (stat, p))
print('Paired t-test = %.3f, p = %.5f' % (t_stat, p_val))

Method 0
Mean = 0.557
Median = 0.581
Std = 0.300
Method 1
Mean = 0.430
Median = 0.473
Std = 0.275
Shapiro-Wilk T = 0.963, p = 0.33435
Shapiro-Wilk M2 = 0.962, p = 0.69258
Shapiro-Wilk M3 = 0.951, p = 0.50215
Wilcoxon = 5.000, p = 0.00031
Paired t-test = 3.327, p = 0.00460


### Discomfort Scores

In [23]:
discomfort_method = discomfort_data.loc[(slice(None), slice(None)), ['Value', 'MethodID', 'ModelID']]
discomfort_method['Value'] = discomfort_method['Value'].astype(float)
discomfort_method['MethodID'] = discomfort_method['MethodID'].astype(float)
discomfort_method['ModelID'] = discomfort_method['ModelID'].astype(float)

# find the average for each user for each method
discomfort_avg = discomfort_method.groupby(['UserID', 'MethodID']).mean()
discomfort_method_2 = discomfort_avg.loc[(slice(None), 2), 'Value']
discomfort_method_3 = discomfort_avg.loc[(slice(None), 3), 'Value']

#shapiro-wilk test
stat2, p2 = stats.shapiro(discomfort_method_2)
stat3, p3 = stats.shapiro(discomfort_method_3)

#wilcoxon test
#stat, p = stats.wilcoxon(discomfort_method_2, discomfort_method_3, zero_method='zsplit')

#paired t-test
t_stat, p_val = stats.ttest_rel(discomfort_method_2, discomfort_method_3)

#descriptive stats (avg, median, std)
print('Method 2')
print('Mean = %.3f' % discomfort_method_2.mean())
print('Median = %.3f' % discomfort_method_2.median())
print('Std = %.3f' % discomfort_method_2.std())

print('Method 3')
print('Mean = %.3f' % discomfort_method_3.mean())
print('Median = %.3f' % discomfort_method_3.median())
print('Std = %.3f' % discomfort_method_3.std())

print('Shapiro-Wilk T = %.3f, p = %.5f' % (statt, pt))
print('Shapiro-Wilk M2 = %.3f, p = %.5f' % (stat2, p2))
print('Shapiro-Wilk M3 = %.3f, p = %.5f' % (stat3, p3))
#print('Wilcoxon = %.3f, p = %.3f' % (stat, p))
print('Paired t-test = %.3f, p = %.3f' % (t_stat, p_val))




Method 2
Mean = 1.850
Median = 0.900
Std = 1.994
Method 3
Mean = 2.325
Median = 2.000
Std = 2.374
Shapiro-Wilk T = 0.963, p = 0.33435
Shapiro-Wilk M2 = 0.820, p = 0.00503
Shapiro-Wilk M3 = 0.874, p = 0.03184
Paired t-test = -2.328, p = 0.034


In [33]:
seq_method = seq_data.loc[(slice(None), slice(None)), ['Value', 'MethodID', 'ModelID']]
seq_method['Value'] = seq_method['Value'].astype(float)
seq_method['MethodID'] = seq_method['MethodID'].astype(float)
seq_method['ModelID'] = seq_method['ModelID'].astype(float)

# find the average for each user for each method
seq_avg = seq_method.groupby(['UserID', 'MethodID']).mean()
seq_method_0 = seq_avg.loc[(slice(None), 0), 'Value']
seq_method_1 = seq_avg.loc[(slice(None), 1), 'Value']
seq_method_2 = seq_avg.loc[(slice(None), 2), 'Value']
seq_method_3 = seq_avg.loc[(slice(None), 3), 'Value']

#shapiro-wilk test
statt, pt = stats.shapiro(seq_method)

#wilcoxon test
statw_01, p_w_01 = stats.wilcoxon(seq_method_0, seq_method_1, zero_method='pratt')
statw_23, p_w_23 = stats.wilcoxon(seq_method_2, seq_method_3, zero_method='pratt')

#paired t-test
t_stat, p_val = stats.ttest_rel(seq_method_0, seq_method_1)
t_stat2, p_val2 = stats.ttest_rel(seq_method_2, seq_method_3)

#descriptive stats (avg, median, std)
print('Method 0')
print('Mean = %.3f' % seq_method_0.mean())
print('Median = %.3f' % seq_method_0.median())
print('Std = %.3f' % seq_method_0.std())

print('Method 1')
print('Mean = %.3f' % seq_method_1.mean())
print('Median = %.3f' % seq_method_1.median())
print('Std = %.3f' % seq_method_1.std())

print('Method 2')
print('Mean = %.3f' % seq_method_2.mean())
print('Median = %.3f' % seq_method_2.median())
print('Std = %.3f' % seq_method_2.std())

print('Method 3')
print('Mean = %.3f' % seq_method_3.mean())
print('Median = %.3f' % seq_method_3.median())
print('Std = %.3f' % seq_method_3.std())

print('Shapiro-Wilk = %.3f, p = %.5f' % (statt, pt))

print('Wilcoxon 01 = %.3f, p = %.5f' % (statw_01, p_w_01))
print('Wilcoxon 23 = %.3f, p = %.5f' % (statw_23, p_w_23))

Method 0
Mean = 0.547
Median = 0.125
Std = 0.660
Method 1
Mean = 0.938
Median = 0.500
Std = 1.124
Method 2
Mean = 0.422
Median = 0.125
Std = 0.604
Method 3
Mean = 0.797
Median = 0.500
Std = 0.877
Shapiro-Wilk = 0.846, p = 0.00000
Wilcoxon 01 = 22.500, p = 0.08693
Wilcoxon 23 = 41.000, p = 0.24756


