## Imports

In [1]:
import pyxdf 
import numpy as np
import pandas as pd
from lmfit.models import Model
from os import listdir, getcwd
from os.path import isfile, join
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pywt
import math
from pandas.api.types import CategoricalDtype

## Pupillary Functions


Task evoked pupillary response is calculated after correcting for luminance-induced pupil dilation: $𝑇𝐸𝑃𝑅 = 𝑑_m − 𝑑(𝑌)$, where $d_m$ is the measured pupil dilation, and $d(Y)$ is the predicted pupil dilation for the given luminance level. 

Predicted pupil dilation is calculated from a calibration sequence that produces and individual mapping model for each participant. The calibration sequence consists of 8 solid gray colors with varying luminance levels displayed in a psuedo-random order for 6 seconds each. The luminance levels span the range from 0.0 to 0.78, and for each calibration level, the first 0.5s of data is discarded to account for the initial pupillary response to the change in luminance, which can take a maximum of 0.5s. . The individual mapping model is calculated using a non-linear least squares regression to fit the equation $𝑑(𝑌) = 𝑎 · 𝑒^{−𝑏·𝑌} + c$ to the measured pupil dilation data for each participant. 

Pupil dilation data and the average luminance data were collected at 90 Hz, the display rate of the HMD.

See: Eckert, M., Robotham, T., Habets, E. A. P., and Rummukainen, O. S. (2022). Pupillary Light Reflex Correction for Robust Pupillometry in Virtual Reality. Proc. ACM Comput. Graph. Interact. Tech. 5, 1–16. doi: 10.1145/3530798

In [2]:
def pupil_func(x, a, b, c):
    return a * np.exp(-b * x) + c

In [3]:
def modmax(d):
    # compute signal modulus
    m = [0.0]*len(d)
    for i in range(len(d)):
        m[i] = math.fabs(d[i])
    # if value is larger than both neighbours , and strictly larger than either , then it is a local maximum
    t = [0.0]*len(d)
    for i in range(len(d)):
        ll = m[i -1] if i >= 1 else m[i]
        oo = m[i]
        rr = m[i+1] if i < len(d)-2 else m[i]
        if (ll <= oo and oo >= rr) and (ll < oo or oo > rr):
        # compute magnitude
            t[i] = math.sqrt(d[i]**2)
        else:
            t[i] = 0.0
    return t

In [4]:
def ipa_func(d):
    # obtain 2-level DWT of pupil diameter signal d
    try:
        (cA2 ,cD2 ,cD1) = pywt.wavedec(d,'sym16', 'per', level=2)
    except ValueError :
        return
    # get signal duration (in seconds)
    tt = d.index[-1] - d.index[0]
    # normalize by 1/2 j , j = 2 for 2-level DWT
    cA2 [:] = [x / math.sqrt (4.0) for x in cA2]
    cD1 [:] = [x / math.sqrt (2.0) for x in cD1]
    cD2 [:] = [x / math.sqrt (4.0) for x in cD2]

    # detect modulus maxima , see Listing 2
    cD2m = modmax(cD2)

    # threshold using universal threshold λuniv = σˆp(2logn)
    # where σˆ is the standard deviation of the noise
    λuniv = np.std(cD2m) * math.sqrt (2.0* np.log2(len(cD2m )))
    cD2t = pywt.threshold(cD2m ,λuniv,mode="hard")
    
    # compute IPA
    ctr = 0
    for i in range(len(cD2t )):
        if math.fabs(cD2t[i]) > 0: ctr += 1
    IPA = float(ctr)/tt.total_seconds()

    return IPA

## Statistical Functions

In [5]:
def iqr_outlier_indices(data):
    q1 = data.quantile(.25)
    q3 = data.quantile(.75)
    iqr = stats.iqr(data, nan_policy='omit', rng=(25, 75))
    return np.where((data < (q1 - 1.5 * iqr)) | (data > (q3 + 1.5 * iqr)))

In [6]:
def iqr_stats(data):
    q1 = np.percentile(data, 25)
    q3 = np.percentile(data, 75)
    iqr = stats.iqr(data, nan_policy='omit', rng=(25, 75))
    return iqr, q1, q3

In [7]:
def get_results_colors(np, wp, tp):
    pastels = px.colors.qualitative.Pastel2
    default_color = 'white'
    significant_color = pastels[0]
    non_significant_color = pastels[3]

    normal_color = non_significant_color if np < 0.05 else significant_color
    wilcox_color = default_color
    ttest_color = default_color
    if np < 0.05:
        wilcox_color = significant_color if wp < 0.05 else non_significant_color
    else:
        ttest_color = significant_color if tp < 0.05 else non_significant_color

    fill_color = [[default_color, default_color, default_color],
                  [default_color, wilcox_color, ttest_color] , 
                  [normal_color, wilcox_color, ttest_color]]
    
    return fill_color

## Data Processing Functions

In [8]:
def import_data(file):
    streams, header = pyxdf.load_xdf(file)
    dfs = {}
    for stream in streams:
        stream_name = stream['info']['name'][0]
        stream_channels = {channel['label'][0]: i for i, channel in enumerate(stream['info']['desc'][0]['channels'][0]['channel'])}
        stream_data = stream['time_series']
        data_dict = {key: np.array(stream_data)[:, index] for key, index in stream_channels.items()}
        data_dict['time'] = np.round(np.array(stream['time_stamps']), decimals=4)
        dfs[stream_name] = pd.DataFrame(data_dict).drop_duplicates(subset=['time']).reset_index(drop=True)
    return dfs

In [9]:
accom_time = pd.to_timedelta(0.5, unit='s')

In [76]:
method_cats = CategoricalDtype(['4DoF','6DoF', 'unimanual','bimanual'], ordered=False)
model_cats = CategoricalDtype(['A', 'B', 'C', 'D'], ordered=True)
block_cats = CategoricalDtype(['0', '1', '2', '3'], ordered=True)
event_cats = CategoricalDtype(['Start', 'PointPlaced', 'Move', 'End', 'Draw', 'Erase', 'PointDeleted'], ordered=False)
target_cats = CategoricalDtype(['1','2'], ordered=False)
trial_cats = CategoricalDtype(['0','1','2','3'], ordered=True)
data_names = ['id', 'block', 'model', 'method']

In [11]:
def process_gaze_luminance_data(stream_df):
    pupil = stream_df['GazeStream'].loc[(stream_df['GazeStream']['LeftEyeIsBlinking'] == 0) 
                                        & (stream_df['GazeStream']['RightEyeIsBlinking'] == 0) 
                                        & (stream_df['GazeStream']['LeftPupilDiameter'] > 0) 
                                        & (stream_df['GazeStream']['RightPupilDiameter'] > 0), 
                                        ['time', 'MethodID', 'ModelID', 'LeftPupilDiameter', 'RightPupilDiameter']]
    pupil['time'] = pd.to_timedelta(pupil['time'], unit='s')

    lum = stream_df['LuminanceStream'].loc[:, ['time', 'MethodID', 'ModelID', 'Luminance']]
    lum['time'] = pd.to_timedelta(lum['time'], unit='s')

    # Intersection of time stamps
    pupil_lum_time_intersection = np.intersect1d(pupil['time'], lum['time'])

    # Filter pupil and luminance data by intersection
    pupil = pupil[pupil['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)
    lum = lum[lum['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)

    # Combined DataFrame for pupil and luminance
    pupil_lum = pd.DataFrame({
        'time': pd.to_timedelta(pupil_lum_time_intersection, unit='s'),
        'luminance': lum['Luminance'],
        'pupilDiameter': 0.5 * (pupil['LeftPupilDiameter'] + pupil['RightPupilDiameter']),
        'methodID': pupil['MethodID'],
        'modelID': pupil['ModelID']
    })

    outliers = iqr_outlier_indices(pupil_lum['pupilDiameter'])
    pupil_lum = pupil_lum.drop(pupil_lum.iloc[outliers].index).reset_index(drop=True)

    return pupil_lum

In [12]:
def process_calibration_data(pupil_lum_df, stream_df):
    calibration_events = stream_df['ExperimentStream'].loc[(stream_df['ExperimentStream']['EventType'] == 'CalibrationColorChange') | 
                                                           (stream_df['ExperimentStream']['SceneEvent'] == 'Calibration') | 
                                                           (stream_df['ExperimentStream']['SceneEvent'] == 'CalibrationComplete'), 
                                                           ['time','SceneEvent', 'EventType']]
    calibration_events['time'] = pd.to_timedelta(calibration_events['time'], unit='s')
    c_start_times = calibration_events[:8]['time']
    c_end_times = calibration_events[1:]['time']
    c_start_times.reset_index(drop=True, inplace=True)
    c_end_times.reset_index(drop=True, inplace=True)

    calib_data = {}
    for i in range(8):
        calib_data[i] = pupil_lum_df.loc[(pupil_lum_df['time'] >= c_start_times[i]) & (pupil_lum_df['time'] <= c_end_times[i]), ['time','luminance', 'pupilDiameter']]
        calib_data[i]['time'] -= calib_data[i]['time'].iloc[0]
        calib_data[i] = calib_data[i].loc[(calib_data[i]['time'] >= accom_time), ['luminance', 'pupilDiameter']]

    calibration_data = pd.concat(calib_data).groupby(level=0).mean().sort_values(by=['luminance']).reset_index(drop=True)
    return calibration_data

In [13]:
def process_navigation_data(pupil_lum_df, stream_df, a, b, c):
    grouped_data = stream_df['NavigationStream'].groupby(['ModelID', 'MethodID'])

    stream_df['SurveyStream']['ModelID'] = stream_df['SurveyStream']['ModelID'].astype(float)
    stream_df['SurveyStream']['MethodID'] = stream_df['SurveyStream']['MethodID'].astype(float)
    
    discomfort_survey = stream_df['SurveyStream'].loc[
        (stream_df['SurveyStream']['SurveyType'] == 'Discomfort') & 
        (stream_df['SurveyStream']['ModelID'] < 4), 
        ['time', 'ModelID', 'MethodID']]
    survey_group = discomfort_survey.groupby(['ModelID', 'MethodID'])

    start_times = []
    end_times = []
    total_time = []

    for i in range(4):
        for j in range(2,4):
            trial = grouped_data.get_group((i, j))

            start = trial.loc[(trial['spline_percent'] > 0.001)].index[0]
            start_time = pd.to_timedelta(stream_df['NavigationStream'].loc[start, 'time'], unit='s')

            end = trial.loc[(trial['spline_percent'] > 0.995)]
            end_time = 0
            # For 6DoF navigation, completion was determined by collision with bounding box
            # Spline percentage was based on projection, so it may not reach > 0.995.
            # In this case, the survey time serves as the end time (rather than lowering the threshold)
            if len(end) > 0:
                end = end.index[0]
                end_time = pd.to_timedelta(stream_df['NavigationStream'].loc[end, 'time'], unit='s')
            else:
                end = survey_group.get_group((i, j)).index[0]
                end_time = pd.to_timedelta(stream_df['SurveyStream'].loc[end, 'time'], unit='s') - pd.offsets.Second(3)
            
            start_times.append(start_time)
            end_times.append(end_time)
            total_time.append((end_time - start_time).total_seconds())


    nav_start_times = start_times
    nav_end_times = end_times

    nav_data = {}
    for i in range(8):
        nav_data[i] = pupil_lum_df.loc[
            (pupil_lum_df['luminance'] >0) & 
            (pupil_lum_df['time']>nav_start_times[i]) & 
            (pupil_lum_df['time']<nav_end_times[i]), 
            ['time', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        nav_data[i].reset_index(drop=True, inplace=True)

    navigation_data = pd.concat(nav_data, names=['trial'])
    navigation_data['pupil_lum_base'] = pupil_func(navigation_data['luminance'], a, b, c)
    navigation_data['adj_pupil'] = navigation_data['pupilDiameter'] - navigation_data['pupil_lum_base']

    return navigation_data, total_time

In [14]:
def process_creation_data(pupil_lum_df, stream_df, a, b, c):
    
    crt_start_times = stream_df['CreationStream'].loc[(stream_df['CreationStream']['EventName'] == 'StartPointRegistered'), ['time', 'ModelID', 'MethodID']]
    crt_start_times = pd.to_timedelta(crt_start_times.groupby(['ModelID', 'MethodID']).first()['time'], unit='s') + pd.offsets.Second(2)
    crt_start_times.reset_index(drop=True, inplace=True)

    crt_end_times = stream_df['CreationStream'].loc[(stream_df['CreationStream']['EventName'] == 'FinishPath'), ['time', 'ModelID', 'MethodID']]
    crt_end_times = pd.to_timedelta(crt_end_times.groupby(['ModelID', 'MethodID']).first()['time'], unit='s')
    crt_end_times.reset_index(drop=True, inplace=True)
    
    total_time = crt_end_times - crt_start_times
    total_time = total_time.apply(lambda x: x.total_seconds())

    crt_data = {}
    for i in range(8):
        crt_data[i] = pupil_lum_df.loc[
            (pupil_lum_df['time'] > crt_start_times.loc[i]) & 
            (pupil_lum_df['time'] < crt_end_times.loc[i]), 
            ['time', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        crt_data[i].reset_index(drop=True, inplace=True)

    creation_data = pd.concat(crt_data, names=['trial'])
    creation_data['pupil_lum_base'] = pupil_func(creation_data['luminance'], a, b, c)
    creation_data['adj_pupil'] = creation_data['pupilDiameter'] - creation_data['pupil_lum_base']
    
    return creation_data, total_time

In [15]:
def process_creation_stats(stream_df):

    group = stream_df['CreationStream'].groupby(['ModelID', 'MethodID'])
    creation_counts  = []

    for i in range(4):
            for j in range(0,2):
                trial = group.get_group((i,j))
                creation_counts.append(trial.groupby('EventType', observed=False).size().fillna(0))
    
    keys = [(i,j) for i in range(4) for j in range(0,2)]
    creation_stats = pd.concat(creation_counts, axis=0, keys=keys, names=['ModelID', 'MethodID']).unstack(level=2)
    creation_stats = creation_stats.drop(columns=['End', 'Start'])
    return creation_stats

In [16]:
def process_target_trial_data_crt(stream_df):
    target_set = stream_df['ExperimentStream'].loc[
        (stream_df['ExperimentStream']['EventType'] == 'TargetPointsSet') | 
        (stream_df['ExperimentStream']['EventType'] == 'SceneLoaded') & 
        (stream_df['ExperimentStream']['SceneEvent'] != 'Calibration') & 
        (stream_df['ExperimentStream']['MethodID'] != '2') & 
        (stream_df['ExperimentStream']['MethodID'] != '3'), ['SceneEvent', 'EventType', 'ModelID', 'MethodID']]

    generic_idx = np.where(target_set['ModelID'] == 'generic')[0]
    idx = [x+1 for x in generic_idx]

    for i in range(len(generic_idx)):
        target_set.loc[target_set.index[generic_idx[i]], 'ModelID']  = target_set.loc[target_set.index[idx[i]], 'ModelID']
        target_set.loc[target_set.index[generic_idx[i]], 'MethodID']  = target_set.loc[target_set.index[idx[i]], 'MethodID']

    target_set['MethodID'] = target_set['MethodID'].astype(float)
    target_set['ModelID'] = target_set['ModelID'].astype(float)
    target_set = target_set.loc[(target_set['ModelID'] < 4) & (target_set['EventType'] == 'TargetPointsSet'), ['SceneEvent', 'ModelID', 'MethodID']]

    target_set['SceneEvent'] = target_set.mask(target_set['SceneEvent'] == 'TargetsLoaded_Set1', target_cats.categories[0])['SceneEvent']
    target_set['SceneEvent'] = target_set.mask(target_set['SceneEvent'] == 'TargetsLoaded_Set2', target_cats.categories[1])['SceneEvent']

    target_set['targetID'] = target_set['SceneEvent']
    target_set.drop(columns=['SceneEvent'], inplace=True)
    target_set['trialID'] = [x % 4 for x in range(0,8)]
    target_set['trialID'] = target_set['trialID'].astype('string').astype(trial_cats)

    return target_set


In [17]:
def process_target_trial_data_nav(stream_df):
    target_set = df['NavigationStream'].loc[(df['NavigationStream']['ModelID'] < 4), ['model_source', 'ModelID', 'MethodID', 'time']]
target_set['time'] = pd.to_timedelta(target_set['time'], unit='s')

target_set['trial_id'] = -1
target_set['creation_method'] = '-1'
target_set['target_set_source'] = '-1'

trials = []

for i in range(8):
    trial = target_set.loc[(target_set['time'] >= start_times[i]) & (target_set['time'] <= end_times[i])]
    trials.append(trial)

trials.sort(key=lambda x: x['time'].iloc[0])   

ids = [x%4 for x in range(0,8)]

for i in range(8):
    trials[i].loc[:, 'trial_id'] = ids[i]

    model_source = trials[i]['model_source'].iloc[0]
    method = '-1'
    method_id = -1
    if model_source - 16 > 8:
        method = 'unimanual'
        method_id = 0
    else:
        method = 'bimanual'
        method_id = 1

    trials[i].loc[:, 'creation_method'] = method

    model_i = trials[i]['ModelID'].iloc[0]
    group = target_set_group.get_group((int(model_i),int(method_id)))

    target = group['targetID'].iloc[0]
    trials[i].loc[:, 'target_set_source'] = target

    target_set = pd.concat(trials)

    target_set['trial_id'] = target_set['trial_id'].astype('string').astype(trial_cats)
    target_set['creation_method'] = target_set['creation_method'].astype('string').astype(method_cats)
    target_set['target_set_source'] = target_set['target_set_source'].astype('string').astype(target_cats)

In [18]:
def process_discomfort_data(stream_df):
    discomfort_values = stream_df['SurveyStream'].loc[stream_df['SurveyStream']['SurveyType'] == 'Discomfort', ['time', 'Value', 'ModelID', 'MethodID']]
    discomfort_values['time'] = pd.to_timedelta(discomfort_values['time'], unit='s')
    discomfort_values.reset_index(drop=True, inplace=True)
    return discomfort_values

In [19]:
def process_seq_data(stream_df):
    seq_values = stream_df['SurveyStream'].loc[stream_df['SurveyStream']['SurveyType'] == 'SEQ', ['time', 'Value', 'ModelID', 'MethodID']]
    seq_values['time'] = pd.to_timedelta(seq_values['time'], unit='s')
    seq_values.reset_index(drop=True, inplace=True)
    return seq_values

In [20]:
def process_ipa_calc(data):
    methods = []
    models = []
    ipa = []
    for i in range(8):
        methods.append(data.loc[i]['methodID'].iloc[i])
        models.append(data.loc[i]['modelID'].iloc[i])
        pupil = data.loc[i]['pupilDiameter']
        pupil.index = data.loc[i]['time']
        ipa.append(ipa_func(pupil))
        
    return pd.DataFrame({'methodID': methods, 'modelID': models, 'IPA': ipa})

## Import Data

In [21]:
data_dir = join(getcwd(),'Path_Data')
data_files = [join(data_dir, f) for f in listdir(data_dir) if isfile(join(data_dir, f))]

In [22]:
dfs = []
for file in data_files:
    dfs.append(import_data(file))

## Process Data

In [23]:
user_dfs_nav = []
user_dfs_crt = []
ids = []
blocks = []

for df in dfs:
    id = df['ExperimentStream']['UserID'][0]
    block = df['ExperimentStream']['BlockID'][0]
    ids.append(id)
    blocks.append(block)

    # Remove final empty row from string data streams
    df['SurveyStream'] = df['SurveyStream'].replace(r'^\s*$', np.nan, regex=True).dropna()
    df['CreationStream'] = df['CreationStream'].replace(r'^\s*$', np.nan, regex=True).dropna()

    df['SurveyStream']['ModelID'] = df['SurveyStream']['ModelID'].astype(float)
    df['SurveyStream']['MethodID'] = df['SurveyStream']['MethodID'].astype(float)
    df['SurveyStream']['Value'] = df['SurveyStream']['Value'].astype(float)

    df['CreationStream']['ModelID'] = df['CreationStream']['ModelID'].astype(float)
    df['CreationStream']['MethodID'] = df['CreationStream']['MethodID'].astype(float)
    df['CreationStream']['EventType'] = df['CreationStream']['EventType'].astype(event_cats)

    pupil_lum_df = process_gaze_luminance_data(df)
    calibration_data = process_calibration_data(pupil_lum_df, df)

    # Fit pupil response to luminance
    x_data = calibration_data['luminance']
    y_data = calibration_data['pupilDiameter']
    exp_mod = Model(pupil_func)
    params = exp_mod.make_params(a=1, b=4, c=0)
    result = exp_mod.fit(y_data, params, x=x_data)
    a = result.params['a'].value
    b = result.params['b'].value
    c = result.params['c'].value

    navigation_data, nav_time = process_navigation_data(pupil_lum_df, df, a, b, c)
    ipa_calc_nav = process_ipa_calc(navigation_data)
    creation_data, crt_time = process_creation_data(pupil_lum_df, df, a, b, c)
    ipa_calc_crt = process_ipa_calc(creation_data)
    creation_stats = process_creation_stats(df)
    creation_target_trials = process_target_trial_data_crt(df)
    discomfort = process_discomfort_data(df)
    seq = process_seq_data(df)

    nav_trials = navigation_data.groupby(['modelID', 'methodID'])
    ipa_nav_trials = ipa_calc_nav.groupby(['modelID', 'methodID'])
    crt_trials = creation_data.groupby(['modelID', 'methodID'])
    ipa_crt_trials = ipa_calc_crt.groupby(['modelID', 'methodID'])
    discomfort_trials = discomfort.groupby(['ModelID', 'MethodID'])
    seq_trials = seq.groupby(['ModelID', 'MethodID'])
    crt_target_trials = creation_target_trials.groupby(['ModelID', 'MethodID'])

    nav_data = {}
    nav_total_time = {}
    ipa_nav_data = {}
    crt_data = {}
    crt_total_time = {}
    ipa_crt_data = {}
    discomfort_data = {}
    seq_crt = {}
    seq_nav = {}
    target_data_crt = {}
    target_data_nav = {}
    trial_data_crt = {}
    trial_data_nav = {}

    for i in range(4):
        for j in range(2,4):
            nav_data[(id, block, i, j)] = nav_trials.get_group((i,j)).mean()
            nav_total_time[(id, block, i, j)] = nav_time[(2*i + (j-2))]
            ipa_nav_data[(id, block, i, j)] = ipa_nav_trials.get_group((i,j)).mean()
            discomfort_data[(id, block, i, j)] = discomfort_trials.get_group((i,j)).mean()
            seq_nav[(id, block, i, j)] = seq_trials.get_group((i,j)).mean()
    
    for i in range(4):
        for j in range(0,2):
            crt_data[(id, block, i, j)] = crt_trials.get_group((i,j)).mean()
            crt_total_time[(id, block, i, j)] = crt_time[(2*i + j)]
            ipa_crt_data[(id, block, i, j)] = ipa_crt_trials.get_group((i,j)).mean()
            seq_crt[(id, block, i, j)] = seq_trials.get_group((i,j)).mean()
            target_data_crt[(id, block, i, j)] = crt_target_trials.get_group((i,j))['targetID'].iloc[0]
            trial_data_crt[(id, block, i, j)] = crt_target_trials.get_group((i,j))['trialID'].iloc[0]
    
    nav_index = pd.MultiIndex.from_product([[id], [block], model_cats.categories, method_cats.categories[0:2]], names=data_names)
    crt_index = pd.MultiIndex.from_product([[id], [block], model_cats.categories, method_cats.categories[2:4]], names=data_names)

    nav_data = pd.concat(nav_data, axis=1, names=data_names).T
    nav_data['total_time'] = nav_total_time
    nav_data.index = nav_index
    nav_data.drop(columns=['time', 'modelID', 'methodID'], inplace=True)
    
    crt_data = pd.concat(crt_data, axis=1, names=data_names).T
    crt_data['total_time'] = crt_total_time
    crt_data['target_id'] = target_data_crt
    crt_data['trial_id'] = trial_data_crt
    crt_data.index = crt_index
    crt_data.drop(columns=['time', 'modelID', 'methodID'], inplace=True)

    creation_stats.index = crt_index

    ipa_nav_data = pd.concat(ipa_nav_data, axis=1, names=data_names).T
    ipa_nav_data.index = nav_index
    ipa_nav_data.drop(columns=['modelID', 'methodID'], inplace=True)

    ipa_crt_data = pd.concat(ipa_crt_data, axis=1, names=data_names).T
    ipa_crt_data.index = crt_index
    ipa_crt_data.drop(columns=['modelID', 'methodID'], inplace=True)

    discomfort_data = pd.concat(discomfort_data, axis=1, names=data_names).T
    discomfort_data.index = nav_index
    discomfort_data['discomfort'] = discomfort_data['Value']
    discomfort_data.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    seq_nav = pd.concat(seq_nav, axis=1, names=data_names).T
    seq_nav.index = nav_index
    seq_nav['seq'] = seq_nav['Value']
    seq_nav.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    seq_crt = pd.concat(seq_crt, axis=1, names=data_names).T
    seq_crt.index = crt_index
    seq_crt['seq'] = seq_crt['Value']
    seq_crt.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    df_crt = pd.concat([crt_data, ipa_crt_data, seq_crt, creation_stats], axis=0).stack().unstack()
    df_crt.loc[(slice(None), slice(None), slice(None), slice(None)), ('Draw', 'Erase', 'PointPlaced', 'Move', 'PointDeleted')] = df_crt.loc[(slice(None), slice(None), slice(None), slice(None)), ('Draw', 'Erase', 'PointPlaced', 'Move', 'PointDeleted')].astype(int)
    df_crt.loc[(slice(None), slice(None), slice(None), 'unimanual'), ('Draw', 'Erase')] = df_crt.loc[(slice(None), slice(None), slice(None), 'unimanual'), ('Draw', 'Erase')].astype(int).fillna(0)
    df_crt.loc[(slice(None), slice(None), slice(None), 'bimanual'), ('PointPlaced', 'Move', 'PointDeleted')] = df_crt.loc[(slice(None), slice(None), slice(None), 'bimanual'), ('PointPlaced', 'Move', 'PointDeleted')].astype(int).fillna(0)

    df_nav = pd.concat([nav_data, ipa_nav_data, discomfort_data, seq_nav], axis=0).stack().unstack()

    user_dfs_nav.append(df_nav)
    user_dfs_crt.append(df_crt)


In [24]:
user_data_nav = pd.concat(user_dfs_nav)
nav_dtype = {'luminance': 'float64', 'pupilDiameter': 'float64', 'pupil_lum_base': 'float64', 'adj_pupil': 'float64', 'IPA': 'float64', 'discomfort': 'int32', 'seq': 'int32', 'total_time': 'float64'}
user_data_nav = user_data_nav.astype(nav_dtype)

short_nav = user_data_nav['total_time'] < 10
user_data_nav = user_data_nav[~short_nav]

user_data_crt = pd.concat(user_dfs_crt)
crt_dtype = {'luminance' : 'float64', 'pupilDiameter' : 'float64', 'pupil_lum_base' : 'float64', 'adj_pupil' : 'float64', 'IPA' : 'float64', 'seq' : 'int32', 'PointPlaced' : 'int32', 'Move' : 'int32', 'Draw' : 'int32', 'Erase' : 'int32', 'PointDeleted' : 'int32', 'total_time': 'float64'}
user_data_crt = user_data_crt.astype(crt_dtype)

short_crt = user_data_crt['total_time'] < 10
user_data_crt = user_data_crt[~short_crt]

In [25]:
target_set = df['ExperimentStream'].loc[(df['ExperimentStream']['EventType'] == 'TargetPointsSet') | (df['ExperimentStream']['EventType'] == 'SceneLoaded') & (df['ExperimentStream']['SceneEvent'] != 'Calibration') & (df['ExperimentStream']['MethodID'] != '2') & (df['ExperimentStream']['MethodID'] != '3'), ['SceneEvent', 'EventType', 'ModelID', 'MethodID']]

generic_idx = np.where(target_set['ModelID'] == 'generic')[0]
idx = [x+1 for x in generic_idx]

for i in range(len(generic_idx)):
    target_set.loc[target_set.index[generic_idx[i]], 'ModelID']  = target_set.loc[target_set.index[idx[i]], 'ModelID']
    target_set.loc[target_set.index[generic_idx[i]], 'MethodID']  = target_set.loc[target_set.index[idx[i]], 'MethodID']

target_set['MethodID'] = target_set['MethodID'].astype(float)
target_set['ModelID'] = target_set['ModelID'].astype(float)
target_set = target_set.loc[(target_set['ModelID'] < 4) & (target_set['EventType'] == 'TargetPointsSet'), ['SceneEvent', 'ModelID', 'MethodID']]

target_set['SceneEvent'] = target_set.mask(target_set['SceneEvent'] == 'TargetsLoaded_Set1', target_cats.categories[0])['SceneEvent']
target_set['SceneEvent'] = target_set.mask(target_set['SceneEvent'] == 'TargetsLoaded_Set2', target_cats.categories[1])['SceneEvent']

target_set['targetID'] = target_set['SceneEvent']
target_set.drop(columns=['SceneEvent'], inplace=True)
target_set['trialID'] = [x%4 for x in range(0,8)]
target_set['trialID'] = target_set['trialID'].astype('string').astype(trial_cats)

target_set_group = target_set.groupby(['ModelID', 'MethodID'])

data_target = {}
data_trial = {}

for i in range(4):
    for j in range(0,2):
        group = target_set_group.get_group((i,j))
        data_target[(i, j)] = group['targetID'].iloc[0]
        data_trial[(i, j)] = group['trialID'].iloc[0]



In [26]:
new_nav = df['NavigationStream'].loc[(df['NavigationStream']['ModelID'] < 4), ['model_source', 'ModelID', 'MethodID', 'time']]
new_nav['time'] = pd.to_timedelta(new_nav['time'], unit='s')

groups = new_nav.groupby(['ModelID', 'MethodID'])

start_times = []
end_times = []

for i in range(4):
    for j in range(2,4):
        trial = groups.get_group((i, j))

        start = trial['time'].index[0]
        start_time = pd.to_timedelta(df['NavigationStream'].loc[start, 'time'], unit='s')

        end = trial['time'].index[-1]
        end_time = pd.to_timedelta(df['NavigationStream'].loc[end, 'time'], unit='s')
        
        start_times.append(start_time)
        end_times.append(end_time)

In [82]:
target_set = df['NavigationStream'].loc[(df['NavigationStream']['ModelID'] < 4), ['model_source', 'ModelID', 'MethodID', 'time']]
target_set['time'] = pd.to_timedelta(target_set['time'], unit='s')

target_set['trial_id'] = -1
target_set['creation_method'] = '-1'
target_set['target_set_source'] = '-1'

trials = []

for i in range(8):
    trial = target_set.loc[(target_set['time'] >= start_times[i]) & (target_set['time'] <= end_times[i])]
    trials.append(trial)

trials.sort(key=lambda x: x['time'].iloc[0])   

ids = [x%4 for x in range(0,8)]

target_trials = []

for i in range(8):
    trials[i].loc[:, 'trial_id'] = ids[i]

    model_source = trials[i]['model_source'].iloc[0]
    method = '-1'
    method_id = -1
    if model_source - 16 > 8:
        method = 'unimanual'
        method_id = 0
    else:
        method = 'bimanual'
        method_id = 1

    trials[i].loc[:, 'creation_method'] = method

    model_i = trials[i]['ModelID'].iloc[0]
    group = target_set_group.get_group((int(model_i),int(method_id)))

    target = group['targetID'].iloc[0]
    trials[i].loc[:, 'target_set_source'] = target
    target_trials.append(trials[i].head(1))


target_set = pd.concat(target_trials)

target_set['trial_id'] = target_set['trial_id'].astype('string').astype(trial_cats)
target_set['creation_method'] = target_set['creation_method'].astype('string').astype(method_cats)
target_set['target_set_source'] = target_set['target_set_source'].astype('string').astype(target_cats)


[   model_source  ModelID  MethodID                   time  trial_id  \
 0      20.00000  0.00000   2.00000 3 days 01:37:42.474600         0   
 
   creation_method target_set_source  
 0        bimanual                 2  ,
        model_source  ModelID  MethodID                   time  trial_id  \
 10478      24.00000  1.00000   2.00000 3 days 01:39:45.927300         1   
 
       creation_method target_set_source  
 10478        bimanual                 2  ,
        model_source  ModelID  MethodID                   time  trial_id  \
 19608      18.00000  2.00000   2.00000 3 days 01:41:34.131800         2   
 
       creation_method target_set_source  
 19608        bimanual                 2  ,
        model_source  ModelID  MethodID                   time  trial_id  \
 32014      17.00000  3.00000   2.00000 3 days 01:43:58.573000         3   
 
       creation_method target_set_source  
 32014        bimanual                 2  ,
        model_source  ModelID  MethodID             

In [28]:
target_set = df['NavigationStream'].loc[(df['NavigationStream']['ModelID'] < 4), ['model_source', 'ModelID', 'MethodID', 'time']]
target_set['time'] = pd.to_timedelta(target_set['time'], unit='s')

target_set['trial_id'] = 0
target_set['creation_method'] = 0
target_set['target_set'] = 0

#target_set['trial_id'] = target_set.mask(target_set['time'] < 'TargetsLoaded_Set1', target_cats.categories[0])['trial_id']

target_set

Unnamed: 0,model_source,ModelID,MethodID,time,trial_id,creation_method,target_set
0,20.0,0.0,2.0,3 days 01:37:42.474600,0,0,0
1,20.0,0.0,2.0,3 days 01:37:42.497600,0,0,0
2,20.0,0.0,2.0,3 days 01:37:42.519599999,0,0,0
3,20.0,0.0,2.0,3 days 01:37:42.541600,0,0,0
4,20.0,0.0,2.0,3 days 01:37:42.563700,0,0,0
...,...,...,...,...,...,...,...
94828,34.0,3.0,3.0,3 days 01:58:27.368300,0,0,0
94829,34.0,3.0,3.0,3 days 01:58:27.378500,0,0,0
94830,34.0,3.0,3.0,3 days 01:58:27.390100,0,0,0
94831,34.0,3.0,3.0,3 days 01:58:27.401200,0,0,0


## Navigation Linear Mixed Models

In [29]:
mm_nav_data = user_data_nav
mm_nav_data = mm_nav_data.reset_index(level=(0,1,2, 3))

mm_nav_data['block'] = mm_nav_data['block'].astype(block_cats).cat.codes
mm_nav_data['method'] = mm_nav_data['method'].astype(method_cats).cat.codes
mm_nav_data['model'] = mm_nav_data['model'].astype(model_cats).cat.codes

mm_nav_data['scaled_tepr'] = stats.zscore(mm_nav_data['adj_pupil']).astype('float64')
mm_nav_data['scaled_ipa'] = stats.zscore(mm_nav_data['IPA']).astype('float64')
mm_nav_data['scaled_time'] = stats.zscore(mm_nav_data['total_time']).astype('float64')
mm_nav_data['scaled_seq'] = stats.zscore(mm_nav_data['seq']).astype('float64')
mm_nav_data['scaled_discomfort'] = stats.zscore(mm_nav_data['discomfort']).astype('float64')

pd.options.display.float_format = '{:.5f}'.format
mm_nav_data.describe()

Unnamed: 0,block,model,method,luminance,pupilDiameter,pupil_lum_base,adj_pupil,total_time,IPA,discomfort,seq,scaled_tepr,scaled_ipa,scaled_time,scaled_seq,scaled_discomfort
count,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0,190.0
mean,1.34211,1.48947,0.50526,0.22543,3.67448,3.19926,0.47522,66.71027,0.17402,2.24737,0.71579,0.0,0.0,0.0,-0.0,-0.0
std,1.14726,1.12094,0.50129,0.02232,0.4961,0.48409,0.28883,24.66471,0.05669,2.38311,0.89281,1.00264,1.00264,1.00264,1.00264,1.00264
min,0.0,0.0,0.0,0.15815,2.81509,2.31899,-0.12002,24.1871,0.05486,0.0,0.0,-2.0663,-2.10744,-1.7286,-0.80385,-0.94553
25%,0.0,0.25,0.0,0.21228,3.28657,2.90119,0.23195,49.17595,0.13097,0.0,0.0,-0.84446,-0.76147,-0.71279,-0.80385,-0.94553
50%,1.0,1.0,1.0,0.22415,3.5674,3.08834,0.46525,60.06025,0.1695,1.0,0.0,-0.0346,-0.08,-0.27033,-0.80385,-0.5248
75%,2.0,2.0,1.0,0.2388,4.04028,3.4456,0.701,79.62245,0.20954,4.0,1.0,0.78375,0.62802,0.52489,0.31917,0.73738
max,3.0,3.0,1.0,0.29142,5.20517,4.24519,1.26835,172.9384,0.34251,9.0,4.0,2.75324,2.9797,4.31827,3.68824,2.84102


In [30]:
#plot discomfort per participant



In [31]:
# Responses: IPA
# Fixed: Method
# Controlling for time on task?

In [32]:
md_ipa = smf.mixedlm("scaled_ipa ~ C(method) + C(model) + C(block)" , mm_nav_data, groups=mm_nav_data["id"])
mdf_ipa = md_ipa.fit()
print(mdf_ipa.summary())

          Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: scaled_ipa
No. Observations:  190     Method:             REML      
No. Groups:        24      Scale:              0.5882    
Min. group size:   7       Log-Likelihood:     -241.9446 
Max. group size:   8       Converged:          Yes       
Mean group size:   7.9                                   
---------------------------------------------------------
               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------
Intercept      -0.013    0.254 -0.053 0.958 -0.512  0.485
C(method)[T.1] -0.283    0.111 -2.540 0.011 -0.501 -0.065
C(model)[T.1]   0.009    0.157  0.055 0.956 -0.298  0.315
C(model)[T.2]   0.059    0.157  0.372 0.710 -0.250  0.367
C(model)[T.3]  -0.070    0.157 -0.445 0.656 -0.379  0.239
C(block)[T.1]  -0.178    0.369 -0.483 0.629 -0.902  0.545
C(block)[T.2]   0.069    0.349  0.196 0.844 -0.616  0.753
C(block)[T.3]   0.823   

In [33]:
md_tepr = smf.mixedlm("scaled_tepr ~ C(method) + C(model) + C(block)", mm_nav_data, groups=mm_nav_data["id"])
mdf_tepr = md_tepr.fit()
print(mdf_tepr.summary())

          Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: scaled_tepr
No. Observations: 190     Method:             REML       
No. Groups:       24      Scale:              0.2573     
Min. group size:  7       Log-Likelihood:     -178.3187  
Max. group size:  8       Converged:          Yes        
Mean group size:  7.9                                    
---------------------------------------------------------
               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------
Intercept       0.732    0.280  2.611 0.009  0.182  1.282
C(method)[T.1]  0.044    0.074  0.599 0.549 -0.100  0.188
C(model)[T.1]  -0.268    0.104 -2.585 0.010 -0.471 -0.065
C(model)[T.2]  -0.253    0.104 -2.424 0.015 -0.457 -0.048
C(model)[T.3]  -0.193    0.104 -1.850 0.064 -0.397  0.012
C(block)[T.1]  -1.495    0.437 -3.424 0.001 -2.350 -0.639
C(block)[T.2]  -0.491    0.413 -1.189 0.234 -1.302  0.319
C(block)[T.3]  -0.675   

In [34]:
md_seq = smf.mixedlm("scaled_seq ~ C(method) + C(model) + C(block)", mm_nav_data, groups=mm_nav_data["id"])
mdf_seq = md_seq.fit()
print(mdf_seq.summary())

          Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: scaled_seq
No. Observations:  190     Method:             REML      
No. Groups:        24      Scale:              0.4736    
Min. group size:   7       Log-Likelihood:     -226.9982 
Max. group size:   8       Converged:          Yes       
Mean group size:   7.9                                   
---------------------------------------------------------
               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------
Intercept      -0.007    0.279 -0.024 0.981 -0.554  0.540
C(method)[T.1]  0.660    0.100  6.601 0.000  0.464  0.855
C(model)[T.1]  -0.211    0.140 -1.499 0.134 -0.486  0.065
C(model)[T.2]  -0.333    0.141 -2.357 0.018 -0.610 -0.056
C(model)[T.3]  -0.261    0.141 -1.846 0.065 -0.538  0.016
C(block)[T.1]   0.081    0.421  0.192 0.848 -0.744  0.905
C(block)[T.2]  -0.318    0.398 -0.798 0.425 -1.098  0.463
C(block)[T.3]  -0.280   

In [35]:
md_fms = smf.mixedlm("scaled_discomfort ~ C(method) + C(model) + C(block)", mm_nav_data, groups=mm_nav_data["id"])
mdf_fms = md_fms.fit()
print(mdf_fms.summary())

             Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: scaled_discomfort
No. Observations: 190     Method:             REML             
No. Groups:       24      Scale:              0.1286           
Min. group size:  7       Log-Likelihood:     -126.4691        
Max. group size:  8       Converged:          Yes              
Mean group size:  7.9                                          
----------------------------------------------------------------
                 Coef.   Std.Err.    z     P>|z|  [0.025  0.975]
----------------------------------------------------------------
Intercept         0.359     0.339   1.057  0.291  -0.306   1.024
C(method)[T.1]    0.151     0.052   2.905  0.004   0.049   0.253
C(model)[T.1]     0.026     0.073   0.359  0.719  -0.117   0.170
C(model)[T.2]     0.015     0.074   0.204  0.839  -0.129   0.159
C(model)[T.3]     0.007     0.074   0.092  0.927  -0.138   0.151
C(block)[T.1]    -0.753     0.541  -1.392  0.

## Creation Linear Mixed Models

In [36]:
mm_crt_data = user_data_crt
mm_crt_data = mm_crt_data.reset_index(level=(0,1,2, 3))

mm_crt_data['block'] = mm_crt_data['block'].astype(block_cats).cat.codes
mm_crt_data['method'] = mm_crt_data['method'].astype(method_cats).cat.codes
mm_crt_data['model'] = mm_crt_data['model'].astype(model_cats).cat.codes

mm_crt_data['scaled_tepr'] = stats.zscore(mm_crt_data['adj_pupil']).astype('float64')
mm_crt_data['scaled_ipa'] = stats.zscore(mm_crt_data['IPA']).astype('float64')
mm_crt_data['scaled_time'] = stats.zscore(mm_crt_data['total_time']).astype('float64')
mm_crt_data['scaled_seq'] = stats.zscore(mm_crt_data['seq']).astype('float64')
mm_crt_data['total_interactions'] = mm_crt_data['Draw'] + mm_crt_data['Erase'] + mm_crt_data['PointPlaced'] + mm_crt_data['Move'] + mm_crt_data['PointDeleted']
mm_crt_data['scaled_interactions']  = stats.zscore(mm_crt_data['total_interactions']).astype('float64')

pd.options.display.float_format = '{:.5f}'.format
mm_crt_data.describe()

Unnamed: 0,block,model,method,luminance,pupilDiameter,pupil_lum_base,adj_pupil,total_time,IPA,seq,...,Move,Draw,Erase,PointDeleted,scaled_tepr,scaled_ipa,scaled_time,scaled_seq,total_interactions,scaled_interactions
count,192.0,192.0,192.0,192.0,192.0,192.0,192.0,192.0,192.0,192.0,...,192.0,192.0,192.0,192.0,192.0,192.0,192.0,192.0,192.0,192.0
mean,1.33333,1.5,2.5,0.24124,3.70347,3.17415,0.52932,57.13469,0.17865,0.88542,...,3.82292,2.32812,0.69792,0.20312,-0.0,0.0,-0.0,0.0,11.70312,0.0
std,1.1456,1.12096,0.50131,0.03488,0.59158,0.48764,0.33986,35.43374,0.06688,1.04225,...,8.01242,6.1031,2.65117,0.72023,1.00261,1.00261,1.00261,1.00261,12.50007,1.00261
min,0.0,0.0,2.0,0.16455,2.56291,2.26046,-0.14157,11.2614,0.04348,0.0,...,0.0,0.0,0.0,0.0,-1.97916,-2.02637,-1.29801,-0.85175,1.0,-0.85848
25%,0.0,0.75,2.0,0.20959,3.22292,2.89772,0.25809,33.88775,0.13654,0.0,...,0.0,0.0,0.0,0.0,-0.80015,-0.6312,-0.65778,-0.85175,2.0,-0.77828
50%,1.0,1.5,2.5,0.24039,3.63652,3.0934,0.50755,46.11395,0.17356,1.0,...,0.0,0.5,0.0,0.0,-0.06423,-0.07632,-0.31184,0.11023,8.5,-0.25692
75%,2.0,2.25,3.0,0.26883,4.197,3.43718,0.73177,69.8795,0.22099,2.0,...,2.25,2.0,0.0,0.0,0.59725,0.63476,0.36062,1.0722,15.0,0.26444
max,3.0,3.0,3.0,0.32524,5.37301,4.24859,1.71331,217.871,0.4011,4.0,...,38.0,61.0,19.0,4.0,3.49284,3.33492,4.54811,2.99615,79.0,5.3978


In [37]:
md_ipa_crt = smf.mixedlm("scaled_ipa ~ C(method) + C(model) + C(block)", mm_crt_data, groups=mm_crt_data["id"])
mdf_ipa_crt = md_ipa_crt.fit()
print(mdf_ipa_crt.summary())

          Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: scaled_ipa
No. Observations:  192     Method:             REML      
No. Groups:        24      Scale:              0.7972    
Min. group size:   8       Log-Likelihood:     -266.4728 
Max. group size:   8       Converged:          Yes       
Mean group size:   8.0                                   
---------------------------------------------------------
               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------
Intercept       0.018    0.236  0.074 0.941 -0.446  0.481
C(method)[T.3] -0.349    0.129 -2.705 0.007 -0.601 -0.096
C(model)[T.1]   0.304    0.182  1.668 0.095 -0.053  0.661
C(model)[T.2]   0.153    0.182  0.837 0.403 -0.205  0.510
C(model)[T.3]   0.210    0.182  1.154 0.249 -0.147  0.567
C(block)[T.1]  -0.079    0.320 -0.248 0.804 -0.706  0.547
C(block)[T.2]  -0.141    0.303 -0.466 0.641 -0.734  0.452
C(block)[T.3]   0.201   

In [38]:
md_seq_crt = smf.mixedlm("scaled_seq ~ C(method) + C(model) + C(block)", mm_crt_data, groups=mm_crt_data["id"])
mdf_seq_crt = md_seq_crt.fit()
print(mdf_seq_crt.summary())

          Mixed Linear Model Regression Results
Model:             MixedLM Dependent Variable: scaled_seq
No. Observations:  192     Method:             REML      
No. Groups:        24      Scale:              0.4663    
Min. group size:   8       Log-Likelihood:     -228.1481 
Max. group size:   8       Converged:          Yes       
Mean group size:   8.0                                   
---------------------------------------------------------
               Coef.  Std.Err.   z    P>|z| [0.025 0.975]
---------------------------------------------------------
Intercept       0.551    0.281  1.963 0.050  0.001  1.101
C(method)[T.3]  0.240    0.099  2.440 0.015  0.047  0.434
C(model)[T.1]  -0.220    0.139 -1.581 0.114 -0.494  0.053
C(model)[T.2]  -0.261    0.139 -1.869 0.062 -0.534  0.013
C(model)[T.3]  -0.441    0.139 -3.163 0.002 -0.714 -0.168
C(block)[T.1]  -0.715    0.424 -1.688 0.091 -1.546  0.115
C(block)[T.2]  -0.551    0.401 -1.373 0.170 -1.338  0.236
C(block)[T.3]  -0.740   