## Imports

In [2]:
import pyxdf 
import numpy as np
import pandas as pd
from pandas.api.types import CategoricalDtype
from lmfit.models import Model
from os import listdir, getcwd
from os.path import isfile, join
from scipy import stats
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.anova import AnovaRM
import statsmodels.formula.api as smf
import pywt
import math
import matplotlib.pyplot as plt
import statsmodels.graphics.factorplots as smgf

## Pupillary Functions


Task-evoked pupillary response (TEPR) is calculated after correcting for luminance-induced pupil dilation: $TEPR = d_m - d(Y)$, where $d_m$ is the measured pupil dilation, and $d(Y)$ is the predicted pupil dilation for the given luminance level $Y$.

Predicted pupil dilation is calculated from a calibration sequence that produces an individual mapping model for each participant. The calibration sequence consists of 8 solid gray colors with varying luminance levels displayed in a pseudo-random order for 6 seconds each. The luminance levels span the range from 0.0 to 0.78, and for each calibration level, the first 0.5 s of data is discarded to account for the initial pupillary response to the change in luminance, which can take a maximum of 0.5 s. The individual mapping model is calculated using a non-linear least squares regression to fit the equation $d(Y) = a \cdot e^{-b \cdot Y} + c$ to the measured pupil dilation data for each participant.

Pupil dilation data and the average luminance data were collected at 90 Hz, the display rate of the HMD.

See: Eckert, M., Robotham, T., Habets, E. A. P., and Rummukainen, O. S. (2022). Pupillary Light Reflex Correction for Robust Pupillometry in Virtual Reality. Proc. ACM Comput. Graph. Interact. Tech. 5, 1–16. doi: 10.1145/3530798

In [3]:
def pupil_func(x, a, b, c):
    """Evaluate the exponential model ``a * exp(-b * x) + c``.

    Args:
        x: Independent variable (scalar or array-like accepted by ``np.exp``).
        a: Amplitude of the exponential term.
        b: Decay rate applied to ``x``.
        c: Constant offset.

    Returns:
        The model value(s), with the same shape as ``x``.
    """
    decay = np.exp(-b * x)
    return a * decay + c

In [4]:
def modmax(d):
    """Return the modulus maxima of a coefficient sequence.

    A sample is a modulus maximum when its absolute value is at least as large
    as both neighbours and strictly larger than at least one of them. Samples
    at either end are compared against themselves on the missing side.

    The right-neighbour guard now covers every index that has a right
    neighbour (``i < n - 1``). The previous guard (``i < n - 2``) skipped the
    comparison between the second-to-last and the last sample, so the
    second-to-last sample could be reported as a maximum even when the last
    sample was larger.

    Args:
        d: Sequence of numbers (list or 1-D array).

    Returns:
        A list of floats of the same length as ``d``: ``|d[i]|`` where ``d[i]``
        is a modulus maximum, ``0.0`` elsewhere.
    """
    n = len(d)
    # signal modulus
    magnitudes = [math.fabs(value) for value in d]

    maxima = [0.0] * n
    for i, centre in enumerate(magnitudes):
        left = magnitudes[i - 1] if i >= 1 else centre
        right = magnitudes[i + 1] if i < n - 1 else centre
        # at least as large as both neighbours, and strictly larger than one of them
        if (left <= centre and centre >= right) and (left < centre or centre > right):
            maxima[i] = centre
    return maxima

In [5]:
def ipa_func(d):
    """Compute the IPA value of a pupil diameter signal.

    The signal is decomposed with a 2-level 'sym16' discrete wavelet transform
    (periodic extension); the level-2 detail coefficients are normalised,
    reduced to their modulus maxima, hard-thresholded, and the surviving
    coefficients are counted per second of signal.

    Args:
        d: Pupil diameter samples. ``d.index`` must support subtraction
            yielding an object with ``total_seconds()``.
            # presumably a pandas Series with a TimedeltaIndex; confirm against caller

    Returns:
        The number of surviving coefficients divided by the signal duration in
        seconds, or ``None`` when the wavelet decomposition raises ``ValueError``.
    """
    # 2-level DWT of the pupil diameter signal
    try:
        (cA2, cD2, cD1) = pywt.wavedec(d, 'sym16', 'per', level=2)
    except ValueError:
        return None

    # signal duration (converted to seconds below)
    duration = d.index[-1] - d.index[0]

    # normalise the level-j coefficients by sqrt(2**j): j = 2 for cA2/cD2, j = 1 for cD1
    cA2 /= math.sqrt(4.0)
    cD1 /= math.sqrt(2.0)
    cD2 /= math.sqrt(4.0)

    # modulus maxima of the level-2 detail coefficients
    maxima = modmax(cD2)

    # threshold = std(maxima) * sqrt(2 * log2(n)); note the code uses a base-2 logarithm
    univ_threshold = np.std(maxima) * math.sqrt(2.0 * np.log2(len(maxima)))
    kept = pywt.threshold(maxima, univ_threshold, mode="hard")

    # IPA: surviving (non-zero) coefficients per second
    n_peaks = sum(1 for coeff in kept if math.fabs(coeff) > 0)
    return float(n_peaks) / duration.total_seconds()

## Statistical Functions

In [6]:
def iqr_outlier_indices(data):
    """Locate values lying more than 1.5 IQR outside the quartiles.

    Args:
        data: Object providing ``quantile`` and element-wise comparison
            (used with a pandas Series elsewhere in this file).

    Returns:
        The tuple produced by ``np.where`` holding the positions of values
        below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR``.
    """
    spread = stats.iqr(data, nan_policy='omit', rng=(25, 75))
    lower_fence = data.quantile(.25) - 1.5 * spread
    upper_fence = data.quantile(.75) + 1.5 * spread
    is_outlier = (data < lower_fence) | (data > upper_fence)
    return np.where(is_outlier)

In [7]:
def iqr_stats(data):
    """Return the interquartile range and both quartiles of ``data``.

    NaN values are ignored for all three statistics. The quartiles were
    previously computed with ``np.percentile``, which returns NaN as soon as
    one NaN is present, while the IQR already omitted NaNs; the three values
    could therefore disagree. For NaN-free input the result is unchanged.

    Args:
        data: Array-like of numbers.

    Returns:
        Tuple ``(iqr, q1, q3)``.
    """
    q1 = np.nanpercentile(data, 25)
    q3 = np.nanpercentile(data, 75)
    iqr = stats.iqr(data, nan_policy='omit', rng=(25, 75))
    return iqr, q1, q3

In [8]:
def get_results_colors(np, wp, tp):
    """Build a 3x3 grid of fill colours for a results table.

    The colours come from ``px.colors.qualitative.Pastel2``; ``px`` must be
    available at call time (the notebook imports ``plotly.express as px`` in a
    later cell). The parameter ``np`` shadows the module-level ``numpy`` alias
    inside this function only.

    Args:
        np: Value compared against 0.05 to pick which test column is coloured.
            # presumably the p-value of a normality test; confirm against caller
        wp: Value compared against 0.05 when ``np < 0.05``.
            # presumably a Wilcoxon p-value
        tp: Value compared against 0.05 otherwise.
            # presumably a t-test p-value

    Returns:
        A list of three rows with three colour strings each.
    """
    palette = px.colors.qualitative.Pastel2
    blank = 'white'
    highlight, muted = palette[0], palette[3]

    if np < 0.05:
        normal_color = muted
        wilcox_color = highlight if wp < 0.05 else muted
        ttest_color = blank
    else:
        normal_color = highlight
        wilcox_color = blank
        ttest_color = highlight if tp < 0.05 else muted

    return [
        [blank, blank, blank],
        [blank, wilcox_color, ttest_color],
        [normal_color, wilcox_color, ttest_color],
    ]

## Data Processing Functions

In [9]:
def import_data(file):
    """Load an XDF recording and return one DataFrame per stream.

    Each stream becomes a frame with one column per channel label plus a
    ``time`` column holding the timestamps rounded to 4 decimals. Rows with a
    duplicated rounded timestamp are dropped (first occurrence kept) and the
    index is reset.

    Args:
        file: Path passed to ``pyxdf.load_xdf``.

    Returns:
        Dict mapping stream name to its DataFrame.
    """
    streams, _header = pyxdf.load_xdf(file)  # the file header is not used
    dfs = {}
    for stream in streams:
        info = stream['info']
        stream_name = info['name'][0]
        channel_descs = info['desc'][0]['channels'][0]['channel']

        # Convert the samples once per stream instead of once per channel.
        samples = np.array(stream['time_series'])
        data_dict = {channel['label'][0]: samples[:, index]
                     for index, channel in enumerate(channel_descs)}
        data_dict['time'] = np.round(np.array(stream['time_stamps']), decimals=4)

        dfs[stream_name] = pd.DataFrame(data_dict).drop_duplicates(subset=['time']).reset_index(drop=True)
    return dfs

In [95]:
# Leading interval (0.5 s) that process_calibration_data discards from each calibration level.
accom_time = pd.to_timedelta(0.5, unit='s')

In [94]:
# Categorical dtypes used to encode the experiment factors.
# method_cats is sliced by position later on: categories[0:2] label the navigation
# rows and categories[2:4] label the creation rows.
method_cats = CategoricalDtype(['4DoF','6DoF', 'unimanual','bimanual'], ordered=False)
model_cats = CategoricalDtype(['A', 'B', 'C', 'D'], ordered=True)
block_cats = CategoricalDtype(['0', '1', '2', '3'], ordered=True)
# Event types of the creation stream (applied to CreationStream['EventType'] on import).
event_cats = CategoricalDtype(['Start', 'PointPlaced', 'Move', 'End', 'Draw', 'Erase', 'PointDeleted'], ordered=False)
target_cats = CategoricalDtype(['1','2'], ordered=False)
trial_cats = CategoricalDtype(['0','1','2','3'], ordered=True)
task_trial_cats = CategoricalDtype(['0','1','2','3', '4', '5', '6', '7'], ordered=True)
# Level names of the (id, block, model, method) MultiIndex built per participant.
data_names = ['id', 'block', 'model', 'method']

In [12]:

# Model presentation orders: clean_stream reads them as model_blocks[block][j][i],
# i.e. the model ID assigned to trial position i.
mA = [0,1,3,2]
mB = [1,2,0,3]
mC = [2,3,1,0]
mD = [3,0,2,1]

# Task (method) presentation order
# Creation task (method 0 or 1) is always first 
tA = [0,1,2,3]
tB = [0,1,3,2]
tC = [1,0,2,3]
tD = [1,0,3,2]

# One row per block ID; each row lists the model order used for task positions 0-3.
model_blocks = [
    [mA,mB,mC,mD],
    [mB,mC,mD,mA],
    [mC,mD,mA,mB],
    [mD,mA,mB,mC]
]

# One method order per block ID (indexed as method_blocks[block][j] in clean_stream).
method_blocks = [
    tA, tB, tC, tD
]

In [114]:
def process_gaze_luminance_data(stream_df):
    """Combine gaze and luminance samples that share a timestamp.

    Gaze samples are kept only when neither eye is blinking and both pupil
    diameters are positive. Gaze and luminance rows are then restricted to the
    timestamps present in both streams, the two pupil diameters are averaged,
    and rows whose averaged diameter is an IQR outlier are dropped.

    The selections taken from the input streams are copied before their
    ``time`` column is converted, so the assignment never targets a view of
    the caller's frames (avoids pandas' chained-assignment warning).

    Args:
        stream_df: Dict of DataFrames containing at least 'GazeStream' and
            'LuminanceStream'.

    Returns:
        DataFrame with columns time, luminance, pupilDiameter, methodID,
        modelID, trial_id and task_trial_id.
    """
    gaze = stream_df['GazeStream']
    valid_sample = (
        (gaze['LeftEyeIsBlinking'] == 0)
        & (gaze['RightEyeIsBlinking'] == 0)
        & (gaze['LeftPupilDiameter'] > 0)
        & (gaze['RightPupilDiameter'] > 0)
    )
    pupil = gaze.loc[
        valid_sample,
        ['time', 'trial_id', 'task_trial_id', 'MethodID', 'ModelID', 'LeftPupilDiameter', 'RightPupilDiameter'],
    ].copy()
    pupil['time'] = pd.to_timedelta(pupil['time'], unit='s')

    lum = stream_df['LuminanceStream'].loc[:, ['time', 'MethodID', 'ModelID', 'Luminance']].copy()
    lum['time'] = pd.to_timedelta(lum['time'], unit='s')

    # Timestamps present in both streams (sorted and unique)
    pupil_lum_time_intersection = np.intersect1d(pupil['time'], lum['time'])

    # Keep only the shared timestamps in each stream
    pupil = pupil[pupil['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)
    lum = lum[lum['time'].isin(pupil_lum_time_intersection)].reset_index(drop=True)

    # Rows are combined by position.
    # NOTE(review): this assumes both streams are sorted by time with unique timestamps - confirm.
    pupil_lum = pd.DataFrame({
        'time': pd.to_timedelta(pupil_lum_time_intersection, unit='s'),
        'luminance': lum['Luminance'],
        'pupilDiameter': 0.5 * (pupil['LeftPupilDiameter'] + pupil['RightPupilDiameter']),
        'methodID': pupil['MethodID'],
        'modelID': pupil['ModelID'],
        'trial_id': pupil['trial_id'],
        'task_trial_id': pupil['task_trial_id']
    })

    # Drop rows whose mean pupil diameter is an IQR outlier
    outliers = iqr_outlier_indices(pupil_lum['pupilDiameter'])
    pupil_lum = pupil_lum.drop(pupil_lum.iloc[outliers].index).reset_index(drop=True)

    return pupil_lum

In [14]:
def process_calibration_data(pupil_lum_df, stream_df):
    """Average luminance and pupil diameter for each of the 8 calibration levels.

    Calibration events are taken from 'ExperimentStream'; event ``i`` starts
    level ``i`` and event ``i + 1`` ends it. Within each level, time is
    re-based to the first sample and samples earlier than ``accom_time`` are
    discarded before averaging.

    Derived frames are copied before columns are assigned on them, so the
    assignments never target a view of the inputs (avoids pandas'
    chained-assignment warning).

    Args:
        pupil_lum_df: DataFrame with 'time', 'luminance' and 'pupilDiameter'.
        stream_df: Dict of DataFrames containing 'ExperimentStream'.

    Returns:
        DataFrame with one row per calibration level (columns 'luminance' and
        'pupilDiameter'), sorted by luminance.

    Raises:
        IndexError: If a calibration level contains no pupil samples.
    """
    experiment = stream_df['ExperimentStream']
    is_calibration_event = (
        (experiment['EventType'] == 'CalibrationColorChange')
        | (experiment['SceneEvent'] == 'Calibration')
        | (experiment['SceneEvent'] == 'CalibrationComplete')
    )
    calibration_events = experiment.loc[is_calibration_event, ['time', 'SceneEvent', 'EventType']].copy()
    calibration_events['time'] = pd.to_timedelta(calibration_events['time'], unit='s')

    # Level i runs from event i to event i + 1
    c_start_times = calibration_events[:8]['time'].reset_index(drop=True)
    c_end_times = calibration_events[1:]['time'].reset_index(drop=True)

    calib_data = {}
    for i in range(8):
        in_level = (pupil_lum_df['time'] >= c_start_times[i]) & (pupil_lum_df['time'] <= c_end_times[i])
        level = pupil_lum_df.loc[in_level, ['time', 'luminance', 'pupilDiameter']].copy()
        # Re-base time to the start of the level, then drop the accommodation period
        level['time'] -= level['time'].iloc[0]
        calib_data[i] = level.loc[(level['time'] >= accom_time), ['luminance', 'pupilDiameter']]

    # Mean per level (outer key of the concatenated frame), ordered by luminance
    calibration_data = pd.concat(calib_data).groupby(level=0).mean().sort_values(by=['luminance']).reset_index(drop=True)
    return calibration_data

In [128]:
def process_navigation_data(pupil_lum_df, stream_df, a, b, c):
    """Extract pupil data for the 8 navigation trials and correct it for luminance.

    Trials are visited model-major (ModelID 0-3), then MethodID 2 and 3, so
    entry ``2 * model + (method - 2)`` of the returned time list belongs to
    that (model, method) pair.

    Args:
        pupil_lum_df: Combined pupil/luminance frame (see process_gaze_luminance_data).
        stream_df: Dict of DataFrames; 'NavigationStream' and 'SurveyStream' are read.
        a, b, c: Parameters passed to ``pupil_func`` to predict the
            luminance-driven pupil diameter.

    Returns:
        Tuple ``(navigation_data, total_time)``: the concatenated per-trial
        samples (outer index level 'trial') with the added columns
        'pupil_lum_base' and 'adj_pupil', and a list of trial durations in seconds.
    """
    grouped_data = stream_df['NavigationStream'].groupby(['ModelID', 'MethodID'])

    # NOTE(review): these casts are written back into the caller's SurveyStream frame.
    stream_df['SurveyStream']['ModelID'] = stream_df['SurveyStream']['ModelID'].astype(float)
    stream_df['SurveyStream']['MethodID'] = stream_df['SurveyStream']['MethodID'].astype(float)
    
    # Discomfort survey rows, used as a fallback end-of-trial marker below
    discomfort_survey = stream_df['SurveyStream'].loc[
        (stream_df['SurveyStream']['SurveyType'] == 'Discomfort') & 
        (stream_df['SurveyStream']['ModelID'] < 4), 
        ['time', 'ModelID', 'MethodID']]
    survey_group = discomfort_survey.groupby(['ModelID', 'MethodID'])

    start_times = []
    end_times = []
    total_time = []

    for i in range(4):
        for j in range(2,4):
            trial = grouped_data.get_group((i, j))

            # Trial starts at the first sample with spline_percent above 0.001
            start = trial.loc[(trial['spline_percent'] > 0.001)].index[0]
            start_time = pd.to_timedelta(stream_df['NavigationStream'].loc[start, 'time'], unit='s')

            end = trial.loc[(trial['spline_percent'] > 0.995)]
            end_time = 0  # placeholder; always overwritten by one of the branches below
            # For 6DoF navigation, completion was determined by collision with bounding box
            # Spline percentage was based on projection, so it may not reach > 0.995.
            # In this case, the survey time serves as the end time (rather than lowering the threshold)
            if len(end) > 0:
                end = end.index[0]
                end_time = pd.to_timedelta(stream_df['NavigationStream'].loc[end, 'time'], unit='s')
            else:
                # Fallback: first discomfort survey row of this trial, minus 3 seconds
                end = survey_group.get_group((i, j)).index[0]
                end_time = pd.to_timedelta(stream_df['SurveyStream'].loc[end, 'time'], unit='s') - pd.offsets.Second(3)
            
            start_times.append(start_time)
            end_times.append(end_time)
            total_time.append((end_time - start_time).total_seconds())


    nav_start_times = start_times
    nav_end_times = end_times

    # Slice the pupil/luminance samples strictly inside each trial window,
    # keeping only samples with positive luminance
    nav_data = {}
    for i in range(8):
        nav_data[i] = pupil_lum_df.loc[
            (pupil_lum_df['luminance'] >0) & 
            (pupil_lum_df['time']>nav_start_times[i]) & 
            (pupil_lum_df['time']<nav_end_times[i]), 
            ['time', 'trial_id', 'task_trial_id', 'methodID', 'modelID', 'luminance', 'pupilDiameter']]
        nav_data[i].reset_index(drop=True, inplace=True)

    navigation_data = pd.concat(nav_data, names=['trial'])
    # Luminance-predicted diameter and the remaining (measured - predicted) response
    navigation_data['pupil_lum_base'] = pupil_func(navigation_data['luminance'], a, b, c)
    navigation_data['adj_pupil'] = navigation_data['pupilDiameter'] - navigation_data['pupil_lum_base']

    return navigation_data, total_time

In [108]:
def clean_stream(stream_df, block, range_frame):
    """Return a copy of a stream with decoded model IDs and trial counters.

    The recorded 'ModelID' is kept as 'trial_id'. For every task position in
    ``range_frame``, rows of the matching method get their 'ModelID' replaced
    by the model scheduled for that trial (``model_blocks``) and their
    'task_trial_id' set to the trial ID, offset by 4 for every position other
    than the first one in ``range_frame``.

    Note: the 'generic' -> '99' replacement is written back into
    ``stream_df`` itself, as well as being reflected in the returned frame.

    Args:
        stream_df: Stream DataFrame with 'ModelID' and 'MethodID' columns.
        block: Integer block ID indexing ``method_blocks`` / ``model_blocks``.
        range_frame: Indexable sequence of task positions to decode.

    Returns:
        New DataFrame with integer 'ModelID', 'MethodID', 'trial_id' and
        'task_trial_id' columns in addition to the stream's own columns.
    """
    cleaned = pd.DataFrame()
    for column in stream_df.columns:
        cleaned[column] = stream_df[column]

    # Encode the 'generic' placeholder as '99' so the ID columns can be cast to int
    for id_column in ('ModelID', 'MethodID'):
        is_generic = stream_df[id_column] == 'generic'
        stream_df[id_column] = stream_df[id_column].mask(is_generic, '99')

    cleaned['ModelID'] = stream_df['ModelID'].astype(int)
    cleaned['trial_id'] = stream_df['ModelID'].astype(int)
    cleaned['MethodID'] = stream_df['MethodID'].astype(int)
    # A trial ID of 4 is replaced by 99 in the task-level counter
    cleaned['task_trial_id'] = cleaned['trial_id'].mask(cleaned['trial_id'] == 4, 99)

    for position in range_frame:
        offset = 0 if position == range_frame[0] else 4
        method = method_blocks[block][position]
        for trial in range(4):
            # trial_id and MethodID are never modified in this loop, so one mask serves all three uses
            rows = (cleaned['trial_id'] == trial) & (cleaned['MethodID'] == method)
            trial_ids = cleaned.loc[rows, 'trial_id']
            cleaned['ModelID'] = cleaned['ModelID'].mask(rows, model_blocks[block][position][trial])
            cleaned.loc[rows, 'task_trial_id'] = offset + trial_ids

    return cleaned

In [129]:
def process_creation_data(pupil_lum_df, stream_df, a, b, c):
    """Extract pupil data for the 8 creation trials and correct it for luminance.

    A trial starts 2 seconds after its first 'StartPointRegistered' event and
    ends at its first 'FinishPath' event. Trials are ordered by the
    (ModelID, MethodID) group keys.

    Args:
        pupil_lum_df: Combined pupil/luminance frame.
        stream_df: Dict of DataFrames containing 'CreationStream'.
        a, b, c: Parameters passed to ``pupil_func``.

    Returns:
        Tuple ``(creation_data, total_time)``: the concatenated per-trial
        samples (outer index level 'trial') with the added columns
        'pupil_lum_base' and 'adj_pupil', and a Series of trial durations in seconds.
    """
    creation = stream_df['CreationStream']
    event_columns = ['time', 'ModelID', 'MethodID']
    trial_keys = ['ModelID', 'MethodID']

    # First start event per trial, shifted by 2 seconds
    started = creation.loc[(creation['EventName'] == 'StartPointRegistered'), event_columns]
    crt_start_times = (
        pd.to_timedelta(started.groupby(trial_keys).first()['time'], unit='s') + pd.offsets.Second(2)
    ).reset_index(drop=True)

    # First finish event per trial
    finished = creation.loc[(creation['EventName'] == 'FinishPath'), event_columns]
    crt_end_times = pd.to_timedelta(
        finished.groupby(trial_keys).first()['time'], unit='s'
    ).reset_index(drop=True)

    total_time = (crt_end_times - crt_start_times).apply(lambda span: span.total_seconds())

    sample_columns = ['time', 'methodID',  'trial_id',  'task_trial_id', 'modelID', 'luminance', 'pupilDiameter']
    crt_data = {}
    for trial in range(8):
        after_start = pupil_lum_df['time'] > crt_start_times.loc[trial]
        before_end = pupil_lum_df['time'] < crt_end_times.loc[trial]
        crt_data[trial] = pupil_lum_df.loc[after_start & before_end, sample_columns].reset_index(drop=True)

    creation_data = pd.concat(crt_data, names=['trial'])
    # Luminance-predicted diameter and the remaining (measured - predicted) response
    predicted = pupil_func(creation_data['luminance'], a, b, c)
    creation_data['pupil_lum_base'] = predicted
    creation_data['adj_pupil'] = creation_data['pupilDiameter'] - creation_data['pupil_lum_base']

    return creation_data, total_time

In [18]:
def process_creation_stats(stream_df):
    """Count creation events per type for each (ModelID, MethodID) trial.

    Args:
        stream_df: Dict of DataFrames; 'CreationStream' must provide 'ModelID',
            'MethodID' and 'EventType' columns, and the event types must
            include 'End' and 'Start' among the resulting columns.

    Returns:
        DataFrame indexed by (ModelID, MethodID) for models 0-3 and methods
        0-1, with one column per event type except 'End' and 'Start'.
    """
    by_trial = stream_df['CreationStream'].groupby(['ModelID', 'MethodID'])
    keys = [(model, method) for model in range(4) for method in range(0, 2)]

    # Event counts per trial; observed=False keeps unused categories as zero counts
    creation_counts = [
        by_trial.get_group(key).groupby('EventType', observed=False).size().fillna(0)
        for key in keys
    ]

    stats_table = pd.concat(
        creation_counts, axis=0, keys=keys, names=['ModelID', 'MethodID']
    ).unstack(level=2)
    return stats_table.drop(columns=['End', 'Start'])

In [19]:
def process_target_trial_data_crt(stream_df):
    """Return the target set used in each creation trial.

    Rows are taken from 'ExperimentStream' when the event is 'TargetPointsSet',
    or when it is a 'SceneLoaded' event outside calibration whose MethodID is
    neither 2 nor 3. Rows with ModelID 99 inherit the ModelID and MethodID of
    the row that follows them; only 'TargetPointsSet' rows with ModelID below 4
    are kept.

    Args:
        stream_df: Dict of DataFrames containing 'ExperimentStream'.

    Returns:
        DataFrame with columns 'ModelID', 'MethodID' and 'targetID', where
        'TargetsLoaded_Set1' / 'TargetsLoaded_Set2' are replaced by the first /
        second category of ``target_cats``.
    """
    experiment = stream_df['ExperimentStream']

    # `&` binds tighter than `|`: TargetPointsSet rows are always kept; the other
    # conditions only restrict the SceneLoaded rows.
    targets_set = experiment['EventType'] == 'TargetPointsSet'
    creation_scene_loaded = (
        (experiment['EventType'] == 'SceneLoaded')
        & (experiment['SceneEvent'] != 'Calibration')
        & (experiment['MethodID'] != 2)
        & (experiment['MethodID'] != 3)
    )
    target_set = experiment.loc[
        targets_set | creation_scene_loaded,
        ['SceneEvent', 'EventType', 'ModelID', 'MethodID'],
    ]

    # Rows with ModelID 99 take their IDs from the row that follows them
    generic_positions = np.where(target_set['ModelID'] == 99)[0]
    for position in generic_positions:
        next_row = target_set.index[position + 1]
        generic_row = target_set.index[position]
        target_set.loc[generic_row, 'ModelID'] = target_set.loc[next_row, 'ModelID']
        target_set.loc[generic_row, 'MethodID'] = target_set.loc[next_row, 'MethodID']

    keep = (target_set['ModelID'] < 4) & (target_set['EventType'] == 'TargetPointsSet')
    target_set = target_set.loc[keep, ['SceneEvent', 'ModelID', 'MethodID']]

    # Replace the scene event names by the target categories
    for raw_label, category_pos in (('TargetsLoaded_Set1', 0), ('TargetsLoaded_Set2', 1)):
        matches = target_set['SceneEvent'] == raw_label
        target_set['SceneEvent'] = target_set.mask(matches, target_cats.categories[category_pos])['SceneEvent']

    # Expose the value as 'targetID' (appended as the last column)
    target_set['targetID'] = target_set['SceneEvent']
    target_set.drop(columns=['SceneEvent'], inplace=True)

    return target_set


In [20]:
def process_target_trial_data_nav(stream_df, target_set_group):
    """Resolve, for each navigation trial, how and from which targets its path was created.

    For models 0-3 and methods 2-3, the first row of the trial provides
    'model_source'. A value of ``model_source - 16`` above 9 is labelled
    'bimanual' (creation method 1), anything else 'unimanual' (creation
    method 0). The target set is read from the matching creation trial in
    ``target_set_group``.

    Args:
        stream_df: Dict of DataFrames containing 'NavigationStream'.
        target_set_group: GroupBy keyed by (ModelID, MethodID) whose groups
            provide a 'targetID' column.

    Returns:
        DataFrame with one row per navigation trial and the columns 'ModelID',
        'MethodID', 'creation_method' (``method_cats``) and
        'target_set_source' (``target_cats``).
    """
    navigation = stream_df['NavigationStream']
    target_set = navigation.loc[(navigation['ModelID'] < 4), ['model_source', 'ModelID', 'MethodID']]

    # The grouping is created before the placeholder columns are added; order kept as is.
    by_trial = target_set.groupby(['ModelID', 'MethodID'])

    target_set['creation_method'] = '-1'
    target_set['target_set_source'] = '-1'

    first_rows = []
    for model in range(4):
        for nav_method in range(2, 4):
            trial = by_trial.get_group((model, nav_method))

            is_bimanual = trial['model_source'].iloc[0] - 16 > 9
            creation_name = 'bimanual' if is_bimanual else 'unimanual'
            creation_id = 1 if is_bimanual else 0

            trial.loc[:, 'creation_method'] = creation_name

            # Look up the creation trial that produced this path
            source_model = trial['ModelID'].iloc[0]
            creation_trial = target_set_group.get_group((int(source_model), int(creation_id)))

            trial.loc[:, 'target_set_source'] = creation_trial['targetID'].iloc[0]
            first_rows.append(trial.iloc[:1])

    target_set = pd.concat(first_rows)

    for column, dtype in (('creation_method', method_cats), ('target_set_source', target_cats)):
        target_set[column] = target_set[column].astype('string').astype(dtype)

    return target_set.drop(columns=['model_source']).reset_index(drop=True)

In [21]:
def process_discomfort_data(stream_df):
    """Return the 'Discomfort' survey responses with time as a timedelta.

    The selection is copied before its 'time' column is converted, so the
    assignment never targets a view of the caller's 'SurveyStream' frame
    (avoids pandas' chained-assignment warning).

    Args:
        stream_df: Dict of DataFrames containing 'SurveyStream'.

    Returns:
        DataFrame with columns 'time', 'Value', 'ModelID' and 'MethodID' and a
        fresh 0-based index.
    """
    survey = stream_df['SurveyStream']
    discomfort_values = survey.loc[
        survey['SurveyType'] == 'Discomfort', ['time', 'Value', 'ModelID', 'MethodID']
    ].copy()
    discomfort_values['time'] = pd.to_timedelta(discomfort_values['time'], unit='s')
    return discomfort_values.reset_index(drop=True)

In [22]:
def process_seq_data(stream_df):
    """Return the 'SEQ' survey responses with time as a timedelta.

    The selection is copied before its 'time' column is converted, so the
    assignment never targets a view of the caller's 'SurveyStream' frame
    (avoids pandas' chained-assignment warning).

    Args:
        stream_df: Dict of DataFrames containing 'SurveyStream'.

    Returns:
        DataFrame with columns 'time', 'Value', 'ModelID' and 'MethodID' and a
        fresh 0-based index.
    """
    survey = stream_df['SurveyStream']
    seq_values = survey.loc[
        survey['SurveyType'] == 'SEQ', ['time', 'Value', 'ModelID', 'MethodID']
    ].copy()
    seq_values['time'] = pd.to_timedelta(seq_values['time'], unit='s')
    return seq_values.reset_index(drop=True)

In [23]:
def process_ipa_calc(data):
    """Compute the IPA value of each of the 8 trials in ``data``.

    The method and model IDs of a trial are read from its first sample.
    Previously they were read from the i-th sample of trial i, which raised
    ``IndexError`` for any trial with fewer than ``i + 1`` samples.

    Args:
        data: DataFrame whose outer index level holds the trial number (0-7)
            and which provides 'methodID', 'modelID', 'pupilDiameter' and
            'time' columns.

    Returns:
        DataFrame with one row per trial and the columns 'methodID', 'modelID'
        and 'IPA'.
    """
    methods = []
    models = []
    ipa = []
    for i in range(8):
        trial = data.loc[i]
        methods.append(trial['methodID'].iloc[0])
        models.append(trial['modelID'].iloc[0])
        # Index the diameters by time on a copy, so the selection itself is not re-indexed;
        # ipa_func derives the signal duration from this index.
        pupil = trial['pupilDiameter'].copy()
        pupil.index = trial['time']
        ipa.append(ipa_func(pupil))

    return pd.DataFrame({'methodID': methods, 'modelID': models, 'IPA': ipa})

## Import Data

In [90]:
# Recordings are read from the 'Path_Data' folder inside the current working directory.
data_dir = join(getcwd(),'Path_Data')
# Full paths of every regular file in that folder (sub-directories are skipped).
data_files = [join(data_dir, f) for f in listdir(data_dir) if isfile(join(data_dir, f))]

In [109]:
# Load every recording and normalise its streams; one dict of DataFrames per file.
dfs = []
for file in data_files:
    df = import_data(file)
    
    # Remove final empty row from string data streams
    # (whitespace-only cells become NaN, then every row containing a NaN is dropped)
    df['SurveyStream'] = df['SurveyStream'].replace(r'^\s*$', np.nan, regex=True).dropna()
    df['CreationStream'] = df['CreationStream'].replace(r'^\s*$', np.nan, regex=True).dropna()
    df['ExperimentStream'] = df['ExperimentStream'].replace(r'^\s*$', np.nan, regex=True).dropna()

    # Cast the survey IDs and values to float
    df['SurveyStream']['ModelID'] = df['SurveyStream']['ModelID'].astype(float)
    df['SurveyStream']['MethodID'] = df['SurveyStream']['MethodID'].astype(float)
    df['SurveyStream']['Value'] = df['SurveyStream']['Value'].astype(float)

    # Cast the creation IDs to float and the event type to its categorical dtype
    df['CreationStream']['ModelID'] = df['CreationStream']['ModelID'].astype(float)
    df['CreationStream']['MethodID'] = df['CreationStream']['MethodID'].astype(float)
    df['CreationStream']['EventType'] = df['CreationStream']['EventType'].astype(event_cats)

    # NOTE(review): label-based lookup of row 0; assumes that row survived the dropna above.
    block = int(df['ExperimentStream']['BlockID'][0])
    
    # Decode model IDs and trial counters; the range selects which task positions
    # are decoded for each stream (see clean_stream).
    df['GazeStream'] = clean_stream(df['GazeStream'], block, range(4))
    df['LuminanceStream'] = clean_stream(df['LuminanceStream'], block, range(4))
    df['NavigationStream'] = clean_stream(df['NavigationStream'], block, range(2,4))
    df['CreationStream'] = clean_stream(df['CreationStream'], block, range(2))
    df['PoseStream'] = clean_stream(df['PoseStream'], block, range(4))
    df['SurveyStream'] = clean_stream(df['SurveyStream'], block, range(4))
    df['TrackedPoseStream'] = clean_stream(df['TrackedPoseStream'], block, range(4))
    df['ExperimentStream'] = clean_stream(df['ExperimentStream'], block, range(4))
    dfs.append(df)

## Process Data

In [130]:
# Per-participant summary frames, one entry per recording.
user_dfs_nav = []
user_dfs_crt = []
ids = []
blocks = []

for df in dfs:
    # NOTE(review): `id` shadows the Python builtin of the same name within this cell.
    id = df['ExperimentStream']['UserID'][0]
    block = df['ExperimentStream']['BlockID'][0]
    ids.append(id)
    blocks.append(block)

    pupil_lum_df = process_gaze_luminance_data(df)
    calibration_data = process_calibration_data(pupil_lum_df, df)

    # Fit pupil response to luminance
    # (non-linear fit of pupil_func; initial guesses a=1, b=4, c=0)
    x_data = calibration_data['luminance']
    y_data = calibration_data['pupilDiameter']
    exp_mod = Model(pupil_func)
    params = exp_mod.make_params(a=1, b=4, c=0)
    result = exp_mod.fit(y_data, params, x=x_data)
    a = result.params['a'].value
    b = result.params['b'].value
    c = result.params['c'].value

    # Per-trial measures for the navigation and creation tasks
    navigation_data, nav_time = process_navigation_data(pupil_lum_df, df, a, b, c)
    ipa_calc_nav = process_ipa_calc(navigation_data)
    creation_data, crt_time = process_creation_data(pupil_lum_df, df, a, b, c)
    ipa_calc_crt = process_ipa_calc(creation_data)
    creation_stats = process_creation_stats(df)
    discomfort = process_discomfort_data(df)
    seq = process_seq_data(df)

    creation_target_trials = process_target_trial_data_crt(df)
    crt_target_trials = creation_target_trials.groupby(['ModelID', 'MethodID'])
    navigation_target_trials = process_target_trial_data_nav(df, crt_target_trials)
    nav_target_trials = navigation_target_trials.groupby(['ModelID', 'MethodID'])

    # Group every measure by (model, method) so trials can be looked up by key
    nav_trials = navigation_data.groupby(['modelID', 'methodID'])
    ipa_nav_trials = ipa_calc_nav.groupby(['modelID', 'methodID'])
    crt_trials = creation_data.groupby(['modelID', 'methodID'])
    ipa_crt_trials = ipa_calc_crt.groupby(['modelID', 'methodID'])
    discomfort_trials = discomfort.groupby(['ModelID', 'MethodID'])
    seq_trials = seq.groupby(['ModelID', 'MethodID'])
    
    nav_data = {}
    nav_total_time = {}
    ipa_nav_data = {}
    crt_data = {}
    crt_total_time = {}
    ipa_crt_data = {}
    discomfort_data = {}
    seq_crt = {}
    seq_nav = {}
    target_data_crt = {}
    target_data_nav = {}
    creation_data_nav = {}

    # Navigation trials: models 0-3, methods 2-3; per-trial means keyed by (id, block, model, method)
    for i in range(4):
        for j in range(2,4):
            nav_data[(id, block, i, j)] = nav_trials.get_group((i,j)).mean()
            # nav_time is ordered model-major, then method 2 before method 3
            nav_total_time[(id, block, i, j)] = nav_time[(2*i + (j-2))]
            ipa_nav_data[(id, block, i, j)] = ipa_nav_trials.get_group((i,j)).mean()
            discomfort_data[(id, block, i, j)] = discomfort_trials.get_group((i,j)).mean()
            seq_nav[(id, block, i, j)] = seq_trials.get_group((i,j)).mean()
            target_data_nav[(id, block, i, j)] = nav_target_trials.get_group((i,j))['target_set_source'].iloc[0]
            creation_data_nav[(id, block, i, j)] = nav_target_trials.get_group((i,j))['creation_method'].iloc[0]
    
    # Creation trials: models 0-3, methods 0-1
    for i in range(4):
        for j in range(0,2):
            crt_data[(id, block, i, j)] = crt_trials.get_group((i,j)).mean()
            crt_total_time[(id, block, i, j)] = crt_time[(2*i + j)]
            ipa_crt_data[(id, block, i, j)] = ipa_crt_trials.get_group((i,j)).mean()
            seq_crt[(id, block, i, j)] = seq_trials.get_group((i,j)).mean()
            target_data_crt[(id, block, i, j)] = crt_target_trials.get_group((i,j))['targetID'].iloc[0]
    
    # Labelled indexes that replace the numeric keys; rows are relabelled by position.
    # NOTE(review): presumably methods 2/3 correspond to '4DoF'/'6DoF' and 0/1 to 'unimanual'/'bimanual' - confirm.
    nav_index = pd.MultiIndex.from_product([[id], [block], model_cats.categories, method_cats.categories[0:2]], names=data_names)
    crt_index = pd.MultiIndex.from_product([[id], [block], model_cats.categories, method_cats.categories[2:4]], names=data_names)

    nav_data = pd.concat(nav_data, axis=1, names=data_names).T
    nav_data['total_time'] = nav_total_time
    nav_data['target_source'] = target_data_nav
    nav_data['creation_method'] = creation_data_nav
    nav_data.index = nav_index
    nav_data.drop(columns=['time', 'modelID', 'methodID'], inplace=True)
    
    crt_data = pd.concat(crt_data, axis=1, names=data_names).T
    crt_data['total_time'] = crt_total_time
    crt_data['target_id'] = target_data_crt
    crt_data.index = crt_index
    crt_data.drop(columns=['time', 'modelID', 'methodID'], inplace=True)

    creation_stats.index = crt_index

    ipa_nav_data = pd.concat(ipa_nav_data, axis=1, names=data_names).T
    ipa_nav_data.index = nav_index
    ipa_nav_data.drop(columns=['modelID', 'methodID'], inplace=True)

    ipa_crt_data = pd.concat(ipa_crt_data, axis=1, names=data_names).T
    ipa_crt_data.index = crt_index
    ipa_crt_data.drop(columns=['modelID', 'methodID'], inplace=True)

    discomfort_data = pd.concat(discomfort_data, axis=1, names=data_names).T
    discomfort_data.index = nav_index
    discomfort_data['discomfort'] = discomfort_data['Value']
    discomfort_data.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    seq_nav = pd.concat(seq_nav, axis=1, names=data_names).T
    seq_nav.index = nav_index
    seq_nav['seq'] = seq_nav['Value']
    seq_nav.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    seq_crt = pd.concat(seq_crt, axis=1, names=data_names).T
    seq_crt.index = crt_index
    seq_crt['seq'] = seq_crt['Value']
    seq_crt.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    # The frames are stacked row-wise, then stack()/unstack() is applied.
    # NOTE(review): presumably this collapses them into one row per index entry - confirm.
    df_crt = pd.concat([crt_data, ipa_crt_data, seq_crt, creation_stats], axis=0).stack().unstack()
    # Event counts are cast to int (for all rows, then again per creation method)
    df_crt.loc[(slice(None), slice(None), slice(None), slice(None)), ('Draw', 'Erase', 'PointPlaced', 'Move', 'PointDeleted')] = df_crt.loc[(slice(None), slice(None), slice(None), slice(None)), ('Draw', 'Erase', 'PointPlaced', 'Move', 'PointDeleted')].astype(int)
    df_crt.loc[(slice(None), slice(None), slice(None), 'unimanual'), ('Draw', 'Erase')] = df_crt.loc[(slice(None), slice(None), slice(None), 'unimanual'), ('Draw', 'Erase')].astype(int).fillna(0)
    df_crt.loc[(slice(None), slice(None), slice(None), 'bimanual'), ('PointPlaced', 'Move', 'PointDeleted')] = df_crt.loc[(slice(None), slice(None), slice(None), 'bimanual'), ('PointPlaced', 'Move', 'PointDeleted')].astype(int).fillna(0)

    df_nav = pd.concat([nav_data, ipa_nav_data, discomfort_data, seq_nav], axis=0).stack().unstack()

    user_dfs_nav.append(df_nav)
    user_dfs_crt.append(df_crt)


In [131]:
# Combine all participants' navigation summaries and set the column dtypes.
user_data_nav = pd.concat(user_dfs_nav)
nav_dtype = {'luminance': 'float64', 'pupilDiameter': 'float64', 'pupil_lum_base': 'float64', 'adj_pupil': 'float64', 'IPA': 'float64', 'discomfort': 'int32', 'seq': 'int32', 'total_time': 'float64'}
user_data_nav = user_data_nav.astype(nav_dtype)

# Exclude navigation trials with a total time below 10 (seconds)
short_nav = user_data_nav['total_time'] < 10
user_data_nav = user_data_nav[~short_nav]

# Same treatment for the creation summaries
user_data_crt = pd.concat(user_dfs_crt)
crt_dtype = {'luminance' : 'float64', 'pupilDiameter' : 'float64', 'pupil_lum_base' : 'float64', 'adj_pupil' : 'float64', 'IPA' : 'float64', 'seq' : 'int32', 'PointPlaced' : 'int32', 'Move' : 'int32', 'Draw' : 'int32', 'Erase' : 'int32', 'PointDeleted' : 'int32', 'total_time': 'float64'}
user_data_crt = user_data_crt.astype(crt_dtype)

# Exclude creation trials with a total time below 10 (seconds)
short_crt = user_data_crt['total_time'] < 10
user_data_crt = user_data_crt[~short_crt]

In [132]:
user_data_nav

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,trial_id,task_trial_id,luminance,pupilDiameter,pupil_lum_base,adj_pupil,total_time,target_source,creation_method,IPA,discomfort,seq
id,block,model,method,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
108,0,A,4DoF,3.00000,7.00000,0.22436,4.86646,3.83158,1.03488,37.08220,1,unimanual,0.24285,2,0
108,0,A,6DoF,1.00000,5.00000,0.22551,4.35948,3.82915,0.53034,61.00430,2,bimanual,0.09839,1,0
108,0,B,4DoF,2.00000,6.00000,0.24224,4.65088,3.77130,0.87958,67.70050,1,unimanual,0.14776,1,0
108,0,B,6DoF,3.00000,7.00000,0.25618,4.43714,3.74668,0.69046,96.43160,2,bimanual,0.19708,1,0
108,0,C,4DoF,0.00000,4.00000,0.22350,4.65890,3.85290,0.80600,88.03560,1,unimanual,0.22724,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,3,B,6DoF,0.00000,4.00000,0.25051,3.91653,3.20039,0.71614,50.55280,1,unimanual,0.13853,1,0
143,3,C,4DoF,0.00000,4.00000,0.23244,3.68889,3.23776,0.45113,57.98340,2,bimanual,0.17253,2,0
143,3,C,6DoF,1.00000,5.00000,0.22828,3.69899,3.25161,0.44738,45.80070,1,unimanual,0.24181,1,0
143,3,D,4DoF,1.00000,5.00000,0.22978,3.76151,3.23712,0.52439,64.95840,2,bimanual,0.15400,3,0


In [133]:
user_data_crt

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,trial_id,task_trial_id,luminance,pupilDiameter,pupil_lum_base,adj_pupil,total_time,target_id,IPA,seq,PointPlaced,Move,Draw,Erase,PointDeleted
id,block,model,method,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
108,0,A,bimanual,2.00000,6.00000,0.28084,4.40283,3.65164,0.75118,98.30300,2,0.11191,1,19,6,0,0,0
108,0,A,unimanual,0.00000,0.00000,0.25939,5.10438,3.71714,1.38724,62.76520,1,0.10701,2,0,0,4,0,0
108,0,B,bimanual,0.00000,4.00000,0.32082,4.18903,3.54098,0.64805,95.21040,2,0.13658,2,17,3,0,0,0
108,0,B,unimanual,1.00000,1.00000,0.26331,4.83574,3.71454,1.12120,40.44160,1,0.17314,2,0,0,4,0,0
108,0,C,bimanual,1.00000,5.00000,0.23760,4.37592,3.78364,0.59229,36.44090,2,0.24704,1,12,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,3,B,unimanual,1.00000,5.00000,0.29194,4.17442,3.12937,1.04505,25.36290,1,0.31573,1,0,0,5,0,0
143,3,C,bimanual,2.00000,2.00000,0.27263,4.10501,3.16439,0.94062,65.67570,2,0.12186,1,8,10,0,0,0
143,3,C,unimanual,3.00000,7.00000,0.22582,4.16704,3.26014,0.90691,17.34620,1,0.23081,1,0,0,1,0,0
143,3,D,bimanual,0.00000,0.00000,0.21186,4.03511,3.30591,0.72920,63.36400,2,0.11051,0,5,12,0,0,0


## Navigation Linear Mixed Models

In [283]:
# Flatten the (id, block, model, method) index into columns; reset_index returns a
# new frame, so user_data_nav keeps its MultiIndex.
mm_nav_data = user_data_nav
mm_nav_data = mm_nav_data.reset_index(level=(0,1,2,3))

# Encode the categorical factors as integer category codes
mm_nav_data['block'] = mm_nav_data['block'].astype(block_cats).cat.codes
mm_nav_data['method'] = mm_nav_data['method'].astype(method_cats).cat.codes
mm_nav_data['model'] = mm_nav_data['model'].astype(model_cats).cat.codes
mm_nav_data['creation_method'] = mm_nav_data['creation_method'].astype(method_cats).cat.codes
mm_nav_data['target_source'] = mm_nav_data['target_source'].astype(int)
mm_nav_data['trial_id'] = mm_nav_data['trial_id'].astype(int)
# Standardise the outcome measures (z-scores computed over all rows)
mm_nav_data['scaled_tepr'] = stats.zscore(mm_nav_data['adj_pupil']).astype(float)
mm_nav_data['scaled_ipa'] = stats.zscore(mm_nav_data['IPA']).astype(float)
mm_nav_data['scaled_time'] = stats.zscore(mm_nav_data['total_time']).astype(float)
mm_nav_data['scaled_seq'] = stats.zscore(mm_nav_data['seq']).astype(float)
mm_nav_data['scaled_discomfort'] = stats.zscore(mm_nav_data['discomfort']).astype(float)

# Show floats with 5 decimals (global pandas display option)
pd.options.display.float_format = '{:.5f}'.format
mm_nav_data

Unnamed: 0,id,block,model,method,trial_id,task_trial_id,luminance,pupilDiameter,pupil_lum_base,adj_pupil,...,target_source,creation_method,IPA,discomfort,seq,scaled_tepr,scaled_ipa,scaled_time,scaled_seq,scaled_discomfort
0,108,0,0,0,3,7.00000,0.22436,4.86646,3.83158,1.03488,...,1,2,0.24285,2,0,2.02620,1.25553,-1.16329,-0.77209,-0.17558
1,108,0,0,1,1,5.00000,0.22551,4.35948,3.82915,0.53034,...,2,3,0.09839,1,0,0.26970,-1.36817,-0.15795,-0.77209,-0.57355
2,108,0,1,0,2,6.00000,0.24224,4.65088,3.77130,0.87958,...,1,2,0.14776,1,0,1.48554,-0.47153,0.12346,-0.77209,-0.57355
3,108,0,1,1,3,7.00000,0.25618,4.43714,3.74668,0.69046,...,2,3,0.19708,1,0,0.82714,0.42423,1.33090,-0.77209,-0.57355
4,108,0,2,0,0,4.00000,0.22350,4.65890,3.85290,0.80600,...,1,2,0.22724,1,1,1.22940,0.97202,0.97805,0.37639,-0.57355
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,143,3,1,1,0,4.00000,0.25051,3.91653,3.20039,0.71614,...,1,2,0.13853,1,0,0.91654,-0.63915,-0.59718,-0.77209,-0.57355
234,143,3,2,0,0,4.00000,0.23244,3.68889,3.23776,0.45113,...,2,3,0.17253,2,0,-0.00605,-0.02167,-0.28491,-0.77209,-0.17558
235,143,3,2,1,1,5.00000,0.22828,3.69899,3.25161,0.44738,...,1,2,0.24181,1,0,-0.01910,1.23679,-0.79689,-0.77209,-0.57355
236,143,3,3,0,1,5.00000,0.22978,3.76151,3.23712,0.52439,...,2,3,0.15400,3,0,0.24899,-0.35815,0.00822,-0.77209,0.22240


In [135]:
import plotly.express as px

In [272]:
# Scatter of the unscaled discomfort rating against trial_id (points are
# colored by trial_id), faceted with one column per participant id and one
# row per method.
fig = px.scatter(
    mm_nav_data,
    x='trial_id',
    y='discomfort',
    color='trial_id',
    facet_col='id',
    facet_row='method',
)
fig.show()

In [137]:
# Standardised outcome measures (the scaled_* columns of mm_nav_data).
endog = mm_nav_data.loc[
    :,
    [
        'scaled_tepr',
        'scaled_ipa',
        'scaled_seq',
        'scaled_discomfort',
        'scaled_time',
    ],
]

# Candidate predictors; this frame is what the VIF check below is run on.
exog = mm_nav_data.loc[
    :,
    [
        'block',
        'method',
        'model',
        'creation_method',
        'target_source',
        'trial_id',
    ],
]

In [138]:
# Variance inflation factor for every column of the predictor matrix.
# sm.add_constant adds a 'const' column, so the result also carries an
# entry for 'const' alongside the entries for the actual predictors.
vif = sm.add_constant(exog)
vif_value = pd.Series(
    [
        variance_inflation_factor(vif.values, col_pos)
        for col_pos, _ in enumerate(vif.columns)
    ],
    index=vif.columns,
)
vif_value

const             38.81247
block              1.00011
method             1.00762
model              1.00177
creation_method    1.00866
target_source      1.00762
trial_id           1.00191
dtype: float64

In [278]:
# Linear mixed model for standardised discomfort with method and trial_id as
# fixed effects. Observations are grouped by participant id, and re_formula
# requests a random intercept plus a random trial_id slope per group.
# NOTE(review): the recorded run of this cell printed "The MLE may be on the
# boundary of the parameter space." -- the random-effects variances should be
# read with that in mind.
md_discomfort_full = smf.mixedlm(
    formula="scaled_discomfort ~ method + trial_id",
    data=mm_nav_data,
    groups=mm_nav_data["id"],
    re_formula="~1 + trial_id",
)
mdf_discomfort_full = md_discomfort_full.fit()
mdf_discomfort_full.summary()


The MLE may be on the boundary of the parameter space.



0,1,2,3
Model:,MixedLM,Dependent Variable:,scaled_discomfort
No. Observations:,238,Method:,REML
No. Groups:,30,Scale:,0.1337
Min. group size:,7,Log-Likelihood:,-166.1876
Max. group size:,8,Converged:,Yes
Mean group size:,7.9,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,-0.095,0.166,-0.573,0.567,-0.421,0.231
method,0.080,0.047,1.694,0.090,-0.013,0.173
trial_id,0.034,0.026,1.339,0.181,-0.016,0.085
Group Var,0.765,0.629,,,,
Group x trial_id Cov,0.034,0.065,,,,
trial_id Var,0.006,0.015,,,,


## Creation Linear Mixed Models

In [233]:
# Build the creation-task modelling frame: move the first four index levels
# of user_data_crt into ordinary columns (reset_index returns a new frame).
mm_crt_data = user_data_crt.reset_index(level=(0, 1, 2, 3))

# Replace the categorical factors with the integer codes of their
# categorical dtypes (the *_cats dtypes are defined earlier in the notebook).
for _factor_col, _factor_dtype in (
    ('block', block_cats),
    ('method', method_cats),
    ('model', model_cats),
):
    mm_crt_data[_factor_col] = mm_crt_data[_factor_col].astype(_factor_dtype).cat.codes

# Identifier-like columns are stored as plain integers.
for _int_col in ('target_id', 'trial_id'):
    mm_crt_data[_int_col] = mm_crt_data[_int_col].astype(int)

# Standardise each measure with scipy's z-score, computed over the whole
# column. Tuple order fixes the order in which the new columns are appended.
for _scaled_col, _raw_col in (
    ('scaled_tepr', 'adj_pupil'),
    ('scaled_ipa', 'IPA'),
    ('scaled_time', 'total_time'),
    ('scaled_seq', 'seq'),
):
    mm_crt_data[_scaled_col] = stats.zscore(mm_crt_data[_raw_col]).astype('float64')

# Total interaction count per row: sum of the five interaction-event columns,
# followed by its standardised version.
mm_crt_data['total_interactions'] = (
    mm_crt_data['Draw']
    + mm_crt_data['Erase']
    + mm_crt_data['PointPlaced']
    + mm_crt_data['Move']
    + mm_crt_data['PointDeleted']
)
mm_crt_data['scaled_interactions'] = (
    stats.zscore(mm_crt_data['total_interactions']).astype('float64')
)

# Show floats with five decimals in rendered frames, then display the frame.
pd.set_option('display.float_format', '{:.5f}'.format)
mm_crt_data

Unnamed: 0,id,block,model,method,trial_id,task_trial_id,luminance,pupilDiameter,pupil_lum_base,adj_pupil,...,Move,Draw,Erase,PointDeleted,scaled_tepr,scaled_ipa,scaled_time,scaled_seq,total_interactions,scaled_interactions
0,108,0,0,3,2,6.00000,0.28084,4.40283,3.65164,0.75118,...,6,0,0,0,0.62236,-0.99902,1.19729,0.13959,25,1.05887
1,108,0,0,2,0,0.00000,0.25939,5.10438,3.71714,1.38724,...,0,4,0,0,2.53022,-1.07406,0.18585,1.12496,4,-0.61987
2,108,0,1,3,0,4.00000,0.32082,4.18903,3.54098,0.64805,...,3,0,0,0,0.31300,-0.62139,1.10928,1.12496,20,0.65917
3,108,0,1,2,1,1.00000,0.26331,4.83574,3.71454,1.12120,...,0,4,0,0,1.73223,-0.06182,-0.44950,1.12496,4,-0.61987
4,108,0,2,3,1,5.00000,0.23760,4.37592,3.78364,0.59229,...,1,0,0,0,0.14575,1.06937,-0.56337,0.13959,13,0.09959
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,143,3,1,2,1,5.00000,0.29194,4.17442,3.12937,1.04505,...,0,5,0,0,1.50381,2.12083,-0.87866,0.13959,5,-0.53993
236,143,3,2,3,2,2.00000,0.27263,4.10501,3.16439,0.94062,...,10,0,0,0,1.19057,-0.84681,0.26869,0.13959,18,0.49929
237,143,3,2,2,3,7.00000,0.22582,4.16704,3.26014,0.90691,...,0,1,0,0,1.08945,0.82101,-1.10682,0.13959,1,-0.85969
238,143,3,3,3,0,0.00000,0.21186,4.03511,3.30591,0.72920,...,12,0,0,0,0.55642,-1.02043,0.20289,-0.84577,17,0.41935
