## Imports

In [1]:
import pyxdf 
import numpy as np
import pandas as pd
from lmfit.models import Model
from os import listdir, getcwd
from os.path import isfile, join
from scipy import stats
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pywt
import math
from pandas.api.types import CategoricalDtype

## Pupillary Functions


Task-evoked pupillary response (TEPR) is calculated after correcting for luminance-induced pupil dilation: $TEPR = d_m - d(Y)$, where $d_m$ is the measured pupil dilation and $d(Y)$ is the predicted pupil dilation for the given luminance level $Y$.

Predicted pupil dilation is calculated from a calibration sequence that produces an individual mapping model for each participant. The calibration sequence consists of 8 solid gray colors with varying luminance levels displayed in a pseudo-random order for 6 seconds each. The luminance levels span the range from 0.0 to 0.78, and for each calibration level, the first 0.5 s of data is discarded to account for the initial pupillary response to the change in luminance, which can take a maximum of 0.5 s. The individual mapping model is calculated using a non-linear least squares regression to fit the equation $d(Y) = a \cdot e^{-b \cdot Y} + c$ to the measured pupil dilation data for each participant.

Pupil dilation data and the average luminance data were collected at 90 Hz, the display rate of the HMD.

See: Eckert, M., Robotham, T., Habets, E. A. P., and Rummukainen, O. S. (2022). Pupillary Light Reflex Correction for Robust Pupillometry in Virtual Reality. Proc. ACM Comput. Graph. Interact. Tech. 5, 1–16. doi: 10.1145/3530798

In [2]:
def pupil_func(x, a, b, c):
    """Evaluate the exponential pupil model ``a * exp(-b * x) + c``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s); callers in this notebook pass luminance.
    a, b, c : float
        Scale, decay rate and offset of the exponential.

    Returns
    -------
    Same shape as ``x``: the model evaluated element-wise.
    """
    decay = np.exp(-b * x)
    return a * decay + c

#### Index of Pupillary Activity (IPA)

The frequency of pupil diameter oscillation over time is an indicator of cognitive load measured as the index of pupillary activity (IPA). It is an open-source alternative to the Index of Cognitive Activity. The IPA is implemented as described in the original paper, using a wavelet decomposition of the pupil diameter. The wavelet function is symlet8, because the signal was sampled at 90 Hz (rather than 250 Hz, as in the original paper).  

See: 
Duchowski, A. T., Krejtz, K., Krejtz, I., Biele, C., Niedzielska, A., Kiefer, P., Raubal, M., and Giannopoulos, I. (2018). The Index of Pupillary Activity: Measuring Cognitive Load vis-à-vis Task Difficulty with Pupil Oscillation. Proc. ACM Hum.-Comput. Interact. 2, 282:1–282:13. doi: 10.1145/3173574.3173856

In [3]:
def modmax(d):
    """Return the modulus maxima of a sequence of coefficients.

    A sample is a modulus maximum when its absolute value is at least as
    large as both neighbours and strictly larger than at least one of them.
    At either end of the sequence the missing neighbour is replaced by the
    sample itself.

    Parameters
    ----------
    d : sequence of float
        Coefficients to scan (list or 1-D array).

    Returns
    -------
    list of float
        Same length as ``d``; the absolute value at each modulus maximum and
        0.0 everywhere else.
    """
    n = len(d)

    # compute signal modulus
    m = [math.fabs(value) for value in d]

    # if value is larger than both neighbours, and strictly larger than either,
    # then it is a local maximum
    t = [0.0] * n
    for i in range(n):
        ll = m[i - 1] if i >= 1 else m[i]
        oo = m[i]
        # Every sample except the last one has a right neighbour.  The bound
        # used to be ``len(d) - 2``, which made the second-to-last sample
        # ignore the final sample and could flag it as a false maximum.
        rr = m[i + 1] if i < n - 1 else m[i]
        if (ll <= oo and oo >= rr) and (ll < oo or oo > rr):
            # magnitude of the coefficient (sqrt(d[i]**2) == |d[i]|)
            t[i] = oo
    return t

In [4]:
def ipa_func(d):
    """Compute the Index of Pupillary Activity (IPA) of a pupil signal.

    Parameters
    ----------
    d : pandas.Series
        Pupil diameter samples.  The index must support subtraction yielding
        an object with ``total_seconds()`` (e.g. a ``TimedeltaIndex``); the
        first and last index values define the signal duration.

    Returns
    -------
    float or None
        Number of level-2 detail coefficients that survive modulus-maxima
        detection and hard thresholding, divided by the signal duration in
        seconds.  ``None`` when ``pywt.wavedec`` raises ``ValueError`` or
        when the duration is not positive (previously a zero duration raised
        ``ZeroDivisionError``).
    """
    # obtain 2-level DWT of pupil diameter signal d; only the level-2 detail
    # coefficients are used below
    try:
        (_cA2, cD2, _cD1) = pywt.wavedec(d, 'sym8', 'per', level=2)
    except ValueError:
        return None

    # get signal duration (in seconds)
    tt = d.index[-1] - d.index[0]
    duration = tt.total_seconds()
    if duration <= 0:
        # a rate per second is undefined without a positive duration
        return None

    # normalize by 1/sqrt(2^j), j = 2 for the level-2 coefficients
    cD2 = np.asarray(cD2) / math.sqrt(4.0)

    # detect modulus maxima
    cD2m = modmax(cD2)

    # threshold using the universal threshold univ = sigma * sqrt(2 log n),
    # where sigma is the standard deviation of the modulus maxima
    # NOTE(review): the logarithm is base 2 here (np.log2), kept unchanged.
    univ = np.std(cD2m) * math.sqrt(2.0 * np.log2(len(cD2m)))
    cD2t = pywt.threshold(cD2m, univ, mode="hard")

    # compute IPA: surviving coefficients per second
    ctr = int(np.sum(np.abs(cD2t) > 0))
    IPA = float(ctr) / duration

    return IPA

## Statistical Functions

In [5]:
def iqr_outlier_indices(data):
    """Locate values outside the 1.5 * IQR fences.

    Parameters
    ----------
    data : pandas.Series
        Values to screen (``quantile`` is called on it).

    Returns
    -------
    tuple of numpy.ndarray
        The result of ``np.where`` on the outlier mask, i.e. a one-element
        tuple holding the positional indices of the outliers.
    """
    lower_quartile, upper_quartile = data.quantile(.25), data.quantile(.75)
    spread = stats.iqr(data, nan_policy='omit', rng=(25, 75))
    low_fence = lower_quartile - 1.5 * spread
    high_fence = upper_quartile + 1.5 * spread
    is_outlier = (data < low_fence) | (data > high_fence)
    return np.where(is_outlier)

In [6]:
def iqr_stats(data):
    """Return the interquartile range and both quartiles of ``data``.

    NaN values are ignored for all three statistics.  Previously the
    quartiles came from ``np.percentile`` (NaN if any value is NaN) while the
    IQR already omitted NaN, so the three results could disagree.

    Parameters
    ----------
    data : array-like
        Numeric values.

    Returns
    -------
    tuple
        ``(iqr, q1, q3)``.
    """
    q1 = np.nanpercentile(data, 25)
    q3 = np.nanpercentile(data, 75)
    iqr = stats.iqr(data, nan_policy='omit', rng=(25, 75))
    return iqr, q1, q3

## Data Processing Functions

In [7]:
def import_data(file):
    """Load an XDF recording into one DataFrame per stream.

    Parameters
    ----------
    file : str
        Path passed to ``pyxdf.load_xdf``.

    Returns
    -------
    dict
        Maps each stream name to a DataFrame with one column per channel
        label plus a ``time`` column.  Time stamps are rounded to 4 decimals
        and rows with a duplicated ``time`` are dropped (first one kept).
    """
    streams, _header = pyxdf.load_xdf(file)
    dfs = {}
    for stream in streams:
        info = stream['info']
        stream_name = info['name'][0]
        channels = info['desc'][0]['channels'][0]['channel']

        # Convert the samples once per stream rather than once per channel.
        samples = np.array(stream['time_series'])
        data_dict = {channel['label'][0]: samples[:, index] for index, channel in enumerate(channels)}
        data_dict['time'] = np.round(np.array(stream['time_stamps']), decimals=4)

        dfs[stream_name] = pd.DataFrame(data_dict).drop_duplicates(subset=['time']).reset_index(drop=True)
    return dfs

In [8]:
# Time discarded at the start of each calibration level (0.5 s); applied in
# process_calibration_data.
accom_time = pd.to_timedelta(0.5, unit='s')

In [9]:
# Categorical dtypes shared by the processing cells.
# method_cats: the first two categories label the navigation index and the
# last two label the creation index in the per-participant processing loop.
method_cats = CategoricalDtype(['4DoF','6DoF', 'unimanual','bimanual'], ordered=False)
model_cats = CategoricalDtype(['A', 'B', 'C', 'D'], ordered=True)
block_cats = CategoricalDtype(['0', '1', '2', '3'], ordered=True)
# Applied to CreationStream['EventType'] when the recordings are imported.
event_cats = CategoricalDtype(['Start', 'PointPlaced', 'Move', 'End', 'Draw', 'Erase', 'PointDeleted'], ordered=False)
target_cats = CategoricalDtype(['1','2'], ordered=False)
trial_cats = CategoricalDtype(['0','1','2','3'], ordered=True)
task_trial_cats = CategoricalDtype(['0','1','2','3', '4', '5', '6', '7'], ordered=True)
# Response levels of the Q3_* questionnaire items; converted to codes 0-3.
ssq_cats = CategoricalDtype(['None', 'Slight', 'Moderate', 'Severe'], ordered=True)

# Index level names used for the per-trial result frames.
data_names = ['id', 'block', 'model', 'method']

In [10]:
# Lookup tables used by clean_stream to relabel ModelID per block.
# presumably these encode the counterbalanced presentation order — TODO confirm
# against the experiment design.

# Model orders: element i is the model assigned to trial i.
mA = [0,1,3,2]
mB = [1,2,0,3]
mC = [2,3,1,0]
mD = [3,0,2,1]

# Method orders: element j is the MethodID looked up for position j.
tA = [0,1,2,3]
tB = [0,1,3,2]
tC = [1,0,2,3]
tD = [1,0,3,2]

# model_blocks[block][j][i] -> model for trial i at method position j.
model_blocks = [
    [mA,mB,mC,mD],
    [mB,mC,mD,mA],
    [mC,mD,mA,mB],
    [mD,mA,mB,mC]
]

# method_blocks[block][j] -> MethodID at position j.
method_blocks = [
    tA, tB, tC, tD
]

In [11]:
def clean_stream(stream_df, block, range_frame):
    """Return a copy of a stream with integer IDs and trial bookkeeping columns.

    The input frame is left untouched (it used to have its ``ModelID`` and
    ``MethodID`` columns rewritten in place).

    Parameters
    ----------
    stream_df : pandas.DataFrame
        Stream with ``ModelID`` and ``MethodID`` columns.  The value
        ``'generic'`` in either column is mapped to 99 before casting to int.
    block : int
        Row selector into ``method_blocks`` / ``model_blocks``.
    range_frame : sequence of int
        Method positions ``j`` to process (e.g. ``range(4)``).

    Returns
    -------
    pandas.DataFrame
        Copy of ``stream_df`` where
        * ``trial_id`` holds the original integer ``ModelID``;
        * ``ModelID`` is replaced by ``model_blocks[block][j][trial_id]`` for
          rows whose ``MethodID`` equals ``method_blocks[block][j]``;
        * ``task_trial_id`` is ``trial_id`` for the first position in
          ``range_frame`` and ``trial_id + 4`` for later ones; rows with
          ``trial_id == 4`` start at 99.
    """
    df = stream_df.copy()

    model_ids = stream_df['ModelID'].mask(stream_df['ModelID'] == 'generic', '99').astype(int)
    method_ids = stream_df['MethodID'].mask(stream_df['MethodID'] == 'generic', '99').astype(int)

    df['ModelID'] = model_ids
    df['trial_id'] = model_ids
    df['MethodID'] = method_ids
    df['task_trial_id'] = df['trial_id'].mask(df['trial_id'] == 4, 99)

    first_position = range_frame[0]
    for j in range_frame:
        offset = 0 if j == first_position else 4
        method = method_blocks[block][j]
        # trial_id and MethodID are never modified below, so this mask is
        # valid for the whole inner loop.
        uses_method = df['MethodID'] == method
        for i in range(4):
            selected = (df['trial_id'] == i) & uses_method
            df['ModelID'] = df['ModelID'].mask(selected, model_blocks[block][j][i])
            df.loc[selected, 'task_trial_id'] = offset + df.loc[selected, 'trial_id']

    return df

In [12]:
def process_gaze_luminance_data(stream_df):
    """Combine valid pupil samples with luminance samples taken at the same time.

    Pupil and luminance rows are now paired by an inner merge on ``time``
    instead of by row position, so the pairing stays correct even if one of
    the streams is not sorted by time.

    Parameters
    ----------
    stream_df : dict
        Must contain ``'GazeStream'`` and ``'LuminanceStream'`` DataFrames.
        ``time`` values are interpreted as seconds.

    Returns
    -------
    pandas.DataFrame
        Columns ``time`` (timedelta), ``luminance``, ``pupilDiameter`` (mean
        of left and right eye), ``methodID``, ``modelID``, ``trial_id`` and
        ``task_trial_id``, sorted by time, with pupil-diameter outliers
        (1.5 * IQR rule) removed and a fresh 0..n-1 index.
    """
    gaze = stream_df['GazeStream']
    # keep samples where neither eye is blinking and both diameters are positive
    valid = (
        (gaze['LeftEyeIsBlinking'] == 0)
        & (gaze['RightEyeIsBlinking'] == 0)
        & (gaze['LeftPupilDiameter'] > 0)
        & (gaze['RightPupilDiameter'] > 0)
    )
    pupil = gaze.loc[
        valid,
        ['time', 'trial_id', 'task_trial_id', 'MethodID', 'ModelID', 'LeftPupilDiameter', 'RightPupilDiameter'],
    ].copy()
    pupil['time'] = pd.to_timedelta(pupil['time'], unit='s')

    lum = stream_df['LuminanceStream'].loc[:, ['time', 'Luminance']].copy()
    lum['time'] = pd.to_timedelta(lum['time'], unit='s')

    # Intersection of time stamps, with rows paired by value rather than position
    merged = (
        pupil.merge(lum, on='time', how='inner')
        .sort_values('time', kind='stable')
        .reset_index(drop=True)
    )

    # Combined DataFrame for pupil and luminance
    pupil_lum = pd.DataFrame({
        'time': merged['time'],
        'luminance': merged['Luminance'],
        'pupilDiameter': 0.5 * (merged['LeftPupilDiameter'] + merged['RightPupilDiameter']),
        'methodID': merged['MethodID'],
        'modelID': merged['ModelID'],
        'trial_id': merged['trial_id'],
        'task_trial_id': merged['task_trial_id']
    })

    outliers = iqr_outlier_indices(pupil_lum['pupilDiameter'])
    pupil_lum = pupil_lum.drop(pupil_lum.index[outliers[0]]).reset_index(drop=True)

    return pupil_lum

In [13]:
def process_calibration_data(pupil_lum_df, stream_df):
    """Average luminance and pupil diameter for each of 8 calibration levels.

    Parameters
    ----------
    pupil_lum_df : pandas.DataFrame
        Needs ``time`` (timedelta), ``luminance`` and ``pupilDiameter``.
    stream_df : dict
        Must contain ``'ExperimentStream'`` with ``time``, ``SceneEvent`` and
        ``EventType`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per calibration level with mean ``luminance`` and
        ``pupilDiameter``, sorted by luminance.  Samples within the first
        ``accom_time`` of each level are excluded.
    """
    events = stream_df['ExperimentStream']
    is_calibration_event = (
        (events['EventType'] == 'CalibrationColorChange')
        | (events['SceneEvent'] == 'Calibration')
        | (events['SceneEvent'] == 'CalibrationComplete')
    )
    event_times = pd.to_timedelta(events.loc[is_calibration_event, 'time'], unit='s')

    # Level i runs from event i to event i + 1.
    level_starts = event_times[:8].reset_index(drop=True)
    level_ends = event_times[1:].reset_index(drop=True)

    calib_data = {}
    for level in range(8):
        in_level = (pupil_lum_df['time'] >= level_starts[level]) & (pupil_lum_df['time'] <= level_ends[level])
        samples = pupil_lum_df.loc[in_level, ['time', 'luminance', 'pupilDiameter']]
        # time since the first sample of this level
        elapsed = samples['time'] - samples['time'].iloc[0]
        calib_data[level] = samples.loc[elapsed >= accom_time, ['luminance', 'pupilDiameter']]

    per_level_mean = pd.concat(calib_data).groupby(level=0).mean()
    return per_level_mean.sort_values(by=['luminance']).reset_index(drop=True)

In [14]:
def process_navigation_data(pupil_lum_df, stream_df, a, b, c):
    """Extract pupil/luminance samples for the 8 navigation trials.

    Parameters
    ----------
    pupil_lum_df : pandas.DataFrame
        Combined pupil/luminance samples with a timedelta ``time`` column.
    stream_df : dict
        Must contain ``'NavigationStream'`` and ``'SurveyStream'``.
        Side effect: ``SurveyStream['ModelID']`` and ``['MethodID']`` are
        cast to float in place.
    a, b, c : float
        Parameters forwarded to ``pupil_func`` for the luminance baseline.

    Returns
    -------
    (pandas.DataFrame, list of float)
        The concatenated trial samples (outer index level ``trial``, 0-7)
        with added ``pupil_lum_base`` and ``adj_pupil`` columns, and the
        duration of each trial in seconds.
    """
    nav_stream = stream_df['NavigationStream']
    trials_by_key = nav_stream.groupby(['ModelID', 'MethodID'])

    stream_df['SurveyStream']['ModelID'] = stream_df['SurveyStream']['ModelID'].astype(float)
    stream_df['SurveyStream']['MethodID'] = stream_df['SurveyStream']['MethodID'].astype(float)
    survey = stream_df['SurveyStream']

    is_trial_discomfort = (survey['SurveyType'] == 'Discomfort') & (survey['ModelID'] < 4)
    surveys_by_key = survey.loc[is_trial_discomfort, ['time', 'ModelID', 'MethodID']].groupby(['ModelID', 'MethodID'])

    # Trials are visited model-major: models 0-3, navigation methods 2-3.
    trial_keys = [(model, method) for model in range(4) for method in range(2, 4)]

    windows = []
    for key in trial_keys:
        trial = trials_by_key.get_group(key)

        first_moving = trial.loc[(trial['spline_percent'] > 0.001)].index[0]
        began = pd.to_timedelta(nav_stream.loc[first_moving, 'time'], unit='s')

        completed = trial.loc[(trial['spline_percent'] > 0.995)]
        # For 6DoF navigation, completion was determined by collision with bounding box
        # Spline percentage was based on projection, so it may not reach > 0.995.
        # In this case, the survey time serves as the end time (rather than lowering the threshold)
        if len(completed) > 0:
            finished = pd.to_timedelta(nav_stream.loc[completed.index[0], 'time'], unit='s')
        else:
            survey_row = surveys_by_key.get_group(key).index[0]
            finished = pd.to_timedelta(survey.loc[survey_row, 'time'], unit='s') - pd.offsets.Second(3)

        windows.append((began, finished))

    total_time = [(finished - began).total_seconds() for began, finished in windows]

    wanted_columns = ['time', 'trial_id', 'task_trial_id', 'methodID', 'modelID', 'luminance', 'pupilDiameter']
    nav_data = {}
    for trial_number, (began, finished) in enumerate(windows):
        in_window = (
            (pupil_lum_df['luminance'] > 0)
            & (pupil_lum_df['time'] > began)
            & (pupil_lum_df['time'] < finished)
        )
        nav_data[trial_number] = pupil_lum_df.loc[in_window, wanted_columns].reset_index(drop=True)

    navigation_data = pd.concat(nav_data, names=['trial'])
    navigation_data['pupil_lum_base'] = pupil_func(navigation_data['luminance'], a, b, c)
    navigation_data['adj_pupil'] = navigation_data['pupilDiameter'] - navigation_data['pupil_lum_base']

    return navigation_data, total_time

In [15]:
def process_creation_data(pupil_lum_df, stream_df, a, b, c):
    """Extract pupil/luminance samples for the 8 creation trials.

    Parameters
    ----------
    pupil_lum_df : pandas.DataFrame
        Combined pupil/luminance samples with a timedelta ``time`` column.
    stream_df : dict
        Must contain ``'CreationStream'`` with ``EventName``, ``time``,
        ``ModelID`` and ``MethodID`` columns.
    a, b, c : float
        Parameters forwarded to ``pupil_func`` for the luminance baseline.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The concatenated trial samples (outer index level ``trial``, 0-7)
        with added ``pupil_lum_base`` and ``adj_pupil`` columns, and the
        duration of each trial in seconds.  A trial starts 2 s after its
        first ``StartPointRegistered`` event and ends at its first
        ``FinishPath`` event.
    """
    creation = stream_df['CreationStream']

    def first_event_time(event_name):
        # first occurrence of the event per (ModelID, MethodID), as timedelta
        rows = creation.loc[(creation['EventName'] == event_name), ['time', 'ModelID', 'MethodID']]
        return pd.to_timedelta(rows.groupby(['ModelID', 'MethodID']).first()['time'], unit='s')

    crt_start_times = (first_event_time('StartPointRegistered') + pd.offsets.Second(2)).reset_index(drop=True)
    crt_end_times = first_event_time('FinishPath').reset_index(drop=True)

    total_time = (crt_end_times - crt_start_times).apply(lambda x: x.total_seconds())

    wanted_columns = ['time', 'methodID', 'modelID', 'trial_id',  'task_trial_id', 'luminance', 'pupilDiameter']
    crt_data = {}
    for trial_number in range(8):
        in_window = (
            (pupil_lum_df['time'] > crt_start_times.loc[trial_number])
            & (pupil_lum_df['time'] < crt_end_times.loc[trial_number])
        )
        crt_data[trial_number] = pupil_lum_df.loc[in_window, wanted_columns].reset_index(drop=True)

    creation_data = pd.concat(crt_data, names=['trial'])
    creation_data['pupil_lum_base'] = pupil_func(creation_data['luminance'], a, b, c)
    creation_data['adj_pupil'] = creation_data['pupilDiameter'] - creation_data['pupil_lum_base']

    return creation_data, total_time

In [16]:
def process_creation_stats(stream_df):
    """Count creation events of each type for every creation trial.

    Parameters
    ----------
    stream_df : dict
        Must contain ``'CreationStream'`` with ``ModelID``, ``MethodID`` and
        a categorical ``EventType`` column whose categories include
        ``'Start'`` and ``'End'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by (``ModelID``, ``MethodID``) for models 0-3 and methods
        0-1, one column per event type except ``'Start'`` and ``'End'``.
    """
    by_trial = stream_df['CreationStream'].groupby(['ModelID', 'MethodID'])
    keys = [(model, method) for model in range(4) for method in range(0, 2)]

    # observed=False keeps unused categories, so every trial has every column
    per_trial_counts = [
        by_trial.get_group(key).groupby('EventType', observed=False).size().fillna(0)
        for key in keys
    ]

    stacked = pd.concat(per_trial_counts, axis=0, keys=keys, names=['ModelID', 'MethodID'])
    return stacked.unstack(level=2).drop(columns=['End', 'Start'])

In [17]:
def process_target_trial_data_crt(stream_df):
    """Build a table of which target set was used in each creation trial.

    Reads ``stream_df['ExperimentStream']`` and returns a DataFrame with the
    columns ``ModelID``, ``MethodID`` and ``targetID``; ``targetID`` takes
    the values of ``target_cats`` for the scene events
    ``'TargetsLoaded_Set1'`` / ``'TargetsLoaded_Set2'``.
    """
    # `&` binds tighter than `|`, so this selects every 'TargetPointsSet' row
    # OR the 'SceneLoaded' rows that are not calibration and whose MethodID is
    # neither 2 nor 3.
    # NOTE(review): confirm this grouping is intended; add explicit parentheses.
    target_set = stream_df['ExperimentStream'].loc[
        (stream_df['ExperimentStream']['EventType'] == 'TargetPointsSet') | 
        (stream_df['ExperimentStream']['EventType'] == 'SceneLoaded') & 
        (stream_df['ExperimentStream']['SceneEvent'] != 'Calibration') & 
        (stream_df['ExperimentStream']['MethodID'] != 2) & 
        (stream_df['ExperimentStream']['MethodID'] != 3), ['SceneEvent', 'EventType', 'ModelID', 'MethodID']]

    # Positions of rows whose ModelID is 99 (the value clean_stream assigns to
    # 'generic'), and the position of the row that follows each of them.
    generic_idx = np.where(target_set['ModelID'] == 99)[0]
    idx = [x+1 for x in generic_idx]

    # Give each such row the ModelID/MethodID of the next selected row.
    # NOTE(review): raises IndexError if the last selected row has ModelID 99.
    for i in range(len(generic_idx)):
        target_set.loc[target_set.index[generic_idx[i]], 'ModelID']  = target_set.loc[target_set.index[idx[i]], 'ModelID']
        target_set.loc[target_set.index[generic_idx[i]], 'MethodID']  = target_set.loc[target_set.index[idx[i]], 'MethodID']

    # Keep only the 'TargetPointsSet' rows that now carry a model ID below 4.
    target_set = target_set.loc[(target_set['ModelID'] < 4) & (target_set['EventType'] == 'TargetPointsSet'), ['SceneEvent', 'ModelID', 'MethodID']]

    # Replace the scene event names with the target category labels.
    target_set['SceneEvent'] = target_set.mask(target_set['SceneEvent'] == 'TargetsLoaded_Set1', target_cats.categories[0])['SceneEvent']
    target_set['SceneEvent'] = target_set.mask(target_set['SceneEvent'] == 'TargetsLoaded_Set2', target_cats.categories[1])['SceneEvent']

    # Rename SceneEvent -> targetID.
    target_set['targetID'] = target_set['SceneEvent']
    target_set.drop(columns=['SceneEvent'], inplace=True)

    return target_set

In [18]:
def process_target_trial_data_nav(stream_df, target_set_group):
    """Attach creation method and target set to each navigation trial.

    Parameters
    ----------
    stream_df : dict
        Must contain ``'NavigationStream'`` with ``model_source``,
        ``ModelID`` and ``MethodID`` columns.
    target_set_group : pandas GroupBy
        Groups keyed by ``(ModelID, MethodID)`` whose frames have a
        ``targetID`` column.

    Returns
    -------
    pandas.DataFrame
        One row per navigation trial (models 0-3, methods 2-3) with
        ``ModelID``, ``MethodID``, ``creation_method`` (``method_cats``) and
        ``target_set_source`` (``target_cats``).
    """
    target_set = stream_df['NavigationStream'].loc[(stream_df['NavigationStream']['ModelID'] < 4), ['model_source', 'ModelID', 'MethodID']]

    groups = target_set.groupby(['ModelID', 'MethodID'])

    # Placeholder columns; they are added after `groups` was created.
    # NOTE(review): the loop below relies on the group frames containing them.
    target_set['creation_method'] = '-1'
    target_set['target_set_source'] = '-1'

    target_trials = []

    for i in range(4):
        for j in range(2,4):
            trial = groups.get_group((i, j))
            model_source = trial['model_source'].iloc[0]
            # presumably model_source encodes which creation trial produced the
            # model; the offset 16 and threshold 9 are unexplained — TODO confirm
            model_value16 = model_source - 16
            method = '-1'
            method_id = -1

            if model_value16 > 9:
                method = 'bimanual'
                method_id = 1
            else:
                method = 'unimanual'
                method_id = 0

            trial.loc[:, 'creation_method'] = method

            # Look up the target set of the matching creation trial.
            model_i = trial['ModelID'].iloc[0]
            group = target_set_group.get_group((int(model_i),int(method_id)))

            target = group['targetID'].iloc[0]
            trial.loc[:, 'target_set_source'] = target
            # Only the first row of each trial is kept.
            target_trials.append(trial.iloc[:1])

    target_set = pd.concat(target_trials)

    target_set['creation_method'] = target_set['creation_method'].astype('string').astype(method_cats)
    target_set['target_set_source'] = target_set['target_set_source'].astype('string').astype(target_cats)
    target_set.drop(columns=['model_source'], inplace=True)
    target_set.reset_index(drop=True, inplace=True)

    return target_set

In [19]:
def process_discomfort_data(stream_df) -> pd.DataFrame:
    """Return the 'Discomfort' survey responses.

    Selects the rows of ``stream_df['SurveyStream']`` whose ``SurveyType`` is
    ``'Discomfort'`` and returns the columns ``time`` (converted from seconds
    to timedelta), ``Value``, ``ModelID`` and ``MethodID`` with a fresh
    0..n-1 index.
    """
    survey = stream_df['SurveyStream']
    is_discomfort = survey['SurveyType'] == 'Discomfort'
    responses = survey.loc[is_discomfort, ['time', 'Value', 'ModelID', 'MethodID']]
    responses = responses.assign(time=pd.to_timedelta(responses['time'], unit='s'))
    return responses.reset_index(drop=True)

In [20]:
def process_seq_data(stream_df) -> pd.DataFrame:
    """Return the 'SEQ' survey responses.

    Selects the rows of ``stream_df['SurveyStream']`` whose ``SurveyType`` is
    ``'SEQ'`` and returns the columns ``time`` (converted from seconds to
    timedelta), ``Value``, ``ModelID`` and ``MethodID`` with a fresh 0..n-1
    index.
    """
    survey = stream_df['SurveyStream']
    is_seq = survey['SurveyType'] == 'SEQ'
    responses = survey.loc[is_seq, ['time', 'Value', 'ModelID', 'MethodID']]
    responses = responses.assign(time=pd.to_timedelta(responses['time'], unit='s'))
    return responses.reset_index(drop=True)

In [21]:
def process_ipa_calc(data):
    """Compute the IPA for each of the 8 trials in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Trial samples whose outer index level holds the trial number 0-7,
        with ``methodID``, ``modelID``, ``pupilDiameter`` and ``time``
        (timedelta) columns.

    Returns
    -------
    pandas.DataFrame
        One row per trial with ``methodID``, ``modelID`` and ``IPA``.
    """
    methods = []
    models = []
    ipa = []
    for i in range(8):
        trial = data.loc[i]
        # Label the trial with the IDs of its first sample.  This used to read
        # row ``i`` of trial ``i``, which fails for trials with <= i samples.
        # presumably both IDs are constant within a trial — verify against caller
        methods.append(trial['methodID'].iloc[0])
        models.append(trial['modelID'].iloc[0])
        # ipa_func derives the signal duration from the index, so index the
        # pupil signal by time (built as a new Series instead of mutating a slice).
        pupil = pd.Series(
            trial['pupilDiameter'].to_numpy(),
            index=pd.Index(trial['time']),
            name='pupilDiameter',
        )
        ipa.append(ipa_func(pupil))

    return pd.DataFrame({'methodID': methods, 'modelID': models, 'IPA': ipa})

## Import Data

In [22]:
# Recordings are read from ./Path_Data relative to the working directory.
data_dir = join(getcwd(),'Path_Data')
# listdir returns entries in arbitrary order; sort so that participants are
# processed (and written out) in the same order on every run and machine.
data_files = [join(data_dir, f) for f in sorted(listdir(data_dir)) if isfile(join(data_dir, f))]

In [23]:
# Load every recording and normalise its streams; one dict of DataFrames per file.
dfs = []
for file in data_files:
    df = import_data(file)
    #'LuminanceStream', 'GazeStream', 'NavigationStream', 'CreationStream', 'PoseStream', 'ExperimentStream', 'SurveyStream', 'TrackedPoseStream'
    # Remove final empty row from string data streams
    for name in ('SurveyStream', 'CreationStream', 'ExperimentStream'):
        df[name] = df[name].replace(r'^\s*$', np.nan, regex=True).dropna()

    # Cast the ID / value columns of the string streams to numbers
    for column in ('ModelID', 'MethodID', 'Value'):
        df['SurveyStream'][column] = df['SurveyStream'][column].astype(float)

    for column in ('ModelID', 'MethodID'):
        df['CreationStream'][column] = df['CreationStream'][column].astype(float)
    df['CreationStream']['EventType'] = df['CreationStream']['EventType'].astype(event_cats)

    block = int(df['ExperimentStream']['BlockID'][0])

    # Method positions handled by clean_stream for each stream
    for name, positions in (
        ('GazeStream', range(4)),
        ('LuminanceStream', range(4)),
        ('NavigationStream', range(2,4)),
        ('CreationStream', range(2)),
        ('PoseStream', range(4)),
        ('SurveyStream', range(4)),
        ('TrackedPoseStream', range(4)),
        ('ExperimentStream', range(4)),
    ):
        df[name] = clean_stream(df[name], block, positions)
    dfs.append(df)

KeyboardInterrupt: 

## Process Data

In [24]:
# Per-participant processing: for every imported recording, fit the
# luminance -> pupil model on the calibration data, compute per-trial
# summaries for the navigation and creation tasks, and collect one summary
# frame per task and participant.
user_dfs_nav = []
user_dfs_crt = []
ids = []
blocks = []

for df in dfs:
    # NOTE(review): `id` shadows the Python builtin of the same name.
    id = df['ExperimentStream']['UserID'][0]
    block = df['ExperimentStream']['BlockID'][0]
    ids.append(id)
    blocks.append(block)

    pupil_lum_df = process_gaze_luminance_data(df)
    calibration_data = process_calibration_data(pupil_lum_df, df)

    # Fit pupil response to luminance
    x_data = calibration_data['luminance']
    y_data = calibration_data['pupilDiameter']
    exp_mod = Model(pupil_func)
    params = exp_mod.make_params(a=1, b=4, c=0)
    result = exp_mod.fit(y_data, params, x=x_data)
    a = result.params['a'].value
    b = result.params['b'].value
    c = result.params['c'].value

    # Per-task sample extraction, IPA and survey/target tables
    navigation_data, nav_time = process_navigation_data(pupil_lum_df, df, a, b, c)
    ipa_calc_nav = process_ipa_calc(navigation_data)
    creation_data, crt_time = process_creation_data(pupil_lum_df, df, a, b, c)
    ipa_calc_crt = process_ipa_calc(creation_data)
    creation_stats = process_creation_stats(df)
    discomfort = process_discomfort_data(df)
    seq = process_seq_data(df)

    creation_target_trials = process_target_trial_data_crt(df)
    crt_target_trials = creation_target_trials.groupby(['ModelID', 'MethodID'])
    navigation_target_trials = process_target_trial_data_nav(df, crt_target_trials)
    nav_target_trials = navigation_target_trials.groupby(['ModelID', 'MethodID'])

    # Group everything by (model, method) so trials can be looked up by key
    nav_trials = navigation_data.groupby(['modelID', 'methodID'])
    ipa_nav_trials = ipa_calc_nav.groupby(['modelID', 'methodID'])
    crt_trials = creation_data.groupby(['modelID', 'methodID'])
    ipa_crt_trials = ipa_calc_crt.groupby(['modelID', 'methodID'])
    discomfort_trials = discomfort.groupby(['ModelID', 'MethodID'])
    seq_trials = seq.groupby(['ModelID', 'MethodID'])

    # Per-trial results keyed by (id, block, model, method)
    nav_data = {}
    nav_total_time = {}
    ipa_nav_data = {}
    crt_data = {}
    crt_total_time = {}
    ipa_crt_data = {}
    discomfort_data = {}
    seq_crt = {}
    seq_nav = {}
    target_data_crt = {}
    target_data_nav = {}
    creation_data_nav = {}

    # Navigation trials use methods 2 and 3; nav_time is ordered model-major.
    for i in range(4):
        for j in range(2,4):
            nav_data[(id, block, i, j)] = nav_trials.get_group((i,j)).mean()
            nav_total_time[(id, block, i, j)] = nav_time[(2*i + (j-2))]
            ipa_nav_data[(id, block, i, j)] = ipa_nav_trials.get_group((i,j)).mean()
            discomfort_data[(id, block, i, j)] = discomfort_trials.get_group((i,j)).mean()
            seq_nav[(id, block, i, j)] = seq_trials.get_group((i,j)).mean()
            target_data_nav[(id, block, i, j)] = nav_target_trials.get_group((i,j))['target_set_source'].iloc[0]
            creation_data_nav[(id, block, i, j)] = nav_target_trials.get_group((i,j))['creation_method'].iloc[0]
    
    # Creation trials use methods 0 and 1; crt_time is ordered model-major.
    for i in range(4):
        for j in range(0,2):
            crt_data[(id, block, i, j)] = crt_trials.get_group((i,j)).mean()
            crt_total_time[(id, block, i, j)] = crt_time[(2*i + j)]
            ipa_crt_data[(id, block, i, j)] = ipa_crt_trials.get_group((i,j)).mean()
            seq_crt[(id, block, i, j)] = seq_trials.get_group((i,j)).mean()
            target_data_crt[(id, block, i, j)] = crt_target_trials.get_group((i,j))['targetID'].iloc[0]
    
    # Labelled indices: models A-D crossed with the two methods of each task.
    # These replace the numeric keys positionally, so they rely on the loops
    # above visiting trials in the same model-major order.
    nav_index = pd.MultiIndex.from_product([[id], [block], model_cats.categories, method_cats.categories[0:2]], names=data_names)
    crt_index = pd.MultiIndex.from_product([[id], [block], model_cats.categories, method_cats.categories[2:4]], names=data_names)

    nav_data = pd.concat(nav_data, axis=1, names=data_names).T
    nav_data['total_time'] = nav_total_time
    nav_data['target_source'] = target_data_nav
    nav_data['creation_method'] = creation_data_nav
    nav_data.index = nav_index
    nav_data.drop(columns=['time', 'modelID', 'methodID'], inplace=True)
    
    crt_data = pd.concat(crt_data, axis=1, names=data_names).T
    crt_data['total_time'] = crt_total_time
    crt_data['target_id'] = target_data_crt
    crt_data.index = crt_index
    crt_data.drop(columns=['time', 'modelID', 'methodID'], inplace=True)

    creation_stats.index = crt_index

    ipa_nav_data = pd.concat(ipa_nav_data, axis=1, names=data_names).T
    ipa_nav_data.index = nav_index
    ipa_nav_data.drop(columns=['modelID', 'methodID'], inplace=True)

    ipa_crt_data = pd.concat(ipa_crt_data, axis=1, names=data_names).T
    ipa_crt_data.index = crt_index
    ipa_crt_data.drop(columns=['modelID', 'methodID'], inplace=True)

    discomfort_data = pd.concat(discomfort_data, axis=1, names=data_names).T
    discomfort_data.index = nav_index
    discomfort_data['discomfort'] = discomfort_data['Value']
    discomfort_data.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    seq_nav = pd.concat(seq_nav, axis=1, names=data_names).T
    seq_nav.index = nav_index
    seq_nav['seq'] = seq_nav['Value']
    seq_nav.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    seq_crt = pd.concat(seq_crt, axis=1, names=data_names).T
    seq_crt.index = crt_index
    seq_crt['seq'] = seq_crt['Value']
    seq_crt.drop(columns=['time', 'ModelID', 'MethodID', 'Value'], inplace=True)

    # The frames share an index but have different columns; stacking and
    # unstacking the row-wise concatenation yields one row per trial.
    df_crt = pd.concat([crt_data, ipa_crt_data, seq_crt, creation_stats], axis=0).stack().unstack()
    # NOTE(review): astype(int) runs before fillna(0) below, so a missing
    # count would raise in the cast and the fillna never has anything to fill.
    df_crt.loc[(slice(None), slice(None), slice(None), slice(None)), ('Draw', 'Erase', 'PointPlaced', 'Move', 'PointDeleted')] = df_crt.loc[(slice(None), slice(None), slice(None), slice(None)), ('Draw', 'Erase', 'PointPlaced', 'Move', 'PointDeleted')].astype(int)
    df_crt.loc[(slice(None), slice(None), slice(None), 'unimanual'), ('Draw', 'Erase')] = df_crt.loc[(slice(None), slice(None), slice(None), 'unimanual'), ('Draw', 'Erase')].astype(int).fillna(0)
    df_crt.loc[(slice(None), slice(None), slice(None), 'bimanual'), ('PointPlaced', 'Move', 'PointDeleted')] = df_crt.loc[(slice(None), slice(None), slice(None), 'bimanual'), ('PointPlaced', 'Move', 'PointDeleted')].astype(int).fillna(0)

    df_nav = pd.concat([nav_data, ipa_nav_data, discomfort_data, seq_nav], axis=0).stack().unstack()

    user_dfs_nav.append(df_nav)
    user_dfs_crt.append(df_crt)


In [25]:
# Combine all participants' navigation summaries and fix the column dtypes.
user_data_nav = pd.concat(user_dfs_nav)
nav_dtype = {'luminance': 'float64', 'pupilDiameter': 'float64', 'pupil_lum_base': 'float64', 'adj_pupil': 'float64', 'IPA': 'float64', 'discomfort': 'int32', 'seq': 'int32', 'total_time': 'float64'}
user_data_nav = user_data_nav.astype(nav_dtype)

# Drop navigation trials whose total_time (seconds) is below 10.
short_nav = user_data_nav['total_time'] < 10
user_data_nav = user_data_nav[~short_nav]

# Same steps for the creation summaries.
user_data_crt = pd.concat(user_dfs_crt)
crt_dtype = {'luminance' : 'float64', 'pupilDiameter' : 'float64', 'pupil_lum_base' : 'float64', 'adj_pupil' : 'float64', 'IPA' : 'float64', 'seq' : 'int32', 'PointPlaced' : 'int32', 'Move' : 'int32', 'Draw' : 'int32', 'Erase' : 'int32', 'PointDeleted' : 'int32', 'total_time': 'float64'}
user_data_crt = user_data_crt.astype(crt_dtype)

# Drop creation trials whose total_time (seconds) is below 10.
short_crt = user_data_crt['total_time'] < 10
user_data_crt = user_data_crt[~short_crt]

In [26]:
# Persist the per-trial summaries (written to the current working directory).
user_data_nav.to_pickle('user_data_nav.pkl')
user_data_crt.to_pickle('user_data_crt.pkl')

In [28]:
# Load the pre- and post-study questionnaires.  Only the empty string is
# treated as missing, so the literal answer 'None' is kept as text instead of
# being parsed as NaN.
pre_csv = pd.read_csv('pre_study.csv', keep_default_na = False, na_values = [''])
post_csv = pd.read_csv('post_study.csv', keep_default_na = False, na_values = [''])

# Convert the 16 items Q3_1 .. Q3_16 of both questionnaires from their text
# answers to the integer codes of ssq_cats (None=0 ... Severe=3).
# NOTE(review): answers that are missing or not in ssq_cats become code -1.
for survey_csv in (pre_csv, post_csv):
    for item in [f'Q3_{number}' for number in range(1, 17)]:
        survey_csv[item] = survey_csv[item].astype(ssq_cats).cat.codes

In [29]:
# Q3 item numbers summed into each raw subscale score.
# presumably the nausea / oculomotor / disorientation subscales of the SSQ —
# TODO confirm that the Q3_k numbering matches the questionnaire item order.
ssq_n_items = (1, 6, 7, 8, 9, 15, 16)
ssq_o_items = (1, 2, 3, 4, 5, 9, 11)
ssq_d_items = (5, 8, 10, 11, 12, 13, 14)


def _ssq_raw(responses, items):
    """Add up the coded ``Q3_<k>`` columns of ``responses`` for ``k`` in ``items``."""
    columns = [responses[f'Q3_{number}'] for number in items]
    total = columns[0]
    for column in columns[1:]:
        total = total + column
    return total


pre_n_raw = _ssq_raw(pre_csv, ssq_n_items)
pre_o_raw = _ssq_raw(pre_csv, ssq_o_items)
pre_d_raw = _ssq_raw(pre_csv, ssq_d_items)

post_n_raw = _ssq_raw(post_csv, ssq_n_items)
post_o_raw = _ssq_raw(post_csv, ssq_o_items)
post_d_raw = _ssq_raw(post_csv, ssq_d_items)

In [30]:
# Per-participant table built from the questionnaires.  The first assignment
# fixes the row index to that of post_csv; later columns are aligned to it.
# NOTE(review): pre- and post-study rows are matched by row index, not by
# participant id — confirm both files list participants in the same order.
participant_data = pd.DataFrame()
participant_data['id'] = post_csv['Q1']
participant_data['block'] = post_csv['Q1'] % 4
participant_data['age'] = post_csv['Q11']
participant_data['sex'] = post_csv['Q12']
participant_data['hand'] = post_csv['Q14']
participant_data['motion_sick'] = post_csv['Q2']
# Total score: sum of the three raw subscale scores scaled by 3.74.
participant_data['pre_ssq'] = (pre_n_raw + pre_o_raw + pre_d_raw) * 3.74
participant_data['post_ssq'] = (post_n_raw + post_o_raw + post_d_raw) * 3.74
participant_data['delta_ssq'] = participant_data['post_ssq'] - participant_data['pre_ssq']
participant_data['crt_pref'] = post_csv['Q4']
participant_data['nav_pref'] = post_csv['Q6']

In [31]:
# Persist the participant table (written to the current working directory).
participant_data.to_pickle('participant_data.pkl')