# ErgoVR - Object selection strategies revealed by gaze movement in VR

In [48]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from utils.pre_processing_functions import *
from datetime import date
import os

In [2]:
# Input/output locations, given relative to the notebook's working directory.
DATA_PATH = '../experiment_et_data/'  # handed to read_preprocess_save as the input location
META_DATA_PATH = '../experiment_meta/'  # handed to get_pickup_dropoff_loc
PROCESSED_DATA_PATH = '../processed_data/EIH_1/'  # all intermediate CSVs below are read from / written to this folder
# Create the output folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(os.path.dirname(PROCESSED_DATA_PATH), exist_ok=True)

In [45]:
import multiprocessing as mp
from functools import partial

def pandas_mp_apply(grouped_df, apply_func):
    """Apply ``apply_func`` to every group of ``grouped_df`` in worker processes.

    Parameters
    ----------
    grouped_df : iterable of ``(name, group)`` pairs
        Anything that yields ``(name, group)`` when iterated, e.g. a pandas
        GroupBy object.
    apply_func : callable
        Called once per group with a single ``(group, name)`` tuple (note the
        order: the group comes first). It is dispatched through ``Pool.map``,
        so it must be picklable, and its return values must be objects that
        ``pd.concat`` accepts.

    Returns
    -------
    The per-group results concatenated with ``pd.concat``, in group order.
    """
    # Leave one core free, but never request fewer than one worker: on a
    # single-core machine ``cpu_count() - 1`` is 0 and ``mp.Pool(0)`` raises
    # ValueError.
    n_workers = max(1, mp.cpu_count() - 1)
    with mp.Pool(n_workers) as p:
        ret_list = p.map(apply_func, [(group, name) for name, group in grouped_df])

    return pd.concat(ret_list)

## Read .etd files and save as df

In [None]:
# Execution time is ~6 h, so run this cell only when preprocessing has to be redone from scratch.
# NOTE(review): presumably this produces the 00_ET_samples_master.csv loaded in the next cell — confirm,
# and consider guarding the call with a file-existence check so "Run All" does not repeat it.
read_preprocess_save(DATA_PATH, PROCESSED_DATA_PATH)

In [None]:
# Load the master sample table from the processed-data folder.
# `timestamp_dt` is parsed to datetime; index_col=False stops pandas from
# using the first column as the index.
samples_df = pd.read_csv(
    f'{PROCESSED_DATA_PATH}/00_ET_samples_master.csv',
    parse_dates=['timestamp_dt'], 
    index_col=False,
    infer_datetime_format=True,
)

In [None]:
# Optional check (disabled): NaN count per column for the first 25 columns.
# display(samples_df.iloc[:, 0:25].isna().sum().to_frame())
# List the column names of the loaded table.
display(samples_df.columns)

In [None]:
# Add empty (all-NaN) placeholder columns; per the section note below, the
# EIH and shelf-hit values are filled in by the offline raycasting step.
for placeholder_col in (
    'EIH_pos_x', 'EIH_pos_y', 'EIH_pos_z',
    'EIH_dir_x', 'EIH_dir_y', 'EIH_dir_z',
    'eye_shelf_hit',
):
    samples_df[placeholder_col] = np.nan
samples_df.head()

In [None]:
# Export the samples for offline raycasting. Only rows with complete
# combined-eye and nose-pointer position/direction vectors are written;
# `samples_df` itself is left unchanged.
required_ray_columns = [
    f'{source}_{kind}_{axis}'
    for source in ('combinedEye', 'nosePointer')
    for kind in ('position', 'direction')
    for axis in 'xyz'
]
samples_df.dropna(subset=required_ray_columns).to_csv(
    f'{PROCESSED_DATA_PATH}/01_ET_samples_EIH_offline_raycast_no_nan_1.csv',
    index=False,
)

## Data sent for offline raycasting
The next steps run after offline raycasting has been done and the CSV has been filled with the EIH and shelf_hit values.

## Calculate gaze information

In [52]:
# Reload the samples after offline raycasting; per the section note above,
# this CSV now carries the filled-in EIH and shelf-hit values.
# `timestamp_dt` is parsed to datetime and no column is used as the index.
samples_df = pd.read_csv(
    f'{PROCESSED_DATA_PATH}/01_ET_samples_EIH_offline_raycast_no_nan_1.csv',
    parse_dates=['timestamp_dt'], 
    index_col=False,
    infer_datetime_format=True,
)

  interactivity=interactivity, compiler=compiler, result=result)


In [54]:
# Drop the raw timestamp columns and every per-eye (left/right) column.
# Each eye contributes the same ten fields, so the names are generated
# rather than spelled out one by one.
monocular_fields = (
    [f'{kind}_{axis}' for kind in ('position', 'direction') for axis in 'xyz']
    + ['raycastHitObject']
    + [f'raycastHitLocation_{axis}' for axis in 'xyz']
)
columns_to_drop = ['unixTimestamp_seconds', 'timestamp'] + [
    f'{eye}_{field}'
    for eye in ('leftEye', 'rightEye')
    for field in monocular_fields
]
samples_df.drop(columns=columns_to_drop, inplace=True)
samples_df.shape

(10156123, 46)

In [55]:
# Build `trial_num`, a trial counter that is consistent across recording files.
#
# For files whose name ends in "_2.etd", `trialNum` is shifted so that the
# highest trial in that file becomes LAST_TRIAL_NUM; all other files keep
# their `trialNum` unchanged.
# NOTE(review): presumably "_2.etd" is a continuation recording of the same
# session and trials run 0..23 — confirm.
samples_df.dropna(subset=['subjectfileName'], inplace=True)

LAST_TRIAL_NUM = 23
is_continuation_file = samples_df.subjectfileName.str.endswith("_2.etd")

continuation_samples = samples_df.loc[is_continuation_file]
# `transform` always returns a result aligned to the original row index.
# A transform-like `groupby.apply` prepends the group keys to the index on
# pandas >= 2.0, which breaks the aligned column assignment below.
file_max_trial = (
    continuation_samples
    .groupby(['subjectID', 'subjectfileName'])
    .trialNum
    .transform('max')
)

# Rows outside the continuation files are NaN after this aligned assignment...
samples_df['trial_num'] = (
    continuation_samples.trialNum + LAST_TRIAL_NUM - file_max_trial
)
# ...and are then filled with their original trial number.
samples_df.loc[~is_continuation_file, 'trial_num'] = (
    samples_df.loc[~is_continuation_file, 'trialNum']
)

samples_df.shape

(10156122, 47)

In [58]:
# Preview the first rows of the table.
samples_df.head()

Unnamed: 0,combinedEye_position_x,combinedEye_position_y,combinedEye_position_z,combinedEye_direction_x,combinedEye_direction_y,combinedEye_direction_z,combinedEye_raycastHitObject,combinedEye_raycastHitLocation_x,combinedEye_raycastHitLocation_y,combinedEye_raycastHitLocation_z,...,subjectfileName,timestamp_dt,EIH_pos_x,EIH_pos_y,EIH_pos_z,EIH_dir_x,EIH_dir_y,EIH_dir_z,eye_shelf_hit,trial_num
0,0.002804,-0.001095,-0.024177,-0.054383,0.050156,0.99726,Wall,-0.213,1.367172,0.308637,...,../experiment_et_data\subject2006.etd,1970-01-01 00:00:00.333333343,0.008821,1.591406,-1.2581,-0.134382,-0.136134,0.981534,2_3,0.0
1,0.003028,-0.001079,-0.02418,-0.0542,0.050507,0.997252,Back,-0.039645,1.393615,0.513,...,../experiment_et_data\subject2006.etd,1970-01-01 00:00:00.344199151,-0.010507,1.592493,-1.272246,-0.012008,-0.106957,0.994191,2_3,0.0
2,0.003028,-0.001079,-0.02418,-0.0542,0.050507,0.997252,Back,-0.039172,1.39382,0.513,...,../experiment_et_data\subject2006.etd,1970-01-01 00:00:00.352107048,-0.010445,1.592531,-1.272292,-0.011817,-0.106908,0.994199,2_3,0.0
3,0.003028,-0.001079,-0.02418,-0.0542,0.050507,0.997252,Back,-0.038426,1.393964,0.513,...,../experiment_et_data\subject2006.etd,1970-01-01 00:00:00.360527635,-0.010391,1.592609,-1.272324,-0.011463,-0.106908,0.994203,2_3,0.0
4,0.002949,-0.001093,-0.024174,-0.053986,0.050858,0.997246,Back,-0.037744,1.39459,0.513,...,../experiment_et_data\subject2006.etd,1970-01-01 00:00:00.370871246,-0.010339,1.59263,-1.272321,-0.011151,-0.106618,0.994238,2_3,0.0


### Calculate eye angular velocity

In [59]:
# Run the two angular-velocity helpers back to back, then show the resulting
# table size.
# NOTE(review): an explicit sort by `timestamp_dt` used to precede this step
# and is disabled; confirm the helpers do not depend on row order.
samples_df = (
    samples_df
    .pipe(calculate_EIH_theta)
    .pipe(calculate_EIH_angular_velocity)
)

samples_df.shape

(10154782, 57)

### Eye angular acceleration

In [60]:
# Run the angular-acceleration helper and show the resulting table size.
# NOTE(review): the captured output shows repeated SettingWithCopyWarning
# messages raised inside the helper; it should assign via .loc or work on an
# explicit copy.
samples_df =  calculate_EIH_angular_acceleration(samples_df)
samples_df.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  .apply(lambda x: x.diff()/np.timedelta64(1, 's')))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  .apply(lambda x: x.diff()/np.timedelta64(1, 's')))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  .apply(lambda x: x.diff()/np.timedelta64(1, 's')))
A value is trying to be set on a copy of a slice fro

(10153442, 61)

In [61]:
# Checkpoint: save the table after the angular velocity/acceleration steps
# (row index not written).
samples_df.to_csv(
    f'{PROCESSED_DATA_PATH}/02_ET_samples_vel_acc.csv',
    index=False,
)

### Label fixation-based samples

In [35]:
# Reload the velocity/acceleration checkpoint so the fixation steps can start
# here without redoing the earlier cells.
# `timestamp_dt` is parsed to datetime and no column is used as the index.
samples_df = pd.read_csv(
    f'{PROCESSED_DATA_PATH}/02_ET_samples_vel_acc.csv',
    parse_dates=['timestamp_dt'], 
    index_col=False,
    infer_datetime_format=True,
)

  interactivity=interactivity, compiler=compiler, result=result)


In [63]:
# Label fixation-based samples with the get_fixation_samples helper.
# NOTE(review): presumably this adds the isFixV / isOutlierFix / isOutlierSac
# columns that are read back as booleans further down — confirm.
samples_df = get_fixation_samples(samples_df)

### Calculate fixation durations

In [None]:
# Compute fixation durations with the get_fixation_duration helper, then show
# the resulting table size.
# NOTE(review): the explicit sort below is disabled; confirm the helper does
# not depend on rows being ordered by time.
# samples_df.sort_values(by='timestamp_dt', inplace=True)
samples_df = get_fixation_duration(samples_df)

samples_df.shape

In [None]:
# Checkpoint: save the fixation-labelled samples (row index not written).
# The file prefix jumps from 02 to 04; prefix 03 is used by the downsampled
# export in the scratch section at the end of the notebook.
samples_df.to_csv(f'{PROCESSED_DATA_PATH}/04_ET_samples_fixations.csv',
                 index=False)

## Calculate grasp information

In [None]:
# Reload the fixation checkpoint. `timestamp_dt` is parsed to datetime and
# the three flag columns are read as booleans.
# The builtin `bool` replaces the `np.bool` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError). The builtin is what the
# alias pointed to, so the resulting dtypes are unchanged.
samples_df = pd.read_csv(
    f'{PROCESSED_DATA_PATH}/04_ET_samples_fixations.csv',
    parse_dates=['timestamp_dt'],
    dtype={'isFixV': bool, 'isOutlierFix': bool, 'isOutlierSac': bool},
    infer_datetime_format=True,
)

### Calculate grasp onset, grasp stop and grasp duration

In [None]:
# Add grasp information with the get_grasp_info helper.
# NOTE(review): per the heading above this should cover grasp onset, stop and
# duration; the exact columns added are not visible here — confirm in the helper.
samples_df = get_grasp_info(samples_df)

In [None]:
# Exploratory check: the result is only displayed, never assigned.
# Within each (subject, file, trial) group, `handData_graspedObject` is split
# into runs of consecutive identical values, and every sample is labelled with
# the index of the first sample of its run.
# Note: NaN != NaN, so consecutive NaN samples each start a new run.
(samples_df
                 .groupby(['subjectID', 'subjectfileName', 'trialNum'], as_index=False)
                 .handData_graspedObject
                 .apply(lambda x: 
                        x
                        # a new run id starts wherever the value differs from the previous sample
                        .groupby((x != x.shift()).cumsum())
                        # the inner `x` shadows the outer one: here it is a single run
                        .transform(lambda x: x.index[0],
#                                                x.index[-1],
#                                               (x.index[-1] - x.index[0])/np.timedelta64(1,'s')
#                                               ] 
                                   )
                        )
)

### Calculate grasp pickup and drop-off locations

In [None]:
# Add grasp pickup and drop-off locations; the helper is also given the
# experiment meta-data folder.
# NOTE(review): which meta files are read and which columns are added is not
# visible here — confirm in the helper.
samples_df = get_pickup_dropoff_loc(samples_df, META_DATA_PATH)

In [None]:
# Final checkpoint of the preprocessing: samples with grasp information
# (row index not written).
samples_df.to_csv(f'{PROCESSED_DATA_PATH}/05_ET_samples_graspInfo.csv',
                 index=False)

# Scratch
The code below is not used for preprocessing the data.

### Resample data

In [None]:
# Scratch: start again from the velocity/acceleration checkpoint.
# `timestamp_dt` is parsed to datetime and no column is used as the index.
samples_df = pd.read_csv(
    f'{PROCESSED_DATA_PATH}/02_ET_samples_vel_acc.csv',
    parse_dates=['timestamp_dt'], 
    index_col=False,
    infer_datetime_format=True,
)

In [None]:
# Scratch: resample the data with the downsample_data helper.
# NOTE(review): the output file name below suggests a 75 Hz target rate — confirm in the helper.
samples_df = downsample_data(samples_df)

In [None]:
# Scratch: save the downsampled samples (row index not written).
samples_df.to_csv(
    f'{PROCESSED_DATA_PATH}/03_ET_samples_downsampled_75Hz.csv',
    index=False,
)