# Pilot data check - TODO

- frame rate/subject
- number of grasps/subject
- number of eye samples/object
- number of eye samples/grasped object
- number of eye samples/grasped object/sorting type --> shouldn't be different
- number of samples with NaN
- validation error across trials/subject
- azimuth and elevation angles/subject (histograms)
- Amplitude of head movements based on velocity
- Saccade amplitude based on velocity

# Quick notes
Subjects 1001, 1002, 1003, 1004, 1005, 1007 --> no grasped object recorded

In [1]:
import ProtobufTypes_pb2 
import pandas as pd
import sys
from google.protobuf.json_format import MessageToDict
import collections
import numpy as np
import glob
import seaborn as sns
import os
import matplotlib.pyplot as plt
import pickle
import pprint
from IPython.display import display
PLOT_DIR = './PLOTS/data_quality/'
os.makedirs(os.path.dirname(PLOT_DIR), exist_ok=True)
sns.set(context = "talk", style="white", palette="dark", font_scale=1, rc={'figure.figsize':(11.7,8.27)})
COLORS = {'g': '#CAE6CB', 'r': '#E6CACA', 'b': '#C9D4F5'}

In [2]:
# pickle the data
def pickleData(filepath,filename, data):
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(str(filepath+filename),'wb') as fh:
        pickle.dump(data,fh)

# read from pickled data
def read_dataFrameFromFile(filename):
    with open(filename, 'rb') as fh:
        dataframe = pickle.load(fh)
    return dataframe

In [3]:
protobuf_obj = ProtobufTypes_pb2.EyetrackingDataSet()
files = glob.glob("./subject*.etd")
samples_df = pd.DataFrame()
shelf_df = pd.DataFrame()
for fi in files:
    try:
        with open(fi, "rb") as f:
            protobuf_obj.ParseFromString(f.read())
        dict_obj = MessageToDict(protobuf_obj)
        for nT, trial in enumerate(dict_obj['trials']):
            tmpdf = pd.io.json.json_normalize(data=trial['samples'])
            if 'handData.graspedObject' in tmpdf.columns:
                tmpdf = tmpdf.loc[:, ['unixTimestamp','timestamp', 'combinedEye.raycastHitObject','handData.graspedObject']]
            else:
                tmpdf = tmpdf.loc[:, ['unixTimestamp','timestamp', 'combinedEye.raycastHitObject']]
                                  
            tmpdf['trialID'] = trial['metaData']['trialID'] if 'trialID' in trial['metaData'] else np.NaN  
            tmpdf['subjectID'] = dict_obj['subjectID']
            tmpdf['trialNum'] = nT
            samples_df = pd.concat([samples_df, tmpdf], ignore_index=True, sort=False)             
            
            tmpdf = pd.io.json.json_normalize(data=trial['metaData']['initialConfiguration']['items'])
            tmpdf['trialID'] = trial['metaData']['trialID'] if 'trialID' in trial['metaData'] else np.NaN  
            tmpdf['subjectID'] = dict_obj['subjectID']
            tmpdf['trialNum'] = nT
            shelf_df = pd.concat([shelf_df, tmpdf], ignore_index=True, sort=False) 
            
    except FileNotFoundError:
        print("{} not found, moving on!".format(fi))
    
pickleData('./Data/','reducedAllData',samples_df)
del samples_df
pickleData('./Data/','allShelfData',shelf_df)
del shelf_df

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  return self._getitem_tuple(key)


KeyboardInterrupt: 

In [None]:
samples_df = read_dataFrameFromFile('./Data/reducedAllData')

In [None]:
subject_df = read_dataFrameFromFile('./Data')

In [None]:
samples_df.columns

In [None]:
samples_df.info()

In [None]:
samples_df.isna().mean()

In [None]:
filtered_df = samples_df.dropna(subset=['trialID'])
filtered_df = filtered_df.drop(columns='unixTimestamp')
filtered_df = filtered_df.loc[(filtered_df.timestamp!=-1), :]

In [None]:
filtered_df.subjectID.unique()

In [None]:
filtered_df.trialNum.value_counts()

In [None]:
def replace_objs(s):
    valid_objs = ['Cube_Blue', 'Cube_Green', 'Cube_Red', 'Cube_Yellow', 'Cylinder_Blue', 'Cylinder_Green',
                  'Cylinder_Red','Cylinder_Yellow', 'Sphere_Blue', 'Sphere_Green', 'Sphere_Red',
                  'Sphere_Yellow', 'Tetraeder_Blue', 'Tetraeder_Green', 'Tetraeder_Red', 'Tetraeder_Yellow']
    if s in valid_objs:
        return s
    else:
        return 'Other'



In [None]:
filtered_df['combinedEye.raycastHitObject'] = filtered_df['combinedEye.raycastHitObject'].apply(replace_objs)
display(filtered_df.tail())

In [None]:
objs_dict = {'Other':0, 'Cube_Blue': 1, 'Cube_Green':2, 'Cube_Red':3, 'Cube_Yellow':4, 'Cylinder_Blue':5,
             'Cylinder_Green':6, 'Cylinder_Red':7, 'Cylinder_Yellow':8, 'Sphere_Blue':9, 'Sphere_Green':10,
             'Sphere_Red':11, 'Sphere_Yellow':12, 'Tetraeder_Blue':13, 'Tetraeder_Green':14,
             'Tetraeder_Red':15, 'Tetraeder_Yellow':16}

In [None]:
# %matplotlib notebook
sub = 1008
trialNum = 7

tmpdf = (
    filtered_df
    .rename(columns={'combinedEye.raycastHitObject':'eyeHit', 'handData.graspedObject':'grasp'})
    .query('subjectID == @sub & trialNum == @trialNum')
#     .query('timestamp < 30')
#     .query('eyeHit != "Other"')
    .sort_values('timestamp')
#     [['timestamp', 'eyeHit', 'grasp']]
    .set_index('timestamp')
)
display(tmpdf)
# sns.set(context = "talk", style="whitegrid", palette="dark", font_scale=1, rc={'figure.figsize':(11.7,8.27)})
# _, ax = plt.subplots(figsize=(20,10))
# tmpdf['eyeHit'] = tmpdf['eyeHit'].map(objs_dict)
# tmpdf['grasp'] = tmpdf['grasp'].map(objs_dict)
# ax.scatter(tmpdf.index, tmpdf['eyeHit'],color = 'r', alpha=0.5, label='eyeHit', marker='x')
# ax.scatter(tmpdf.index, tmpdf['grasp'],color = 'g', alpha=0.2, label='grasp', marker='+')

# # tmpdf['eyeHit'].plot(ax=ax, label='eyeHits', yticks=np.arange(17),color = COLORS['r'], alpha=0.5)
# # tmpdf['grasp'].plot(ax=ax, label='grasp', color = COLORS['g'], linestyle=':')
# ax.set_yticks(np.arange(17))
# ax.set_yticklabels(objs_dict.keys())
# ax.set_xlabel('Time (sec)')
# ax.set_ylabel('Objects')
# ax.legend(loc='upper right')
# ax.set_title('Eye Raycast Hit & Grasped Object vs. Time (Subject-{}, Trial = {}'.format(sub, trialNum))
# plt.savefig(PLOT_DIR+'/eye_grasp_sub{}_trial{}.png'.format(sub,trialNum), quality=90)

In [None]:
samples_df['handData.graspedObject'].fillna(0, inplace=True)
samples_df['numHits'] = (
        samples_df['combinedEye.raycastHitObject']
        .groupby((samples_df['combinedEye.raycastHitObject']!=samples_df['combinedEye.raycastHitObject'].shift())
        .cumsum())
        .transform('size')
)
samples_df['eyeChange'] = (
                samples_df['combinedEye.raycastHitObject']!=samples_df['combinedEye.raycastHitObject'].shift()
)
samples_df['handChange'] = (
                samples_df['handData.graspedObject']!=samples_df['handData.graspedObject'].shift()
)

# samples_df['dwellTime'] = (
#                 samples_df['combinedEye.raycastHitObject']!=samples_df['combinedEye.raycastHitObject'].shift()
# )

tmpdf = samples_df
tmpdf.loc[(~tmpdf['handChange']),'handData.graspedObject' ] = 0

grasp = np.where(tmpdf.handChange)[0]
next_grasp = np.where(tmpdf.handChange.shift())[0]
display(tmpdf.loc[next_grasp])

In [None]:
filtered_df.head()

In [None]:
raise

# Scratch

In [None]:
np.arange(1,17)