In [1]:
import numpy as np
import matplotlib
%matplotlib notebook
import matplotlib.pyplot as plt
import os
import sys
import pickle
from struct import *
import pandas as pd
import seaborn as sns
import mixture
sns.set_style("white")
import warnings
warnings.filterwarnings("ignore")
import collections
from math import pi, cos, sin, cosh, tanh
from scipy.spatial.transform import Rotation as Rot
import cv2
from plotting import *
from interpolation import *
from ficks import *
from structure import *

In [2]:
import scipy.interpolate
import scipy.integrate
import scipy.stats
from sklearn.linear_model import RANSACRegressor, TheilSenRegressor, LinearRegression
from sklearn.mixture import GaussianMixture
from scipy.stats import norm
from scipy.signal import argrelextrema
from statistics import median

In [3]:
def clean_data(df, max_vel=None, device=None, sample_rate=None):
    """
    Blank out physiologically implausible samples and trim both ends of a recording.

    Rows whose intersample speed ``iss`` is non-positive or strictly greater than
    ``max_vel`` get every column from 'x_deg' through 'issy' (label slice, inclusive)
    set to NaN. Rows whose ``iss`` is already NaN are left untouched. For the 'varjo'
    and 'PI' devices, ``sample_rate`` rows are then dropped from the start and from
    the end of the frame; any other device value skips the trimming.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'iss' column, and the columns 'x_deg' ... 'issy' must exist
        in that order for the label slice to work.
    max_vel : float, optional
        Speed threshold. Defaults to the notebook-level ``MAX_VEL``.
    device : str, optional
        Device name. Defaults to the notebook-level ``DEVICE``.
    sample_rate : int, optional
        Number of rows to trim from each end. Defaults to the notebook-level
        ``SAMPLE_RATE``.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index. The input frame is not modified.
    """
    assert 'iss' in df.columns, "'iss' is not a column in the dataframe"

    # Fall back to the notebook globals so existing single-argument calls keep working.
    if max_vel is None:
        max_vel = MAX_VEL
    if device is None:
        device = DEVICE
    if sample_rate is None:
        sample_rate = SAMPLE_RATE

    # Work on a copy so the caller's frame is never mutated behind its back.
    cleaned = df.copy()

    # Label data where the intersample speed is non-positive or above the threshold as noise.
    # Comparisons against NaN are False, so rows with a missing speed are not selected.
    noisy = (cleaned['iss'] <= 0) | (cleaned['iss'] > max_vel)
    cleaned.loc[noisy, 'x_deg':'issy'] = np.nan

    # Trim the start and end of dataset to agree with the analyzed experiment
    if device in ('varjo', 'PI'):
        trim = int(sample_rate)
        cleaned = cleaned.iloc[trim:len(cleaned) - trim]

    # reset row indices
    return cleaned.reset_index(drop=True)

In [4]:
# Per-recording dataframes are collected here, keyed by recording ID.
data = dict()
round_to = 3

# ---------------------------------------------------------------------------
# Event labels and the colours used to draw them
# ---------------------------------------------------------------------------
STATES = ['fix', 'sac', 'smp', 'blink', 'noise', 'other', 'loss']
COLORS = dict(fix='red', sac='lime', smp='purple', blink='dodgerblue',
              noise='darkorchid', other='grey', loss='gold')
BORDERS = dict(fix='magenta', sac='green', smp='blue', blink='royalblue',
               noise='darkviolet', other='black', loss='yellow')

# Upper bound on intersample speed; faster samples are treated as noise.
MAX_VEL = 800   # [deg/sec]

# ---------------------------------------------------------------------------
# Participants, keyed by ID, with their vision-loss label
# ---------------------------------------------------------------------------
participants = {
    'P2': 'CVL', 'P3': 'CVL', 'P4': 'Other', 'P5': 'PVL',
    'P6': 'CVL', 'P8': 'CVL', 'P9': 'CVL', 'P10': 'CVL',
    'P11': 'PVL', 'P12': 'PVL', 'P13': 'CVL', 'P14': 'PVL',
    'P15': 'PVL', 'P16': 'PVL', 'P17': 'PVL', 'P18': 'CVL',
    'P19': 'CVL', 'P21': 'CVL', 'P22': 'PVL', 'P23': 'PVL',
    'P24': 'PVL', 'P25': 'CVL',
}

# ---------------------------------------------------------------------------
# Per-device settings: sampling rate and which eye streams are available
# ---------------------------------------------------------------------------
sample_rates = dict(PI=66, varjo=200)
eyes = dict(PI=['combined'], varjo=['right', 'left'])

# ---------------------------------------------------------------------------
# Input files: one CSV per task, grouped by device
# ---------------------------------------------------------------------------
pi_tasks = {
    'walk': '/data/Isabella/thesis_spring2022/AMPs1_PI/calibedGaze_Fiona_walk_all_headCorrected.csv',
    'cereal': '/data/Isabella/thesis_spring2022/AMPs1_PI/calibedGaze_Fiona_cereal_all_headCorrected.csv',
    'sandwich': '/data/Isabella/thesis_spring2022/AMPs1_PI/calibedGaze_Fiona_sandwich_all_headCorrected.csv',
}
varjo_tasks = {
    'cereal': '/data/Isabella/thesis_spring2022/AMPs2_varjo/cereal_all.csv',
    'sandwich': '/data/Isabella/thesis_spring2022/AMPs2_varjo/sandwich_all.csv',
    'handwash': '/data/Isabella/thesis_spring2022/AMPs2_varjo/hand_wash_all.csv',
}
# Insertion order (PI first, then varjo) is the order the devices are processed in.
devices = {'PI': pi_tasks, 'varjo': varjo_tasks}

# Varjo tasks selected per participant (not referenced in this cell).
_BOTH_MEALS = ('cereal', 'sandwich')
varjo_to_use = {
    'P2': ['cereal'],
    'P3': list(_BOTH_MEALS),
    'P5': list(_BOTH_MEALS),
    'P6': list(_BOTH_MEALS),
    'P8': ['sandwich'],
    'P9': list(_BOTH_MEALS),
    'P13': list(_BOTH_MEALS),
    'P14': list(_BOTH_MEALS),
    'P15': ['cereal'],
    'P16': list(_BOTH_MEALS),
    'P18': list(_BOTH_MEALS),
    'P19': list(_BOTH_MEALS),
    'P21': ['cereal'],
    'P22': list(_BOTH_MEALS),
    'P23': list(_BOTH_MEALS),
    'P24': list(_BOTH_MEALS),
    'P25': list(_BOTH_MEALS),
}

data = {}


def _intersample_speeds(t, x, y):
    """
    Compute intersample speed (magnitude only) between consecutive gaze samples.

    Parameters
    ----------
    t, x, y : 1-D numpy arrays of equal length
        Timestamps and the horizontal / vertical gaze angles.

    Returns
    -------
    (iss, issy) : tuple of numpy arrays, same length as the inputs
        ``iss[i]`` is the 2-D displacement between samples i-1 and i divided by the
        time step; ``issy[i]`` is the same for the vertical component alone.
        Element 0 is NaN (no previous sample), and so is every element for which
        either neighbouring sample has a NaN in x or y.
    """
    dt = np.diff(t)
    dx = np.diff(x)
    dy = np.diff(y)

    iss = np.sqrt(dx**2 + dy**2) / dt
    issy = np.abs(dy) / dt

    # A pair is unusable as soon as x or y is missing in either of its two samples.
    missing = np.isnan(x) | np.isnan(y)
    pair_missing = missing[1:] | missing[:-1]
    iss[pair_missing] = np.nan
    issy[pair_missing] = np.nan

    # Pad the front so the result lines up row-for-row with the input samples.
    return np.concatenate(([np.nan], iss)), np.concatenate(([np.nan], issy))


for device, tasks in devices.items():
    # Set global variables (DEVICE and SAMPLE_RATE are read by clean_data)
    # NOTE(review): MIN_FIX_DUR looks like "100 ms minus four sample periods", in seconds — confirm.
    MIN_FIX_DUR = np.ceil(100 - 4*1/sample_rates[device]*1000)/1000
    DEVICE = device
    SAMPLE_RATE = sample_rates[device]

    # Loop over tasks within each device
    for task, file in tasks.items():

        calib_excluded = pd.read_csv(file)
        p_list = calib_excluded.subID.unique()

        # For varjo (binocular), loop over each eye to run event detection on each eye separately
        for eye in eyes[device]:

            #
            # COLLECT THE DATA
            #
            # Loop over participants within each task
            for subID in p_list:

                # Not used further in this cell; the lookup raises KeyError for an unknown participant.
                vision_loss = participants[subID]
                temp = calib_excluded[calib_excluded.subID == subID]

                if device == 'PI':

                    time = temp['timestamp(s)'].to_numpy()
                    time = time - time[0]

                    # The eye-in-world Fick angle columns are what is analysed for PI.
                    azimuth = temp['eyeInWorldFicksAz_calib_excluded'].to_numpy()
                    polar = temp['eyeInWorldFicksPol_calib_excluded'].to_numpy()

                    # astype() copies, so the in-place unwrap below never writes into
                    # (possibly read-only) memory owned by the dataframe.
                    x_deg = azimuth.astype(float)
                    y_deg = polar.astype(float)

                elif device == 'varjo':

                    time = temp['time(s)'].to_numpy()
                    # Forward vectors are not used in this cell; kept in case later cells read them.
                    x_forward = temp[f'{eye}_forward_x'].to_numpy()
                    y_forward = temp[f'{eye}_forward_y'].to_numpy()
                    z_forward = temp[f'{eye}_forward_z'].to_numpy()

                    # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
                    x_deg = temp[f'{eye}_x_deg_calib'].to_numpy().astype(float)
                    y_deg = temp[f'{eye}_y_deg_calib'].to_numpy().astype(float)

                # unwrap (over the non-NaN samples only; NaN positions are left as they are)
                x_valid = ~np.isnan(x_deg)
                y_valid = ~np.isnan(y_deg)
                x_deg[x_valid] = np.rad2deg(np.unwrap(np.deg2rad(x_deg[x_valid])))
                y_deg[y_valid] = np.rad2deg(np.unwrap(np.deg2rad(y_deg[y_valid])))

                assert len(time) == len(x_deg) and len(x_deg) == len(y_deg), "Vectors of mismatched length."

                # calculate intersample features (speed is just magnitude, no direction)
                # issy is the vertical-only speed; it must not be a copy of iss.
                iss, issy = _intersample_speeds(time, x_deg, y_deg)

                # Remove data where iss > MAX_VEL or iss <= 0 deg/s, then trim both ends.
                df = clean_data(pd.DataFrame({'time': time,
                                              'x_deg': x_deg,
                                              'y_deg': y_deg,
                                              'iss': iss,
                                              'issy': issy
                                              }))

                ID = device+'_'+task+'_'+eye+'_'+subID
                data[ID] = df
                df.to_csv(f'/data/Isabella/thesis_spring2022/event_detect_in/{ID}.csv')
                

In [5]:
# Summarise the pre-processed data: one line per recording ID with its number of rows
print("Part.", "Length of data")
for recording_id, recording in data.items():
    n_rows = len(recording)
    print(f"{recording_id}    {n_rows}")

Part. Length of data
PI_walk_combined_P2    10712
PI_walk_combined_P3    26828
PI_walk_combined_P4    22211
PI_walk_combined_P5    17365
PI_walk_combined_P8    13828
PI_walk_combined_P9    17605
PI_walk_combined_P11    9188
PI_walk_combined_P12    10759
PI_walk_combined_P13    9846
PI_walk_combined_P14    14895
PI_walk_combined_P15    18138
PI_walk_combined_P17    7565
PI_walk_combined_P18    13857
PI_walk_combined_P19    23286
PI_walk_combined_P21    10513
PI_walk_combined_P24    11133
PI_cereal_combined_P2    8119
PI_cereal_combined_P3    13311
PI_cereal_combined_P4    13747
PI_cereal_combined_P5    13168
PI_cereal_combined_P6    18342
PI_cereal_combined_P8    13634
PI_cereal_combined_P9    10210
PI_cereal_combined_P10    11414
PI_cereal_combined_P11    13913
PI_cereal_combined_P12    21299
PI_cereal_combined_P13    13247
PI_cereal_combined_P15    11427
PI_cereal_combined_P17    9518
PI_cereal_combined_P18    9816
PI_sandwich_combined_P2    29059
PI_sandwich_combined_P3    34560
PI_s