# Preprocessing 05 - event detection

## Import Libraries

In [1]:
import pandas as pd
import numpy as np
from dataEvaluation.utils.remodnav import perform_remodnav
import scipy.signal as signal
import os
import contextlib
from tqdm.notebook import tqdm
import scipy.ndimage as ndimage

## Eyetracking Event Detection

In [2]:
def count_nan_beginning(data):
    """Return how many consecutive NaN values ``data`` starts with.

    Iteration stops at the first value that is not NaN. An input made up
    only of NaN values (or an empty input) yields the number of values seen.
    """
    leading_nans = 0
    for sample in data:
        if not np.isnan(sample):
            break
        leading_nans += 1
    return leading_nans

def count_nan_end(data):
    """Return how many consecutive NaN values ``data`` ends with.

    The input is walked back to front via ``data[::-1]``; the position of
    the first non-NaN value in that reversed view is the answer. If every
    value is NaN the full length is returned.
    """
    reversed_view = data[::-1]
    for offset, sample in enumerate(reversed_view):
        if not np.isnan(sample):
            return offset
    return len(reversed_view)

# Locate contiguous runs of NaN (and non-NaN) values as inclusive (start, end) index pairs

def get_coherent_nan(series):
    """Return the contiguous NaN runs of ``series``.

    Each run is reported as an inclusive ``(start, end)`` pair of positions,
    in ascending order. An input without any NaN gives an empty list.
    """
    nan_positions = np.where(np.isnan(series))[0]
    if nan_positions.size == 0:
        return []

    # A run ends wherever the next NaN position is not the direct successor.
    breaks = np.where(np.diff(nan_positions) != 1)[0]
    run_starts = np.concatenate(([nan_positions[0]], nan_positions[breaks + 1]))
    run_ends = np.concatenate((nan_positions[breaks], [nan_positions[-1]]))
    return list(zip(run_starts, run_ends))

def get_coherent_non_nan(series):
    """Return the contiguous runs of non-NaN values in ``series``.

    Each run is reported as an inclusive ``(start, end)`` pair of positions,
    in ascending order. An input consisting only of NaN gives an empty list.
    """
    valid_positions = np.where(np.logical_not(np.isnan(series)))[0]
    if len(valid_positions) == 0:
        return []

    # True wherever a valid position is not directly followed by the next one.
    gap_follows = np.diff(valid_positions) != 1
    run_starts = valid_positions[np.r_[True, gap_follows]]
    run_ends = valid_positions[np.r_[gap_follows, True]]
    return list(zip(run_starts, run_ends))

def interpolate_by_nearest(x, y, max_error_duration=0.1, sampling_rate = 250):
    """Clean two coordinate series and report the NaN runs that remain.

    Works on copies of ``x`` and ``y`` in three stages:

    1. NaN runs whose ``end - start`` is at most ``max_number_of_steps``
       (``max_error_duration`` converted to samples) are filled by
       nearest-neighbour interpolation between the adjacent samples.
    2. Runs of valid samples whose ``end - start`` is at most
       ``max_number_of_steps`` are set to NaN in *both* series.
    3. The NaN runs left over are recorded, then every NaN position receives
       the Gaussian-smoothed (``sigma=1.0``) value of the forward-filled
       signal. If that step fails, the series is returned unsmoothed.

    Args:
        x: First coordinate series; the calling script passes a pandas Series.
        y: Second coordinate series, handled exactly like ``x``.
        max_error_duration: Threshold duration in seconds.
        sampling_rate: Samples per second.

    Returns:
        ``(x_filled, y_filled, x_nan_runs, y_nan_runs)`` where the run lists
        hold the inclusive ``(start, end)`` pairs returned by
        ``get_coherent_nan`` after stage 2.
    """
    x = x.copy()
    y = y.copy()

    duration_per_step = 1.0/sampling_rate
    max_number_of_steps = int(max_error_duration/duration_per_step)

    # Stage 1: fill short NaN runs. Repeat only while a pass actually removed
    # NaNs; a run that cannot be filled (e.g. one touching the start or end
    # of the series) would otherwise keep the loop spinning forever.
    progress = True
    while progress:
        filled_x = _fill_short_nan_runs(x, max_number_of_steps)
        filled_y = _fill_short_nan_runs(y, max_number_of_steps)
        progress = filled_x or filled_y

    # Stage 2: blank short islands of valid data in both series. Blanking one
    # series can shorten runs in the other, hence the repetition until no
    # short run is left.
    while True:
        short_runs = [
            run for run in get_coherent_non_nan(x) + get_coherent_non_nan(y)
            if run[1] - run[0] <= max_number_of_steps
        ]
        if not short_runs:
            break
        for start, end in short_runs:
            x[start:end+1] = np.nan
            y[start:end+1] = np.nan

    # Stage 3: remember the remaining gaps before they are smoothed over.
    nan_x_ranges = get_coherent_nan(x)
    nan_y_ranges = get_coherent_nan(y)

    return _pad_and_smooth(x), _pad_and_smooth(y), nan_x_ranges, nan_y_ranges


def _fill_short_nan_runs(values, max_number_of_steps):
    """Nearest-fill each short NaN run of ``values`` in place; return True if any NaN was filled."""
    nan_count_before = int(np.isnan(values).sum())
    for start, end in get_coherent_nan(values):
        if end - start > max_number_of_steps:
            continue
        # Widen the window by one sample on each side so the interpolation
        # has a valid neighbour before and after the run (where one exists).
        low = max(0, start - 1)
        high = min(len(values), end + 2)
        try:
            values[low:high] = pd.Series(values[low:high]).interpolate(method='nearest')
        except Exception:
            # Best effort: a run that cannot be interpolated stays NaN.
            continue
    return int(np.isnan(values).sum()) < nan_count_before


def _pad_and_smooth(values):
    """Forward-fill ``values`` and overwrite its former NaN positions with Gaussian-smoothed values."""
    try:
        nan_positions = np.where(np.isnan(values))[0]
        # ffill() is the non-deprecated equivalent of interpolate(method='pad').
        padded = values.ffill()
        smoothed = ndimage.gaussian_filter1d(padded, sigma=1.0)
        # Positional write-back, matching the positional indices from np.where.
        padded.iloc[nan_positions] = smoothed[nan_positions]
    except Exception:
        # Best effort: fall back to the series without padding or smoothing.
        return values
    return padded

def get_matching_nans(x_nan, y_nan, length):
    """Return the runs that are covered by both ``x_nan`` and ``y_nan``.

    ``x_nan`` and ``y_nan`` are lists of inclusive ``(low, high)`` index
    pairs within a signal of ``length`` samples. The result is the list of
    inclusive pairs, as returned by ``get_coherent_nan``, over the positions
    marked by both inputs.
    """
    def as_mask(ranges):
        # Boolean mask with True on every index covered by one of the ranges.
        mask = np.zeros(length, dtype=bool)
        for first, last in ranges:
            mask[first:last+1] = True
        return mask

    in_both = np.logical_and(as_mask(x_nan), as_mask(y_nan))
    # Encode the overlap as NaN so the shared run finder can be reused.
    return get_coherent_nan(np.where(in_both, np.nan, 0.0))

def remove_interpolated_events(df):
    """Return ``df`` without the non-blink events that touch a blink.

    Rows labelled ``"PBlink"`` are always kept. Any other row is dropped when
    its ``start_time`` or ``end_time`` lies inside a blink interval, or when
    a blink's start or end lies inside the row's own interval (all bounds
    inclusive).

    The input frame is not modified: no helper column is added to it.

    Args:
        df: Event table with ``label``, ``start_time`` and ``end_time``
            columns.

    Returns:
        A new DataFrame with the surviving rows, original index labels kept.
    """
    blink_mask = (df["label"] == "PBlink").to_numpy()
    blink_start = df["start_time"].to_numpy()[blink_mask]
    blink_end = df["end_time"].to_numpy()[blink_mask]

    keep = np.ones(len(df), dtype=bool)
    rows = zip(blink_mask, df["start_time"], df["end_time"])
    for position, (is_blink, start_time, end_time) in enumerate(rows):
        if is_blink:
            continue
        # Compare this event against all blinks at once.
        touches_blink = (
            ((blink_start <= start_time) & (start_time <= blink_end))
            | ((blink_start <= end_time) & (end_time <= blink_end))
            | ((start_time <= blink_start) & (blink_start <= end_time))
            | ((start_time <= blink_end) & (blink_end <= end_time))
        )
        keep[position] = not touches_blink.any()
    return df[keep]

In [3]:
# Read in the Behavioral Data
df_behavioral = pd.read_csv("./data/filteredData/filtered_data.csv")

# recording and screen parameters handed to the event detection
sampling_rate = 250.0
max_error_duration = 0.1
screen_resolution = (1920, 1080)
# presumably centimetres for size and distance - TODO confirm
screen_size = (56.0, 31.5)
screen_distance = 60.0
x_res = 1920.0
y_res = 1080.0

df_events = pd.DataFrame(columns=["Participant", "Algorithm", "Subpart",
                                  "id", "label", "start_x", "start_y", "end_x", "end_y", "start_time", "end_time",
                                  "amp", "peak_vel", "med_vel", "avg_vel"])
# (row index, exception) for every trial whose event detection failed
errors = []

for index, row in tqdm(df_behavioral.iterrows(), total=len(df_behavioral)):
    # read in eyetracking file
    df_eyetracking = pd.read_csv(row["Eyetracking"])
    participant = row["Participant"]
    algorithm = row["Algorithm"]
    subpart = row["Subpart"]

    # normalize the time regarding eyetracking to 0
    df_eyetracking["time"] = df_eyetracking["time"].astype(float)
    df_eyetracking["time"] = df_eyetracking["time"] - df_eyetracking["time"].iloc[0]

    # drop unused columns
    df_eyetracking = df_eyetracking.drop(columns=["l_gaze_point_in_user_coordinate_system_x",
                                                  "l_gaze_point_in_user_coordinate_system_y",
                                                  "l_gaze_point_in_user_coordinate_system_z",
                                                  "r_gaze_point_in_user_coordinate_system_x",
                                                  "r_gaze_point_in_user_coordinate_system_y",
                                                  "r_gaze_point_in_user_coordinate_system_z",
                                                  "l_gaze_origin_in_user_coordinate_system_x",
                                                  "l_gaze_origin_in_user_coordinate_system_y",
                                                  "l_gaze_origin_in_user_coordinate_system_z",
                                                  "r_gaze_origin_in_user_coordinate_system_x",
                                                  "r_gaze_origin_in_user_coordinate_system_y",
                                                  "r_gaze_origin_in_user_coordinate_system_z"])

    # scale the display coordinates by the screen resolution
    df_eyetracking["l_display_x"] = df_eyetracking["l_display_x"].astype(float) * x_res
    df_eyetracking["l_display_y"] = df_eyetracking["l_display_y"].astype(float) * y_res
    df_eyetracking["r_display_x"] = df_eyetracking["r_display_x"].astype(float) * x_res
    df_eyetracking["r_display_y"] = df_eyetracking["r_display_y"].astype(float) * y_res

    # convert the validity flags to integers
    df_eyetracking["l_valid"] = df_eyetracking["l_valid"].astype(int)
    df_eyetracking["r_valid"] = df_eyetracking["r_valid"].astype(int)

    # flag samples lying more than one screen width/height outside the display
    # (column-wise comparisons instead of a row-wise apply; NaN compares False)
    df_eyetracking["l_miss_x"] = (df_eyetracking["l_display_x"] < -x_res) | (df_eyetracking["l_display_x"] > 2 * x_res)
    df_eyetracking["l_miss_y"] = (df_eyetracking["l_display_y"] < -y_res) | (df_eyetracking["l_display_y"] > 2 * y_res)
    df_eyetracking["r_miss_x"] = (df_eyetracking["r_display_x"] < -x_res) | (df_eyetracking["r_display_x"] > 2 * x_res)
    df_eyetracking["r_miss_y"] = (df_eyetracking["r_display_y"] < -y_res) | (df_eyetracking["r_display_y"] > 2 * y_res)

    # a sample is missing when it is off-screen on either axis or not valid
    df_eyetracking["l_miss"] = df_eyetracking["l_miss_x"] | df_eyetracking["l_miss_y"] | ~(df_eyetracking["l_valid"] >= 1)
    df_eyetracking["r_miss"] = df_eyetracking["r_miss_x"] | df_eyetracking["r_miss_y"] | ~(df_eyetracking["r_valid"] >= 1)

    # mark missing samples as NaN
    df_eyetracking.loc[df_eyetracking["l_miss"], "l_display_x"] = np.nan
    df_eyetracking.loc[df_eyetracking["l_miss"], "l_display_y"] = np.nan
    df_eyetracking.loc[df_eyetracking["r_miss"], "r_display_x"] = np.nan
    df_eyetracking.loc[df_eyetracking["r_miss"], "r_display_y"] = np.nan

    # interpolate missing data per eye and keep the NaN runs that remained
    l_display_x, l_display_y, l_x_nan, l_y_nan = interpolate_by_nearest(
        df_eyetracking['l_display_x'], df_eyetracking['l_display_y'],
        max_error_duration, sampling_rate)
    r_display_x, r_display_y, r_x_nan, r_y_nan = interpolate_by_nearest(
        df_eyetracking['r_display_x'], df_eyetracking['r_display_y'],
        max_error_duration, sampling_rate)
    # runs missing in x and y of one eye, then runs missing in both eyes
    l_nan = get_matching_nans(l_x_nan, l_y_nan, len(df_eyetracking))
    r_nan = get_matching_nans(r_x_nan, r_y_nan, len(df_eyetracking))
    a_nan = get_matching_nans(l_nan, r_nan, len(df_eyetracking))
    # sample indices -> (start, end, duration) in seconds
    a_nan = [(start/sampling_rate, end/sampling_rate, (end-start)/sampling_rate) for start, end in a_nan]

    # output is silenced while averaging (nanmean warns on all-NaN columns)
    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stdout(devnull):
            with contextlib.redirect_stderr(devnull):
                # average x of both eyes
                avg_x = np.nanmean(np.stack([l_display_x, r_display_x]), axis=0)
                # average y of both eyes
                avg_y = np.nanmean(np.stack([l_display_y, r_display_y]), axis=0)

    # apply a median filter (7-sample window)
    avg_x = signal.medfilt(avg_x, kernel_size=7)
    avg_y = signal.medfilt(avg_y, kernel_size=7)

    # trim leading/trailing NaN samples; the floor of 1 keeps the negative
    # stop index valid (a stop of -0 would give an empty slice), so the last
    # sample is always dropped
    nan_beginning = max(count_nan_beginning(avg_x), count_nan_beginning(avg_y))
    nan_end = max(count_nan_end(avg_x), count_nan_end(avg_y), 1)

    avg_x = avg_x[nan_beginning:-nan_end]
    avg_y = avg_y[nan_beginning:-nan_end]

    # NOTE(review): time_offset is computed but never applied in this cell.
    # perform_remodnav only sees the trimmed signal while the PBlink times
    # below refer to the untrimmed recording - confirm whether event times
    # should be shifted by this offset.
    time_offset = 1.0/sampling_rate * nan_beginning

    try:
        # disallow outputting for next function
        with open(os.devnull, 'w') as devnull:
            with contextlib.redirect_stdout(devnull):
                with contextlib.redirect_stderr(devnull):
                    events, pp, clf = perform_remodnav(
                        avg_x, avg_y,
                        sampling_rate,
                        screen_width=screen_size[0],
                        screen_width_pixels=screen_resolution[0],
                        screen_distance=screen_distance,
                        savgol_length=0.02)
    except Exception as e:
        print(index)
        print(e)
        errors.append((index, e))
        # Without this reset the loop below would either hit an undefined
        # name or re-use the events detected for the previous trial.
        events = []

    df_tmp = pd.DataFrame(columns=df_events.columns)
    for value in events:
        value["Participant"] = participant
        value["Subpart"] = subpart
        value["Algorithm"] = algorithm
        df_tmp.loc[len(df_tmp)] = value

    # add every run where both eyes were missing as a potential blink
    for potential_blink in a_nan:
        blink_event = {}
        blink_event["Participant"] = participant
        blink_event["Algorithm"] = algorithm
        blink_event["Subpart"] = subpart
        blink_event["label"] = "PBlink"
        blink_event["start_time"] = potential_blink[0]
        blink_event["end_time"] = potential_blink[1]
        df_tmp.loc[len(df_tmp)] = blink_event

    #df_tmp = remove_interpolated_events(df_tmp)
    df_tmp = df_tmp.reset_index(drop=True)
    # append this trial's rows to the final dataframe (index is renumbered)
    df_events = pd.concat([df_events, df_tmp], ignore_index=True)

  0%|          | 0/3270 [00:00<?, ?it/s]

## Export The Data

In [4]:
# Write one CSV of time-ordered events per (participant, algorithm, subpart).
for (participant, algorithm, subpart), df_group in tqdm(df_events.groupby(["Participant", "Algorithm", "Subpart"])):
    df_group = df_group.reset_index(drop=True)
    df_group = df_group.sort_values(by="start_time")
    # drop columns that are not needed (they are encoded in the file path)
    df_group = df_group.drop(columns=["Participant", "Algorithm", "Subpart"])

    participant_dir = f"./data/filteredData/Participant{str(participant).zfill(2)}"
    # Best-effort removal of the file named without the subpart (presumably
    # left by an earlier export - TODO confirm). Only file-system errors are
    # ignored, so an interrupt is no longer swallowed.
    with contextlib.suppress(OSError):
        os.remove(f"{participant_dir}/{algorithm}_eyetracking.csv")
    df_group.to_csv(f"{participant_dir}/{algorithm}_{subpart}_eyetracking.csv", index=False)

  0%|          | 0/3270 [00:00<?, ?it/s]