# Eye Tracker Data Analysis


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.ticker import FormatStrFormatter

import os
import re

import scipy.stats as stats
import seaborn as sns
import pingouin as pg

## Pre-processing


Clean up raw data from eye tracker software so it's easier to work with:


In [7]:
# Convert every raw eye-tracker export in `raw_dir` into a slim CSV with the
# columns rec_time / x / y, written to `processed_dir` as
# "<participant>_<trial>.csv".
raw_data_dir = raw_dir = 'Eye Tracker Data/Raw CSVs'
processed_dir = 'Eye Tracker Data/Processed CSVs'

os.makedirs(processed_dir, exist_ok=True)

# Substrings used to locate the wanted columns in each raw export.
keywords = {
    'rec_time': 'rec_time',
    'gaze_x': 'Gaze_Gaze X',
    'gaze_y': 'Gaze_Gaze Y',
    'pupil_x': 'Original_Pupil X',
    'pupil_y': 'Original_Pupil Y'
}

# sorted() makes the processing order (and any printed messages) reproducible;
# os.listdir() returns entries in arbitrary order.
for filename in sorted(os.listdir(raw_dir)):
    if not filename.endswith('_CsvData.txt'):
        continue

    # extract participant & trial number from text file names:
    # the first '_'-separated field is the participant, and the trial number is
    # whatever follows '. ' in the second field
    name_parts = filename.split('_')
    try:
        participant = name_parts[0]
        trial_number = name_parts[1].split('. ')[1]
    except IndexError:
        # One oddly named file should not abort the whole batch.
        print(f"{filename} has an unexpected name format; skipped.")
        continue
    new_filename = f"{participant}_{trial_number}.csv"

    input_path = os.path.join(raw_dir, filename)
    output_path = os.path.join(processed_dir, new_filename)

    df = pd.read_csv(input_path, sep='\t', encoding='utf-8', engine='python')
    df.columns = df.columns.str.strip()

    rec_time_col = next((col for col in df.columns if keywords['rec_time'] in col), None)

    # try to find gaze x & y columns first
    x_col = next((col for col in df.columns if keywords['gaze_x'] in col), None)
    y_col = next((col for col in df.columns if keywords['gaze_y'] in col), None)

    # if gaze x & y not found, use pupil x & y columns instead
    if not x_col or not y_col:
        x_col = next((col for col in df.columns if keywords['pupil_x'] in col), None)
        y_col = next((col for col in df.columns if keywords['pupil_y'] in col), None)

    if not rec_time_col or not x_col or not y_col:
        print(f"{filename} missing required columns.")
        continue

    # Take an explicit copy so the edits below act on an independent frame
    # rather than on a slice of `df` (avoids SettingWithCopyWarning).
    df_filtered = df[[rec_time_col, x_col, y_col]].copy()
    df_filtered.columns = ['rec_time', 'x', 'y']

    # fix time formatting: use ':' instead of '.' as the separator
    df_filtered['rec_time'] = (
        df_filtered['rec_time'].astype(str).str.replace('.', ':', regex=False)
    )

    # write new CSV, named by participant name & trial only
    df_filtered.to_csv(output_path, index=False)

## Metric 1: Percentage of Total Fixations


Helpers:


In [None]:
def time_to_ms(t):
    """Convert an ``h:m:s:ms`` timestamp string to integer milliseconds.

    Parameters
    ----------
    t : str or missing value
        Timestamp made of four colon-separated integer fields
        (hours, minutes, seconds, milliseconds).

    Returns
    -------
    int or None
        Total milliseconds, or ``None`` when ``t`` is missing (NaN/None),
        equals ``'N/A'``, is not a string, or does not consist of exactly
        four integer fields.
    """
    if pd.isna(t) or t == 'N/A':
        return None
    try:
        h, m, s, ms = map(int, t.split(':'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: `t` is not a str; ValueError: wrong number
        # of fields or a non-integer field. Anything else (e.g. Ctrl-C) should
        # propagate instead of being silently swallowed.
        return None
    return ((h * 3600 + m * 60 + s) * 1000) + ms
    
def distance(p1, p2):
    """Return the straight-line (Euclidean) distance between two 2-D points.

    Only the first two components of each point are used:
    ``p1[0]``/``p2[0]`` as x and ``p1[1]``/``p2[1]`` as y.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return np.sqrt(dx**2 + dy**2)

def detect_fixations(df, time_threshold_ms=150, spatial_threshold=50):
    """Count fixation-like gaze clusters in a table of eye-tracker samples.

    Samples are scanned in row order. A cluster starts at the first sample
    with valid x/y and grows one sample at a time. After each added sample the
    centroid of the cluster is recomputed; the cluster is abandoned as soon as
    any member lies farther than ``spatial_threshold`` from that centroid, or
    when a sample with NaN x/y is reached. If the time elapsed since the first
    sample of the cluster exceeds ``time_threshold_ms`` before that happens,
    one fixation is counted and the cluster is closed.

    Because the centroid follows the cluster, slow drifts (smooth pursuits)
    are also counted as "fixations"; this is intentional, as it is more
    accurate to what was done in the manual video analysis.

    Notes
    -----
    * A cluster is closed the moment the time threshold is exceeded, so one
      long, stable gaze produces several counts.
    * Scanning resumes at the sample *after* the one that ended the previous
      cluster, i.e. the sample that broke the spatial threshold never starts
      a cluster of its own. NOTE(review): this preserves the existing counts;
      confirm it is the intended segmentation.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``x``, ``y`` and ``time_ms``.
    time_threshold_ms : float, default 150
        Elapsed time a cluster must exceed (strictly) to count as a fixation.
    spatial_threshold : float, default 50
        Maximum allowed distance of any cluster sample from the centroid, in
        the same units as ``x``/``y``.

    Returns
    -------
    int
        Number of fixations counted.
    """
    # Extract the columns once: indexing df.iloc[i][col] inside the nested
    # loops would build a fresh row Series for every single access.
    points = np.column_stack((
        df['x'].to_numpy(dtype=float),
        df['y'].to_numpy(dtype=float),
    ))
    times = df['time_ms'].to_numpy(dtype=float)
    invalid = np.isnan(points).any(axis=1)
    n_samples = len(points)

    fixations = 0
    start_idx = 0

    while start_idx < n_samples:
        # A cluster can only start on a sample with valid coordinates.
        if invalid[start_idx]:
            start_idx += 1
            continue

        start_time = times[start_idx]
        end_idx = start_idx + 1

        while end_idx < n_samples:
            if invalid[end_idx]:
                break

            # Cluster = every sample from the start up to and including the
            # candidate; its centroid moves as the cluster grows.
            cluster = points[start_idx:end_idx + 1]
            offsets = cluster - cluster.mean(axis=0)
            radii = np.sqrt(offsets[:, 0] ** 2 + offsets[:, 1] ** 2)
            if (radii > spatial_threshold).any():
                break

            if times[end_idx] - start_time > time_threshold_ms:
                fixations += 1
                break

            end_idx += 1

        # Resume after the sample that ended (or completed) this cluster.
        start_idx = end_idx + 1

    return fixations

Count the fixations for each participant/trial combination within each relevant time window (the sign-visible window and the pedestrian-visible window).


In [24]:
# For every participant/trial row in the timestamps table, load the matching
# processed recording and count fixations inside the sign-visible and
# pedestrian-visible time windows.
processed_dir = os.path.join("Eye Tracker Data", 'Processed CSVs')
timestamps_file = os.path.join("Eye Tracker Data", 'Eye Tracker Fixation Timestamps.csv')

fixation_df = pd.read_csv(timestamps_file)
fixation_df.columns = fixation_df.columns.str.strip()


def get_window_df(start, end):
    """Return the rows of the current recording `df` with start <= time_ms <= end.

    Reads the module-level `df` that the loop below assigns for each
    recording. Rows without x or y are dropped. If either bound is None, an
    empty frame with the same columns is returned.
    """
    if start is None or end is None:
        return pd.DataFrame(columns=df.columns)
    window_df = df[(df['time_ms'] >= start) & (df['time_ms'] <= end)]
    return window_df.dropna(subset=['x', 'y'])


for _, row in fixation_df.iterrows():
    participant = row['participant']
    trial = str(row['trial'])

    csv_filename = f"{participant}_{trial}.csv"
    csv_path = os.path.join(processed_dir, csv_filename)

    # recordings without a processed CSV are skipped silently
    if not os.path.exists(csv_path):
        continue

    df = pd.read_csv(csv_path)
    df['time_ms'] = df['rec_time'].apply(time_to_ms)

    # handle missing or zero x-y eye position data
    df['x'] = pd.to_numeric(df['x'], errors='coerce')
    df['y'] = pd.to_numeric(df['y'], errors='coerce')
    # Only the coordinates are blanked: a frame-wide replace would also turn
    # a legitimate time_ms of 0 into NaN and drop that sample from every window.
    df[['x', 'y']] = df[['x', 'y']].replace(0, np.nan)

    # convert relevant time windows to milliseconds
    sign_start = time_to_ms(row.get('sign_visible_t_start'))
    sign_end = time_to_ms(row.get('sign_visible_t_end'))
    ped_start = time_to_ms(row.get('ped_visible_t_start'))
    ped_end = time_to_ms(row.get('ped_visible_t_end'))

    # extract data by each relevant time window and remove rows with no x/y
    sign_df = get_window_df(sign_start, sign_end)
    ped_df = get_window_df(ped_start, ped_end)

    # count fixations in each window using helper
    sign_fix_count = detect_fixations(sign_df)
    ped_fix_count = detect_fixations(ped_df)

    print(f"{participant} trial {trial} -- Sign window fixations: {sign_fix_count}, Ped window fixations: {ped_fix_count}")

Pete trial 1 -- Sign window fixations: 49, Ped window fixations: 19
Pete trial 2 -- Sign window fixations: 0, Ped window fixations: 0
Pete trial 3 -- Sign window fixations: 0, Ped window fixations: 0
Pete trial 4 -- Sign window fixations: 51, Ped window fixations: 52
Pete trial 5 -- Sign window fixations: 47, Ped window fixations: 24
Pete trial 6 -- Sign window fixations: 0, Ped window fixations: 20
Pete trial 7 -- Sign window fixations: 63, Ped window fixations: 38
Pete trial 8 -- Sign window fixations: 65, Ped window fixations: 29
Pete trial 9 -- Sign window fixations: 52, Ped window fixations: 22
Pete trial 10 -- Sign window fixations: 0, Ped window fixations: 0
Pete trial 11 -- Sign window fixations: 32, Ped window fixations: 22
Pete trial 12 -- Sign window fixations: 81, Ped window fixations: 116
Pete trial 13 -- Sign window fixations: 0, Ped window fixations: 32
Pete trial 14 -- Sign window fixations: 58, Ped window fixations: 0
Pete trial 15 -- Sign window fixations: 72, Ped win