# 1 - File Synchronization

!!! Aim: traslate imotions timestamps into shared timestamps (pupil / world / unix)

##### Imports

In [5]:
import os
import warnings
import json
import pandas as pd
import numpy as np
import scipy as su
import ast
# from typing import Dict
# from scipy.signal import
# from scikit-learn import

import matplotlib.pyplot as plt
import seaborn as sns

import dask.dataframe as dd

##### Data Loading

In [101]:
# Load ddfs into dictionary (exception for 'all_surf_pos', df)
root_dir = './aggregated_data'
def load_data(root_dir, exception=False):    # Get csv file reads into one dictionary
    data = {}
    for subdir, _, files in os.walk(root_dir):
        for file in files:
            file_path = os.path.join(subdir, file)
            file_name = os.path.splitext(file)[0]   #file name without '.csv'
            if file.endswith('.csv') and file != 'all_surf_positions_HiDrive_Studie2.csv':
                data[file_name] = dd.read_csv(file_path)    #read and attach to dict
            elif exception and file == 'all_surf_positions_HiDrive_Studie2.csv':
                data [file_name] = pd.read_csv(file_path, converters=converters)    #read as normal and attach to dict
    return data
def parse(filedata): # Manually read the column
    output = []
    for line in filedata.split('\n'): # split into lines
        line = line.strip().rstrip(']').lstrip('[') #remove whitespace and brackets
        if not line:  
            continue    #skip empty lines
        line = line.split() #split into cell
        row = []
        for cell in line:
            cell = cell.strip()     #remove whitespace
            if not cell.strip():
                continue    #skip empty cells
            row.append(float(cell)) #convert to float and add
        output.append(row)
    return output
converters = {
    "img_to_surf_trans": parse,
    "surf_to_img_trans": parse,
    "dist_img_to_surf_trans": parse,
    "surf_to_dist_img_trans": parse,
}

data = load_data(root_dir)

In [102]:
participant_ids = [1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
ddf = data['all_gui_data']
df = ddf.compute()
df['block_data'] = df['block_data'].apply(lambda x: eval(x))

### Pupil + GUI Data

In [431]:
# P12 as example
p12_df = pd.read_csv("./iMotions/exports/010_vp11.csv", skiprows=36, nrows=50)

In [433]:
# Examine columns
p12_df.columns.tolist()

['Row',
 'Timestamp',
 'EventSource',
 'SlideEvent',
 'StimType',
 'Duration',
 'CollectionPhase',
 'SourceStimuliName',
 'EventSource.1',
 'InputEventSource',
 'Data',
 'StimType.1',
 'EventSource.2',
 'MarkerName',
 'MarkerDescription',
 'MarkerType',
 'SceneType',
 'EventSource.3',
 'CPU Sys',
 'Memory Sys',
 'CPU Proc',
 'Memory Proc',
 'EventSource.4',
 'heartRateState',
 'heartRate',
 'heartRateAvg',
 'heartRateMax',
 'gsr',
 'EventSource.5',
 'hr',
 'dt',
 'touch',
 'EventSource.6',
 'totalTime',
 'totalDistance',
 'totalScrolls',
 'totalClicks',
 'streakTime',
 'streakDistance',
 'streakScrolls',
 'streakClicks',
 'speed',
 'speedAvg',
 'speedMax',
 'clickRate',
 'clickRateAvg',
 'clickRateMax',
 'scrollRate',
 'scrollRateAvg',
 'scrollRateMax',
 'EventSource.7',
 'timestamp_Pupil',
 'confidence',
 'type',
 'EventSource.8',
 'timestamp_Pupil.1',
 'duration',
 'norm_pos_x',
 'norm_pos_y',
 'dispersion',
 'confidence.1',
 'gaze_point_3d_x',
 'gaze_point_3d_y',
 'gaze_point_3d_z',

In [5]:
# Filter for pupilcore-related data
pupil_columns = [
    'Timestamp',        #gui timestamp
    'timestamp_Pupil',
    'confidence',
    'timestamp_Pupil.1',
    'duration',
    'norm_pos_x',
    'norm_pos_y',
    'dispersion',
    'confidence.1',
    'gaze_point_3d_x',
    'gaze_point_3d_y',
    'gaze_point_3d_z',
    'method',
    'timestamp_Pupil.2',
    'gaze_point_3d_x.1',
    'gaze_point_3d_y.1',
    'gaze_point_3d_z.1',
    'norm_pos_x.1',
    'norm_pos_y.1',
    'confidence.2',
    'timestamp_Pupil.3',
    'diameter',
    'norm_pos_x.2',
    'norm_pos_y.2',
    'confidence.3',
]
p_12_filtered = p12_df[pupil_columns].dropna(how='all', subset=[col for col in pupil_columns if col != 'Timestamp']) #remove na values
p_12_filtered['Timestamp'] = p_12_filtered['Timestamp']/1000    #convert gui ts in [s] from [ms]
p_12_filtered

Unnamed: 0,Timestamp,timestamp_Pupil,confidence,timestamp_Pupil.1,duration,norm_pos_x,norm_pos_y,dispersion,confidence.1,gaze_point_3d_x,...,gaze_point_3d_y.1,gaze_point_3d_z.1,norm_pos_x.1,norm_pos_y.1,confidence.2,timestamp_Pupil.3,diameter,norm_pos_x.2,norm_pos_y.2,confidence.3
2,0.041000,,,,,,,,,,...,,,,,,84199.398381,31.465725,0.330163,0.498952,1.000000
3,0.041000,,,,,,,,,,...,,,,,,84199.398381,31.466343,0.330158,0.498931,1.000000
7,0.045000,,,,,,,,,,...,,,,,,84199.402384,31.177860,0.678458,0.647830,0.911918
8,0.045000,,,,,,,,,,...,,,,,,84199.402384,31.176900,0.678448,0.647808,0.911918
10,0.048001,,,84199.04439,302.0865,0.612383,0.720469,1.278331,0.988036,42.955458,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4477080,3235.282725,,,,,,,,,,...,,,,,,87434.637776,26.046050,0.820317,0.688882,0.749247
4477084,3235.286725,,,,,,,,,,...,,,,,,87434.642977,25.676739,0.818556,0.689620,0.758039
4477085,3235.286725,,,,,,,,,,...,,,,,,87434.642977,25.907541,0.818341,0.689437,0.758039
4477086,3235.286725,,,,,,,,,,...,,,,,,87434.642565,22.498596,0.287049,0.441626,1.000000


In [6]:
# Filter for timestamps only
timestamp_columns = [
    'Timestamp',            #gui timestamp
    'timestamp_Pupil',      #general(?)
    'timestamp_Pupil.1',    #fixation
    'timestamp_Pupil.2',    #gaze
    'timestamp_Pupil.3',    #pupil
    ]
p_12_timestamps = p12_df[timestamp_columns].drop_duplicates(subset='timestamp_Pupil.3')    #remove duplicates
p_12_timestamps

Unnamed: 0,Timestamp,timestamp_Pupil,timestamp_Pupil.1,timestamp_Pupil.2,timestamp_Pupil.3
0,3.900030e+01,,,,
2,4.099970e+01,,,,84199.398381
7,4.499990e+01,,,,84199.402384
12,4.900090e+01,,,,84199.406394
17,5.300060e+01,,,,84199.410437
...,...,...,...,...,...
4477062,3.235266e+06,,,,87434.623890
4477076,3.235282e+06,,,,87434.637200
4477079,3.235283e+06,,,,87434.637776
4477084,3.235287e+06,,,,87434.642977


'timestamp_Pupil.3'    looks to be a good candidate --> test how it matches with  pupil_data

In [7]:
# Get pupil data for P12
pupil_data = data['all_pupil_positions']
p_12_pupil = pupil_data.query("participant_id == 12").compute()
p_12_pupil

Unnamed: 0.1,Unnamed: 0,participant_id,pupil_timestamp,world_index,eye_id,confidence,norm_pos_x,norm_pos_y,diameter,method,...,circle_3d_normal_y,circle_3d_normal_z,circle_3d_radius,theta,phi,projected_sphere_center_x,projected_sphere_center_y,projected_sphere_axis_a,projected_sphere_axis_b,projected_sphere_angle
140408,0,12,84203.906368,0,0,0.000000,0.000000,1.000000,0.000000,pye3d 0.3.0 real-time,...,-0.034617,-0.998659,-0.047774,1.605421,-1.532270,153.630944,44.104340,165.629457,165.629457,0.0
140409,1,12,84203.906368,0,0,0.509414,0.633051,0.746689,25.574812,2d c++,...,,,,,,,,,,
140410,2,12,84203.906387,0,1,0.830888,0.368114,0.418411,25.116564,2d c++,...,,,,,,,,,,
140411,3,12,84203.906387,0,1,0.830888,0.367885,0.418541,25.101973,pye3d 0.3.0 real-time,...,0.124280,-0.866838,1.175684,1.446194,-2.079019,137.386084,103.405421,188.893071,188.893071,0.0
140412,4,12,84203.914345,0,1,0.879920,0.367491,0.418787,26.093312,pye3d 0.3.0 real-time,...,0.128873,-0.863226,1.222486,1.441564,-2.085407,137.386084,103.405421,188.893071,188.893071,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68114,178361,12,87442.095992,96569,0,1.000000,0.548687,0.711579,24.137211,2d c++,...,,,,,,,,,,
68115,178362,12,87442.099963,96569,1,1.000000,0.427664,0.461246,25.449694,pye3d 0.3.0 real-time,...,0.082972,-0.811022,1.220616,1.487729,-2.190883,132.767592,98.118544,179.562893,179.562893,0.0
68116,178363,12,87442.099963,96569,1,1.000000,0.427626,0.461303,25.448195,2d c++,...,,,,,,,,,,
68117,178364,12,87442.104046,96569,0,1.000000,0.549126,0.710647,23.703648,2d c++,...,,,,,,,,,,


In [38]:
#
p_12_gui_short = p_12_filtered[['Timestamp', 'timestamp_Pupil.3', 'confidence.3', 'diameter']].dropna()#.drop_duplicates(subset='timestamp_Pupil.3') #remove na values and duplicates
p_12_pupil_short = p_12_pupil[['pupil_timestamp', 'confidence', 'diameter']].dropna()#.drop_duplicates(subset='pupil_timestamp')

In [39]:
p_12_gui_short

Unnamed: 0,Timestamp,timestamp_Pupil.3,confidence.3,diameter
2,0.041000,84199.398381,1.000000,31.465725
3,0.041000,84199.398381,1.000000,31.466343
7,0.045000,84199.402384,0.911918,31.177860
8,0.045000,84199.402384,0.911918,31.176900
12,0.049001,84199.406394,1.000000,31.429163
...,...,...,...,...
4477080,3235.282725,87434.637776,0.749247,26.046050
4477084,3235.286725,87434.642977,0.758039,25.676739
4477085,3235.286725,87434.642977,0.758039,25.907541
4477086,3235.286725,87434.642565,1.000000,22.498596


In [40]:
p_12_pupil_short

Unnamed: 0,pupil_timestamp,confidence,diameter
140408,84203.906368,0.000000,0.000000
140409,84203.906368,0.509414,25.574812
140410,84203.906387,0.830888,25.116564
140411,84203.906387,0.830888,25.101973
140412,84203.914345,0.879920,26.093312
...,...,...,...
68114,87442.095992,1.000000,24.137211
68115,87442.099963,1.000000,25.449694
68116,87442.099963,1.000000,25.448195
68117,87442.104046,1.000000,23.703648


In [41]:
# Similar length (~ 4 seconds difference)
len(p_12_pupil_short) - len(p_12_gui_short)

1398

In [42]:
# Check row matching - pupil_timestamp
p_12_pupil_short['exists_in_gui_short'] = p_12_pupil_short['pupil_timestamp'].isin(p_12_gui_short['timestamp_Pupil.3'])

total_matches = p_12_pupil_short['exists_in_gui_short'].sum()

# Calculate the relative and percentage values
print(f"Total rows: {len(p_12_pupil_short)}")
print(f"Total matching rows: {total_matches}")
print(f"Proportion: {total_matches / len(p_12_pupil_short)}")

Total rows: 1603490
Total matching rows: 1599858
Proportion: 0.9977349406606839


In [43]:
# Identify matching subdataframes
p_12_gui_short = p_12_gui_short.reset_index()
p_12_pupil_short = p_12_pupil_short.reset_index()

matches_gui = p_12_gui_short[
            (p_12_gui_short['timestamp_Pupil.3'] == p_12_pupil_short.iloc[1]['pupil_timestamp']) &
            (p_12_gui_short['confidence.3'] == p_12_pupil_short.iloc[1]['confidence']) &
            (p_12_gui_short['diameter'] == p_12_pupil_short.iloc[1]['diameter'])
        ]
matches_pupil = p_12_pupil_short[
            (p_12_pupil_short['pupil_timestamp'] == p_12_gui_short.iloc[-1]['timestamp_Pupil.3']) &
            (p_12_pupil_short['confidence'] == p_12_gui_short.iloc[-1]['confidence.3']) &
            (p_12_pupil_short['diameter'] == p_12_gui_short.iloc[-1]['diameter'])
        ]
print(f"gui start index: {matches_gui.index}")
print(f"pupil end index: {matches_pupil.index}")

gui start index: Index([2234], dtype='int64')
pupil end index: Index([1599855], dtype='int64')


In [44]:
p_12_gui_cut = p_12_gui_short.iloc[2234:]
p_12_pupil_cut = p_12_pupil_short.iloc[:1599855]

len(p_12_gui_cut) == len(p_12_pupil_cut)

print(len(p_12_gui_cut))
print(len(p_12_pupil_cut))

1599858
1599855


In [45]:
p_12_pupil_cut['exists_in_gui_short'] = p_12_pupil_cut['pupil_timestamp'].isin(p_12_gui_cut['timestamp_Pupil.3']).copy()

total_matches = p_12_pupil_cut['exists_in_gui_short'].sum()

# Calculate the relative and percentage values
print(f"Total rows: {len(p_12_pupil_cut)}")
print(f"Proportion: {total_matches / len(p_12_pupil_cut)}")

Total rows: 1599855
Proportion: 1.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  p_12_pupil_cut['exists_in_gui_short'] = p_12_pupil_cut['pupil_timestamp'].isin(p_12_gui_cut['timestamp_Pupil.3']).copy()


In [46]:
matching_pupil_cut = p_12_pupil_cut[p_12_pupil_cut['pupil_timestamp'].isin(p_12_gui_cut['timestamp_Pupil.3'])]
matching_gui_cut = p_12_gui_cut[p_12_gui_cut['timestamp_Pupil.3'].isin(p_12_pupil_cut['pupil_timestamp'])]

print(len(matching_pupil_cut))
print(len(matching_gui_cut))

1599855
1599856


In [47]:
# Load the data
p_12_gui_cut = p_12_gui_cut.reset_index(drop=True)
p_12_pupil_cut = p_12_pupil_cut.reset_index(drop=True)

# Ensure both DataFrames have unique timestamps
p_12_gui_cut = p_12_gui_cut.drop_duplicates(subset='timestamp_Pupil.3')
p_12_pupil_cut = p_12_pupil_cut.drop_duplicates(subset='pupil_timestamp')

# Identify common timestamps
common_timestamps = pd.Series(list(set(p_12_gui_cut['timestamp_Pupil.3']) & set(p_12_pupil_cut['pupil_timestamp'])))

# Filter both DataFrames to keep only the matching rows
matching_pupil_cut = p_12_pupil_cut[p_12_pupil_cut['pupil_timestamp'].isin(common_timestamps)]
matching_gui_cut = p_12_gui_cut[p_12_gui_cut['timestamp_Pupil.3'].isin(common_timestamps)]

# Ensure both DataFrames are sorted by the timestamp for a consistent merge
matching_pupil_cut = matching_pupil_cut.sort_values(by='pupil_timestamp').reset_index(drop=True)
matching_gui_cut = matching_gui_cut.sort_values(by='timestamp_Pupil.3').reset_index(drop=True)

# Print the lengths to verify they match
print(len(matching_pupil_cut))
print(len(matching_gui_cut))

795975
795975


In [48]:
# duplicates because of 2d and 3d methods??

In [49]:
# Merge the filtered DataFrames on the timestamp columns
merged_df = pd.merge(
    matching_pupil_cut,
    matching_gui_cut,
    left_on='pupil_timestamp',
    right_on='timestamp_Pupil.3',
    suffixes=('_pupil', '_gui')
)

# Display the merged DataFrame
print(merged_df.head())

# Calculate the number of matches
total_matches = len(merged_df)

# Calculate the total number of rows in the pupil DataFrame
total_pupil_rows = len(p_12_pupil_cut)

# Calculate the relative and percentage values
relative_value = total_matches / total_pupil_rows
percentage_value = relative_value * 100

# Display the results
print(f"Total matches: {total_matches}")
print(f"Relative value: {relative_value:.4f}")
print(f"Percentage value: {percentage_value:.2f}%")

   index_pupil  pupil_timestamp  confidence  diameter_pupil  \
0       140408     84203.906368    0.000000        0.000000   
1       140410     84203.906387    0.830888       25.116564   
2       140412     84203.914345    0.879920       26.093312   
3       140414     84203.914393    0.683800       25.702860   
4       140416     84203.922351    0.768226       26.382033   

   exists_in_gui_short  index_gui  Timestamp  timestamp_Pupil.3  confidence.3  \
0                 True       6222   4.549085       84203.906368      0.509414   
1                 True       6224   4.549085       84203.906387      0.830888   
2                 True       6233   4.557085       84203.914345      0.879920   
3                 True       6230   4.556083       84203.914393      0.683800   
4                 True       6241   4.565085       84203.922351      0.768226   

   diameter_gui  
0     25.574812  
1     25.116564  
2     26.106041  
3     25.702860  
4     26.290651  
Total matches: 795975
Rela

In [50]:
# Timestamp offset between pupil and gui
offset_mean = np.mean(merged_df['pupil_timestamp'] - merged_df['Timestamp'])
offset_std = np.std(merged_df['pupil_timestamp'] - merged_df['Timestamp'])

print(f"Mean difference: {offset_mean}")
print(f"Std difference: {offset_std}")

Mean difference: 84199.35706756965
Std difference: 0.0008800566533955995


In [51]:
# Matching difference for timestamp
diff_ts_mean = np.mean(merged_df['pupil_timestamp'] - merged_df['timestamp_Pupil.3'])
diff_ts_std = np.std(merged_df['pupil_timestamp'] - merged_df['timestamp_Pupil.3'])

print(f"Mean difference: {diff_ts_mean}")
print(f"Std difference: {diff_ts_std}")

Mean difference: 0.0
Std difference: 0.0


In [52]:
# Matching difference for confidence
diff_con_mean = np.mean(merged_df['confidence'] - merged_df['confidence.3'])
diff_con_std = np.std(merged_df['confidence'] - merged_df['confidence.3'])

print(f"Mean difference: {diff_con_mean}")
print(f"Std difference: {diff_con_std}")

Mean difference: 0.00017119523849236568
Std difference: 0.03657192492162322


In [53]:
# Matching difference for diameter
merged_df = merged_df.query('diameter_pupil < 50')

diff_diam_mean = np.mean(merged_df['diameter_pupil'] - merged_df['diameter_gui'])
diff_diam_med = np.median(merged_df['diameter_pupil'] - merged_df['diameter_gui'])
diff_diam_std = np.std(merged_df['diameter_pupil'] - merged_df['diameter_gui'])

print(f"Mean difference: {diff_diam_mean}")
print(f"Median difference: {diff_diam_med}")
print(f"Std difference: {diff_diam_std}")

Mean difference: 0.09029422358024983
Median difference: 0.0
Std difference: 2.1535421483199126


### System time
- there is no system time in the iMotions data

System time in some markers (?)

In [53]:
# P12 as example
p12_df = pd.read_csv("./iMotions/exports/010_vp11.csv", skiprows=36)
p12_df.columns.tolist()

['Row',
 'Timestamp',
 'EventSource',
 'SlideEvent',
 'StimType',
 'Duration',
 'CollectionPhase',
 'SourceStimuliName',
 'EventSource.1',
 'InputEventSource',
 'Data',
 'StimType.1',
 'EventSource.2',
 'MarkerName',
 'MarkerDescription',
 'MarkerType',
 'SceneType',
 'EventSource.3',
 'CPU Sys',
 'Memory Sys',
 'CPU Proc',
 'Memory Proc',
 'EventSource.4',
 'heartRateState',
 'heartRate',
 'heartRateAvg',
 'heartRateMax',
 'gsr',
 'EventSource.5',
 'hr',
 'dt',
 'touch',
 'EventSource.6',
 'totalTime',
 'totalDistance',
 'totalScrolls',
 'totalClicks',
 'streakTime',
 'streakDistance',
 'streakScrolls',
 'streakClicks',
 'speed',
 'speedAvg',
 'speedMax',
 'clickRate',
 'clickRateAvg',
 'clickRateMax',
 'scrollRate',
 'scrollRateAvg',
 'scrollRateMax',
 'EventSource.7',
 'timestamp_Pupil',
 'confidence',
 'type',
 'EventSource.8',
 'timestamp_Pupil.1',
 'duration',
 'norm_pos_x',
 'norm_pos_y',
 'dispersion',
 'confidence.1',
 'gaze_point_3d_x',
 'gaze_point_3d_y',
 'gaze_point_3d_z',

In [64]:
p12_markers = p12_df[['MarkerName', 'MarkerDescription', 'MarkerType', 'SceneType']].dropna()

In [58]:
p12_markers = p12_df[['CPU Sys', 'Memory Sys', 'CPU Proc', 'Memory Proc']].dropna(how='all')
p12_markers

Unnamed: 0,CPU Sys,Memory Sys,CPU Proc,Memory Proc
244,46.131298,30.160013,3.596240,304.886719
1668,40.824265,30.249884,2.345356,304.882812
3090,43.537945,30.151293,1.741795,305.542969
4517,45.875221,30.153638,4.849431,306.562500
5955,43.622353,30.184117,3.154122,306.917969
...,...,...,...,...
4470991,49.956665,37.906523,8.680098,2004.230469
4472404,59.006054,37.922099,11.433339,2018.882812
4473813,55.223621,37.975622,9.527666,2005.019531
4475215,51.053535,37.934981,7.362769,2019.519531


In [60]:
p12_markers = p12_df[['EventSource', 'SlideEvent', 'StimType', 'Duration', 'CollectionPhase']].dropna(how='all')
p12_markers

Unnamed: 0,EventSource,SlideEvent,StimType,Duration,CollectionPhase
0,1.0,StartSlide,TestImage,300000000.0,StimuliDisplay
7642,1.0,StartMedia,TestImage,300000000.0,StimuliDisplay
4477084,1.0,EndMedia,TestImage,300000000.0,StimuliDisplay
4477087,1.0,EndSlide,TestImage,300000000.0,StimuliDisplay


In [65]:
p12_markers = p12_df[['Timestamp RAW', 'Timestamp CAL', 'System Timestamp CAL', 'Timestamp RAW.1', 'Timestamp CAL.1', 'System Timestamp CAL.1']].dropna(how='all')
p12_markers

Unnamed: 0,Timestamp RAW,Timestamp CAL,System Timestamp CAL,Timestamp RAW.1,Timestamp CAL.1,System Timestamp CAL.1
1,437406.0,6.082465e+09,1.717180e+12,,,
4,437470.0,6.082465e+09,1.717180e+12,,,
5,,,,7613279.0,2.679172e+06,1.711100e+12
6,437534.0,6.082465e+09,1.717180e+12,,,
9,437598.0,6.082465e+09,1.717180e+12,,,
...,...,...,...,...,...,...
4477075,5786462.0,6.085700e+09,1.717183e+12,,,
4477078,5786526.0,6.085700e+09,1.717183e+12,,,
4477081,5786590.0,6.085700e+09,1.717183e+12,,,
4477082,,,,12962399.0,5.914414e+06,1.711103e+12


In [66]:
p12_markers = p12_df[['System Timestamp CAL', 'System Timestamp CAL.1']].dropna(how='all')
p12_markers

Unnamed: 0,System Timestamp CAL,System Timestamp CAL.1
1,1.717180e+12,
4,1.717180e+12,
5,,1.711100e+12
6,1.717180e+12,
9,1.717180e+12,
...,...,...
4477075,1.717183e+12,
4477078,1.717183e+12,
4477081,1.717183e+12,
4477082,,1.711103e+12


In [68]:
# P13
p13_df = pd.read_csv("./iMotions/exports/011_vp12.csv", skiprows=36)
p13_markers = p13_df[['System Timestamp CAL', 'System Timestamp CAL.1']].dropna(how='all')
p13_markers

Unnamed: 0,System Timestamp CAL,System Timestamp CAL.1
1,,1.711110e+12
2,,1.711110e+12
5,,1.711110e+12
6,,1.711110e+12
7,,1.711110e+12
...,...,...
2958472,,1.711113e+12
2958477,,1.711113e+12
2958483,,1.711113e+12
2958494,,1.711113e+12


### Through timestamps

##### w/ pupil data
- Average accuracy ~1 ms
- Low accuracy in P18 and P20 (~4.5 ms)
- Undoable from P01 to P07

In [8]:
# Check columns existance
file_names =  {
        1: '001_vp01',
        2: '001_vp02',
        3: '-',
        4: '002_vp03',
        5: '003_vp04',
        6: '004_vp05',
        7: '005_vp06',
        8: '006_vp07',
        9: '007_vp08',
        10: '008_vp09',
        11: '009_vp10',
        12: '010_vp11',
        13: '011_vp12',
        14: '012_vp13',
        15: '013_vp14',
        16: '014_vp15',
        17: '015_vp16',
        18: '016_vp17',
        19: '017_vp18',
        20: '018_vp19',
}
required_columns = ['Timestamp', 'timestamp_Pupil.3', 'confidence.3', 'diameter']
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

print("MISSING COLUMNS:")
for pid in participant_ids:
    file_path = f"./iMotions/exports/{file_names[pid]}.csv"
    with open(file_path, 'r') as f:
        lines = f.readlines()
    start_idx = next(i for i, line in enumerate(lines) if line.startswith("Row,"))
    p_raw_gui = pd.read_csv(file_path, skiprows=start_idx, nrows=50)
    missing_columns = [col for col in required_columns if col not in p_raw_gui.columns]

    print(f"P{pid}: {missing_columns}") if missing_columns else print(f"P{pid}: None")

MISSING COLUMNS:
P1: None
P2: None
P4: ['timestamp_Pupil.3', 'confidence.3', 'diameter']
P5: ['timestamp_Pupil.3', 'confidence.3', 'diameter']
P6: ['timestamp_Pupil.3', 'confidence.3', 'diameter']
P7: ['timestamp_Pupil.3', 'confidence.3', 'diameter']
P8: None
P9: None
P10: None
P11: None
P12: None
P13: None
P14: None
P15: None
P16: None
P17: None
P18: None
P19: None
P20: None


In [18]:
# Run

file_names =  {
        1: '001_vp01',
        2: '001_vp02',
        3: '-',
        4: '002_vp03',
        5: '003_vp04',
        6: '004_vp05',
        7: '005_vp06',
        8: '006_vp07',
        9: '007_vp08',
        10: '008_vp09',
        11: '009_vp10',
        12: '010_vp11',
        13: '011_vp12',
        14: '012_vp13',
        15: '013_vp14',
        16: '014_vp15',
        17: '015_vp16',
        18: '016_vp17',
        19: '017_vp18',
        20: '018_vp19',
}
required_columns = ['Timestamp', 'timestamp_Pupil.3', 'confidence.3', 'diameter']
new_col_names = {
    'Timestamp': 'gui_timestamp',
    'timestamp_Pupil.3': 'pupil_timestamp',
    'confidence.3': 'confidence'
    }
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

for pid in participant_ids:
    # Prepare gui data
    file_path = f"./iMotions/exports/{file_names[pid]}.csv"
    with open(file_path, 'r') as f:
        lines = f.readlines()
    start_idx = next(i for i, line in enumerate(lines) if line.startswith("Row,"))
    p_raw_gui = pd.read_csv(file_path, skiprows=start_idx)
    if all(col in p_raw_gui.columns for col in required_columns):
        p_gui = p_raw_gui[required_columns].dropna()
    else:
        print(f"!!! PARTICIPANT {pid} doesn't contain required columns")
        continue
    p_gui['Timestamp'] = p_gui['Timestamp'] /1000
    p_gui = p_gui.rename(columns=new_col_names)

    # Prepare pupil data
    p_pupil = data['all_pupil_positions'].query(f"participant_id == {pid}").loc[:, ['pupil_timestamp', 'confidence', 'diameter']].compute().dropna()

    # Trim and sort
    p_gui_trimsort= p_gui[p_gui['pupil_timestamp'] <= p_pupil['pupil_timestamp'].max()].sort_values('pupil_timestamp')
    p_pupil_trimsort= p_pupil[p_pupil['pupil_timestamp'] >= p_gui['pupil_timestamp'].min()].sort_values('pupil_timestamp')

    # Merge
    p_merged = pd.merge(
        p_pupil,
        p_gui,
        on=['pupil_timestamp', 'confidence', 'diameter'],
        how='inner'  # Use 'inner' to keep only the matching rows
        )

    # Merge as of
    p_merged_asof = pd.merge_asof(
        p_pupil_trimsort,
        p_gui_trimsort,
        on='pupil_timestamp',
        by=['confidence', 'diameter'],
        tolerance=0.0015,
        direction='nearest'
        )

    # Compute offset between pupil and gui timestamps
    offset_mean = np.mean(p_merged['pupil_timestamp'] - p_merged['gui_timestamp'])
    offset_std = np.std(p_merged['pupil_timestamp'] - p_merged['gui_timestamp'])
    offset_mean_asof = np.mean(p_merged_asof['pupil_timestamp'] - p_merged_asof['gui_timestamp'])
    offset_std_asof = np.std(p_merged_asof['pupil_timestamp'] - p_merged_asof['gui_timestamp'])

    # Print results
    print("-----------------------------------------------------")
    print()

    print(f"PARTICIPANT {pid}")

    print(f"Len of gui df: {len(p_gui)}")
    print(f"Len of pupil df: {len(p_pupil)}")
    print(f"Len of gui_trimsort df: {len(p_gui_trimsort)}")
    print(f"Len of pupil_trimsort df: {len(p_pupil_trimsort)}")

    print()

    print(f"Len of merge df: {len(p_merged)}")
    print(f"Len of merge_asof df: {len(p_merged_asof)}")
    print(f"Mean offset merged: {offset_mean}")
    print(f"Std offset merged: {offset_std}")
    print(f"Mean offset merged_asof: {offset_mean_asof}")
    print(f"Std offset merged_asof: {offset_std_asof}")

    print()
    
print("-----------------------------------------------------")


-----------------------------------------------------

PARTICIPANT 1
Len of gui df: 70
Len of pupil df: 2000704
Len of gui_trimsort df: 70
Len of pupil_trimsort df: 2000704

Len of merge df: 0
Len of merge_asof df: 2000704
Mean offset merged: nan
Std offset merged: nan
Mean offset merged_asof: nan
Std offset merged_asof: nan

-----------------------------------------------------

PARTICIPANT 2
Len of gui df: 25
Len of pupil df: 1516112
Len of gui_trimsort df: 25
Len of pupil_trimsort df: 1512092

Len of merge df: 28
Len of merge_asof df: 1512092
Mean offset merged: -665617.6272787644
Std offset merged: 890.6916427565357
Mean offset merged_asof: -665911.1086740455
Std offset merged_asof: 779.582300358675

!!! PARTICIPANT 4 doesn't contain required columns
!!! PARTICIPANT 5 doesn't contain required columns
!!! PARTICIPANT 6 doesn't contain required columns
!!! PARTICIPANT 7 doesn't contain required columns
-----------------------------------------------------

PARTICIPANT 8
Len of gui df

##### w/ gaze data
- Average accuracy ~11 ms
- Low accuracy in P04, P05, P08, P09 and P16 (~16 ms)

In [14]:
# Check columns existance

file_names =  {
        1: '001_vp01',
        2: '001_vp02',
        3: '-',
        4: '002_vp03',
        5: '003_vp04',
        6: '004_vp05',
        7: '005_vp06',
        8: '006_vp07',
        9: '007_vp08',
        10: '008_vp09',
        11: '009_vp10',
        12: '010_vp11',
        13: '011_vp12',
        14: '012_vp13',
        15: '013_vp14',
        16: '014_vp15',
        17: '015_vp16',
        18: '016_vp17',
        19: '017_vp18',
        20: '018_vp19',
}
required_columns = [
    'Timestamp',
    'timestamp_Pupil.2',
    'confidence.2',
    'norm_pos_x.1',
    'norm_pos_y.1',
]
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

print("MISSING COLUMNS:")
for pid in participant_ids:
    file_path = f"./iMotions/exports/{file_names[pid]}.csv"
    with open(file_path, 'r') as f:
        lines = f.readlines()
    start_idx = next(i for i, line in enumerate(lines) if line.startswith("Row,"))
    p_raw_gui = pd.read_csv(file_path, skiprows=start_idx, nrows=100)
    missing_columns = [col for col in required_columns if col not in p_raw_gui.columns]

    print(f"P{pid}: {missing_columns}") if missing_columns else print(f"P{pid}: None")

MISSING COLUMNS:
P1: None
P2: None
P4: None
P5: None
P6: None
P7: None
P8: None
P9: None
P10: None
P11: None
P12: None
P13: None
P14: None
P15: None
P16: None
P17: None
P18: None
P19: None
P20: None


In [17]:
# Run

file_names =  {
        1: '001_vp01',
        2: '001_vp02',
        3: '-',
        4: '002_vp03',
        5: '003_vp04',
        6: '004_vp05',
        7: '005_vp06',
        8: '006_vp07',
        9: '007_vp08',
        10: '008_vp09',
        11: '009_vp10',
        12: '010_vp11',
        13: '011_vp12',
        14: '012_vp13',
        15: '013_vp14',
        16: '014_vp15',
        17: '015_vp16',
        18: '016_vp17',
        19: '017_vp18',
        20: '018_vp19',
}
required_columns = [
    'Timestamp',
    'timestamp_Pupil.2',
    'confidence.2',
    'norm_pos_x.1',
    'norm_pos_y.1',
]
new_col_names = {
    'Timestamp': 'gui_timestamp',
    'timestamp_Pupil.2': 'gaze_timestamp',
    'confidence.2': 'confidence',
    'norm_pos_x.1': 'norm_pos_x',
    'norm_pos_y.1': 'norm_pos_y',
    }
required_gaze_columns = [
    'gaze_timestamp',
    'confidence',
    'norm_pos_x',
    'norm_pos_y',
]
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

for pid in participant_ids:
    # Prepare gui data
    file_path = f"./iMotions/exports/{file_names[pid]}.csv"
    with open(file_path, 'r') as f:
        lines = f.readlines()
    start_idx = next(i for i, line in enumerate(lines) if line.startswith("Row,"))
    p_raw_gui = pd.read_csv(file_path, skiprows=start_idx)
    if all(col in p_raw_gui.columns for col in required_columns):
        p_gui = p_raw_gui[required_columns].dropna()
    else:
        print(f"!!! PARTICIPANT {pid} doesn't contain required columns")
        continue
    p_gui['Timestamp'] = p_gui['Timestamp'] /1000
    p_gui = p_gui.rename(columns=new_col_names)
    if pid == 2:
        p_gui = p_gui[p_gui['gui_timestamp'] > 2000]

    # Prepare gaze data
    p_gaze = data['all_gaze_positions'].query(f"participant_id == {pid}").loc[:, required_gaze_columns].compute().dropna()

    # Trim and sort
    p_gui_trimsort= p_gui[p_gui['gaze_timestamp'] <= p_gaze['gaze_timestamp'].max()].sort_values('gaze_timestamp')
    p_gaze_trimsort= p_gaze[p_gaze['gaze_timestamp'] >= p_gui['gaze_timestamp'].min()].sort_values('gaze_timestamp')

    # Merge
    p_merged = pd.merge(
        p_gaze,
        p_gui,
        on=['gaze_timestamp', 'confidence', 'norm_pos_x', 'norm_pos_y'],
        how='inner'  # Use 'inner' to keep only the matching rows
        )

    # Merge as of
    p_merged_asof = pd.merge_asof(
        p_gaze_trimsort,
        p_gui_trimsort,
        on='gaze_timestamp',
        by=['confidence', 'norm_pos_x', 'norm_pos_y'],
        tolerance=0.0015,
        direction='nearest'
        )

    # Compute offset betweep_gaze and gui timestamps
    offset_mean = np.mean(p_merged['gaze_timestamp'] - p_merged['gui_timestamp'])
    offset_std = np.std(p_merged['gaze_timestamp'] - p_merged['gui_timestamp'])
    offset_mean_asof = np.mean(p_merged_asof['gaze_timestamp'] - p_merged_asof['gui_timestamp'])
    offset_std_asof = np.std(p_merged_asof['gaze_timestamp'] - p_merged_asof['gui_timestamp'])

    # Print results
    print("-----------------------------------------------------")
    print()

    print(f"PARTICIPANT {pid}")

    print(f"Len of gui df: {len(p_gui)}")
    print(f"Len of pupil df: {len(p_gaze)}")
    print(f"Len of gui_trimsort df: {len(p_gui_trimsort)}")
    print(f"Len of pupil_trimsort df: {len(p_gaze_trimsort)}")

    print()

    print(f"Len of merge df: {len(p_merged)}")
    print(f"Len of merge_asof df: {len(p_merged_asof)}")
    print(f"Mean offset merged: {offset_mean}")
    print(f"Std offset merged: {offset_std}")
    print(f"Mean offset merged_asof: {offset_mean_asof}")
    print(f"Std offset merged_asof: {offset_std_asof}")

    print()
    
print("-----------------------------------------------------")

-----------------------------------------------------

PARTICIPANT 1
Len of gui df: 1711
Len of pupil df: 1000351
Len of gui_trimsort df: 1711
Len of pupil_trimsort df: 1000351

Len of merge df: 1630
Len of merge_asof df: 1000351
Mean offset merged: -672862.8254689132
Std offset merged: 0.010101152431444823
Mean offset merged_asof: -672862.8254689132
Std offset merged_asof: 0.010101152431444825

-----------------------------------------------------

PARTICIPANT 2
Len of gui df: 32674
Len of pupil df: 758055
Len of gui_trimsort df: 31442
Len of pupil_trimsort df: 758055

Len of merge df: 31252
Len of merge_asof df: 758055
Mean offset merged: -664542.8550160618
Std offset merged: 49.69331566162964
Mean offset merged_asof: -664542.8550160618
Std offset merged_asof: 49.69331566162964

-----------------------------------------------------

PARTICIPANT 4
Len of gui df: 778077
Len of pupil df: 762752
Len of gui_trimsort df: 778077
Len of pupil_trimsort df: 762752

Len of merge df: 762104
Len 

### Through logging files and timestamps

##### w/ pupil data
- Average accuracy ~2 ms
- Low accuracy in P12, P16 (~8 ms) and P15 (~24 ms)
- Undoable from P01 to P07

In [183]:
participant_id = [20]

In [184]:
# Run half
log_json_names = {
    1: 'p01',
    2: 'p02',
    3: '-',
    4: 'p04',
    5: 'p05',
    6: 'p06',
    7: 'p07',
    8: 'p08',
    9: 'p09',
    10: 'p10',
    11: 'p11',
    12: 'p12',
    13: 'p13',
    14: 'p14',
    15: 'p15',
    16: 'p16',
    17: 'p17',
    18: 'p18',
    19: 'p19',
    20: 'p20',
}
column_names = ["UTC_time", "markerName"]
column_names.extend([f"Column{i+1}" for i in range(3, 20)])
marker_names = [
    'Start_Task',
    'Task',
    'on_item_clicked',
    'view6',
    'view2',
    'button_click2',
    'Result',
    'view1',
    'view3',
    'view4',
    'view5',
    'button_click3',
    'button_click1',
    'Stop_Tasks'
    ]
file_names =  {
    1: '001_vp01',
    2: '001_vp02',
    3: '-',
    4: '002_vp03',
    5: '003_vp04',
    6: '004_vp05',
    7: '005_vp06',
    8: '006_vp07',
    9: '007_vp08',
    10: '008_vp09',
    11: '009_vp10',
    12: '010_vp11',
    13: '011_vp12',
    14: '012_vp13',
    15: '013_vp14',
    16: '014_vp15',
    17: '015_vp16',
    18: '016_vp17',
    19: '017_vp18',
    20: '018_vp19',
}
required_columns = ['Timestamp', 'timestamp_Pupil.3', 'confidence.3', 'diameter']
new_col_names = {
'Timestamp': 'gui_timestamp',
'timestamp_Pupil.3': 'pupil_timestamp',
'confidence.3': 'confidence'
}
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

for pid in participant_id:
    # Prepare Log data
    log_file_path = f"./iMotions/logging/{log_json_names[pid]}.log"
    p_log = pd.read_csv(log_file_path, sep='\t', names=column_names, header=None)[["UTC_time", "markerName"]]
    p_log['UTC_time'] = pd.to_datetime(p_log['UTC_time'], format='%Y-%m-%d_%H:%M:%S.%f')
    p_log['world_timestamp'] = p_log['UTC_time'].apply(lambda x: pd.Timestamp(x).tz_localize('Europe/Berlin').timestamp())
    p_log = p_log[p_log['markerName'].isin(marker_names)].reset_index(drop=True)

    # Prepare Gui data for log
    gui_file_path = f"./iMotions/exports/{file_names[pid]}.csv"
    with open(gui_file_path, 'r') as f:
        lines = f.readlines()
    start_idx = next(i for i, line in enumerate(lines) if line.startswith("Row,"))
    p_raw_gui = pd.read_csv(gui_file_path, skiprows=start_idx)
    p_raw_gui['Timestamp'] = p_raw_gui['Timestamp'] /1000
    p_gui_log = p_raw_gui[['Timestamp', 'MarkerName']].rename(columns={'Timestamp': 'gui_timestamp'})
    p_gui_log = p_gui_log[p_gui_log['MarkerName'].isin(marker_names)].reset_index(drop=True)

    # Merge log data with gui_log
    merged_log_gui_df = pd.merge(
            p_log,
            p_gui_log,
            left_index=True,
            right_index=True
            )
    discrepancies = (merged_log_gui_df['markerName'] != merged_log_gui_df['MarkerName']).sum()

    # Compute world to gui timestamp offset
    wts_to_gui_offset_mean = np.mean(merged_log_gui_df['world_timestamp'] - merged_log_gui_df['gui_timestamp'])
    wts_to_gui_offset_std = np.std(merged_log_gui_df['world_timestamp'] - merged_log_gui_df['gui_timestamp'])

    
    
    # Prepare Gui data for pupil
    if all(col in p_raw_gui.columns for col in required_columns):
        p_gui_pp = p_raw_gui[required_columns].dropna()
    else:
        print(f"!!! PARTICIPANT {pid} doesn't contain required columns")
        continue 
    p_gui_pp = p_gui_pp.rename(columns=new_col_names)
    p_gui_pp['world_timestamp'] = p_gui_pp['gui_timestamp'] + wts_to_gui_offset_mean
    p_gui_pp['datetime'] = pd.to_datetime(p_gui_pp['world_timestamp'], unit='s')
    p_gui_pp['datetime'] = p_gui_pp['datetime'].dt.tz_localize('UTC')
    p_gui_pp['datetime'] = p_gui_pp['datetime'].dt.tz_convert('Europe/Berlin')

    # Prepare pupil data
    with open(f"./info_players/{log_json_names[pid]}.json") as file:
        meta_info = json.load(file)
    start_timestamp_diff = meta_info["start_time_system_s"] - meta_info["start_time_synced_s"]
    p_pupil = data['all_pupil_positions'].query(f"participant_id == {pid}").loc[:, ['pupil_timestamp', 'confidence', 'diameter']].compute().dropna()
    p_pupil["world_timestamp"] = p_pupil["pupil_timestamp"] + start_timestamp_diff
    p_pupil['datetime'] = pd.to_datetime(p_pupil['world_timestamp'], unit='s')
    p_pupil['datetime'] = p_pupil['datetime'].dt.tz_localize('UTC')
    p_pupil['datetime'] = p_pupil['datetime'].dt.tz_convert('Europe/Berlin')

    # Trim and sort
    common_min = max(p_gui_pp['world_timestamp'].min(), p_pupil['world_timestamp'].min())
    common_max = min(p_gui_pp['world_timestamp'].max(), p_pupil['world_timestamp'].max())
    p_gui_pp_trimsort = p_gui_pp[(p_gui_pp['world_timestamp'] >= common_min) & (p_gui_pp['world_timestamp'] <= common_max)].sort_values(by='world_timestamp')
    p_pupil_trimsort = p_pupil[(p_pupil['world_timestamp'] >= common_min) & (p_pupil['world_timestamp'] <= common_max)].sort_values(by='world_timestamp')    

In [None]:
# Run other half
tolerances_pp = {
    1: 0.4,
    2: 0.1,
    3: "-",
    4: "-",
    5: "-",
    6: "-",
    7: "-",
    8: 0.03,
    9: 0.03,
    10: 0.03,
    11: 0.03,
    12: 0.03,
    13: 0.03,
    14: 0.03,
    15: 0.3,
    16: 0.03,
    17: 0.2,
    18: 0.03,
    19: 0.03,
    20: 0.03,
}

for pid in participant_id:
    # Merge as of
    p_merged_asof = pd.merge_asof(
        p_pupil_trimsort,
        p_gui_pp_trimsort,
        on='world_timestamp',
        by=['confidence', 'diameter'],
        tolerance= tolerances_pp[pid],
        direction='nearest'
        )
    na_values = p_merged_asof['gui_timestamp'].isna().sum()

    # Compute offset between pupil and gui timestamps
    offset_mean_asof = np.mean(p_merged_asof['world_timestamp'] - p_merged_asof['gui_timestamp'])
    offset_std_asof = np.std(p_merged_asof['world_timestamp'] - p_merged_asof['gui_timestamp'])

    # Print results
    print("-----------------------------------------------------")
    print()

    print(f"PARTICIPANT {pid}")

    print("-> From logging to gui")
    print(f"Len of log df: {len(p_log)}")
    print(f"Len of gui df for log: {len(p_gui_log)}")
    print(f"Marker discrepancies: {discrepancies}")
    print(f"Offset mean log to gui: {wts_to_gui_offset_mean}")
    print(f"Offset std log to gui: {wts_to_gui_offset_std}")

    print()

    print("-> From gui to pupil")
    print(f"Len of gui df: {len(p_gui_pp)}")
    print(f"Len of pupil df: {len(p_pupil)}")
    print(f"Len of gui_trimsort df: {len(p_gui_pp_trimsort)}")
    print(f"Len of pupil_trimsort df: {len(p_pupil_trimsort)}")

    print()

    print("-> Results")
    print(f"Tolerance: {tolerances_pp[pid]}")
    print(f"Len of merge_asof df: {len(p_merged_asof)}")
    print(f"Matched values in x (pupil): {len(p_merged_asof) - na_values}")
    print(f"Mean offset merged_asof: {offset_mean_asof}")
    print(f"Std offset merged_asof: {offset_std_asof}")

    print()

    print("-----------------------------------------------------")

##### w/ gaze data
- Average accuracy ~10 ms
- Low accuracy in P15 (~26 ms)

In [375]:
participant_id = [1]

In [376]:
# Run half
log_json_names = {
    1: 'p01',
    2: 'p02',
    3: '-',
    4: 'p04',
    5: 'p05',
    6: 'p06',
    7: 'p07',
    8: 'p08',
    9: 'p09',
    10: 'p10',
    11: 'p11',
    12: 'p12',
    13: 'p13',
    14: 'p14',
    15: 'p15',
    16: 'p16',
    17: 'p17',
    18: 'p18',
    19: 'p19',
    20: 'p20',
}
column_names = ["UTC_time", "markerName"]
column_names.extend([f"Column{i+1}" for i in range(3, 20)])
marker_names = [
    'Start_Task',
    'Task',
    'on_item_clicked',
    'view6',
    'view2',
    'button_click2',
    'Result',
    'view1',
    'view3',
    'view4',
    'view5',
    'button_click3',
    'button_click1',
    'Stop_Tasks'
    ]
file_names =  {
    1: '001_vp01',
    2: '001_vp02',
    3: '-',
    4: '002_vp03',
    5: '003_vp04',
    6: '004_vp05',
    7: '005_vp06',
    8: '006_vp07',
    9: '007_vp08',
    10: '008_vp09',
    11: '009_vp10',
    12: '010_vp11',
    13: '011_vp12',
    14: '012_vp13',
    15: '013_vp14',
    16: '014_vp15',
    17: '015_vp16',
    18: '016_vp17',
    19: '017_vp18',
    20: '018_vp19',
}
required_columns = ['Timestamp', 'timestamp_Pupil.2', 'confidence.2', 'norm_pos_x.1', 'norm_pos_y.1']
new_col_names = {
'Timestamp': 'gui_timestamp',
'timestamp_Pupil.2': 'gaze_timestamp',
'confidence.2': 'confidence',
'norm_pos_x.1': 'norm_pos_x',
'norm_pos_y.1': 'norm_pos_y',
}
warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

for pid in participant_id:
    # Prepare Log data
    log_file_path = f"./iMotions/logging/{log_json_names[pid]}.log"
    p_log = pd.read_csv(log_file_path, sep='\t', names=column_names, header=None)[["UTC_time", "markerName"]]
    p_log['UTC_time'] = pd.to_datetime(p_log['UTC_time'], format='%Y-%m-%d_%H:%M:%S.%f')
    p_log['world_timestamp'] = p_log['UTC_time'].apply(lambda x: pd.Timestamp(x).tz_localize('Europe/Berlin').timestamp())
    p_log = p_log[p_log['markerName'].isin(marker_names)].reset_index(drop=True)

    # Prepare Gui data for log
    gui_file_path = f"./iMotions/exports/{file_names[pid]}.csv"
    with open(gui_file_path, 'r') as f:
        lines = f.readlines()
    start_idx = next(i for i, line in enumerate(lines) if line.startswith("Row,"))
    p_raw_gui = pd.read_csv(gui_file_path, skiprows=start_idx)
    p_raw_gui['Timestamp'] = p_raw_gui['Timestamp'] /1000
    p_gui_log = p_raw_gui[['Timestamp','MarkerName']].rename(columns={'Timestamp': 'gui_timestamp'})
    p_gui_log = p_gui_log[p_gui_log['MarkerName'].isin(marker_names)].reset_index(drop=True)
    if pid == 4:
        p_gui_log = p_gui_log[p_gui_log['gui_timestamp'] >= 270].reset_index(drop=True)

    # Merge log data with gui_log
    merged_log_gui_df = pd.merge(
            p_log,
            p_gui_log,
            left_index=True,
            right_index=True
            )
    discrepancies = (merged_log_gui_df['markerName'] != merged_log_gui_df['MarkerName']).sum()

    # Compute world to gui timestamp offset
    wts_to_gui_offset_mean = np.mean(merged_log_gui_df['world_timestamp'] - merged_log_gui_df['gui_timestamp'])
    wts_to_gui_offset_std = np.std(merged_log_gui_df['world_timestamp'] - merged_log_gui_df['gui_timestamp'])

    
    
    # Prepare Gui data for gaze
    if all(col in p_raw_gui.columns for col in required_columns):
        p_gui_gaze = p_raw_gui[required_columns].dropna()
    else:
        print(f"!!! PARTICIPANT {pid} doesn't contain required columns")
        continue
    p_gui_gaze = p_gui_gaze.rename(columns=new_col_names)
    p_gui_gaze['world_timestamp'] = p_gui_gaze['gui_timestamp'] + wts_to_gui_offset_mean
    p_gui_gaze['datetime'] = pd.to_datetime(p_gui_gaze['world_timestamp'], unit='s')
    p_gui_gaze['datetime'] = p_gui_gaze['datetime'].dt.tz_localize('UTC')
    p_gui_gaze['datetime'] = p_gui_gaze['datetime'].dt.tz_convert('Europe/Berlin')

    # Prepare gaze data
    with open(f"./info_players/{log_json_names[pid]}.json") as file:
        meta_info = json.load(file)
    start_timestamp_diff = meta_info["start_time_system_s"] - meta_info["start_time_synced_s"]
    p_gaze = data['all_gaze_positions'].query(f"participant_id == {pid}").loc[:, ['gaze_timestamp', 'confidence', 'norm_pos_x', 'norm_pos_y']].compute().dropna()
    p_gaze["world_timestamp"] = p_gaze["gaze_timestamp"] + start_timestamp_diff
    p_gaze['datetime'] = pd.to_datetime(p_gaze['world_timestamp'], unit='s')
    p_gaze['datetime'] = p_gaze['datetime'].dt.tz_localize('UTC')
    p_gaze['datetime'] = p_gaze['datetime'].dt.tz_convert('Europe/Berlin')

    # Trim and sort
    common_min = max(p_gui_gaze['world_timestamp'].min(), p_gaze['world_timestamp'].min())
    common_max = min(p_gui_gaze['world_timestamp'].max(), p_gaze['world_timestamp'].max())
    p_gui_gaze_trimsort = p_gui_gaze[(p_gui_gaze['world_timestamp'] >= common_min) & (p_gui_gaze['world_timestamp'] <= common_max)].sort_values(by='world_timestamp')
    p_gaze_trimsort = p_gaze[(p_gaze['world_timestamp'] >= common_min) & (p_gaze['world_timestamp'] <= common_max)].sort_values(by='world_timestamp')

In [None]:
# Run other half
tolerances_gaze = {
    1: 0.1,
    2: 0.1,
    3: "-",
    4: 0.1,
    5: 0.1,
    6: 0.1,
    7: 0.05,
    8: 0.1,
    9: 0.1,
    10: 0.05,
    11: 0.1,
    12: 0.15,
    13: 0.1,
    14: 0.1,
    15: 0.3,
    16: 0.03,
    17: 0.3,
    18: 0.1,
    19: 0.1,
    20: 0.1,
}

for pid in participant_id:
    # Merge as of
    p_merged_asof = pd.merge_asof(
        p_gaze_trimsort,
        p_gui_gaze_trimsort,
        on='world_timestamp',
        by=['confidence', 'norm_pos_x', 'norm_pos_y'],
        tolerance= tolerances_gaze[pid],
        direction='nearest'
        )
    na_values = p_merged_asof['gui_timestamp'].isna().sum()

    # Compute offset between pupil and gui timestamps
    offset_mean_asof = np.mean(p_merged_asof['world_timestamp'] - p_merged_asof['gui_timestamp'])
    offset_std_asof = np.std(p_merged_asof['world_timestamp'] - p_merged_asof['gui_timestamp'])

    # Print results
    print("-----------------------------------------------------")
    print()

    print(f"PARTICIPANT {pid}")

    print("-> From logging to gui")
    print(f"Len of log df: {len(p_log)}")
    print(f"Len of gui df for log: {len(p_gui_log)}")
    print(f"Marker discrepancies: {discrepancies}")
    print(f"Offset mean log to gui: {wts_to_gui_offset_mean}")
    print(f"Offset std log to gui: {wts_to_gui_offset_std}")

    print()

    print("-> From gui to pupil")
    print(f"Len of gui df: {len(p_gui_gaze)}")
    print(f"Len of gaze df: {len(p_gaze)}")
    print(f"Len of gui trimsort df: {len(p_gui_gaze_trimsort)}")
    print(f"Len of gaze trimsort df: {len(p_gaze_trimsort)}")

    print()

    print("-> Results")
    print(f"Tolerance: {tolerances_gaze[pid]}")
    print(f"Len of merge_asof df: {len(p_merged_asof)}")
    print(f"Matched values in x (gaze): {len(p_merged_asof) - na_values}")
    print(f"Mean offset merged_asof: {offset_mean_asof}")
    print(f"Std offset merged_asof: {offset_std_asof}")

    print()

    print("-----------------------------------------------------")

### Synchronization computation (simple)
- Manually created dataframe + computation

In [43]:
# Manually creating the DataFrame with data from the provided text files
offset_data = {
    'participant_id': [1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
    'pupil_mean': [
        None, None, None, None, None, None,
        4109.257553304065, 
        22187.97703672082,
        5416.92263870804,
        16531.296070843073,
        84199.35689554777, 
        93620.139733137,
        99778.58807834735,
        3135.9419403789816,
        87722.83890598793, 
        2655.8941165745155,
        4067.6027457861605,
        71583.83790804863,
        80480.59764496642
    ],
    'pupil_std': [
        None, None, None, None, None, None,
        0.0015540616445236736,
        0.0009565443258474121,
        0.0010185332871037824,
        0.0008931335310408102,
        0.0009202319056514787,
        0.0010068354754641231,
        0.0010086043613726362,
        0.0010346697359956188,
        0.000898015048832067,
        0.0008651488841632027, 
        0.004909002020535595,
        0.0015035274843810211,
        0.004411814755109096
    ],
    'gaze_mean': [
        -672862.8254689132,
        -664541.5826019697,
        -588022.8872828754,
        -578918.4627699563,
        82154.6509890003,
        349976.70950270735,
        4109.203127473147,
        22187.92287464568,
        5416.8687206548875,
        16531.242376280596,
        84199.30305764821,
        93620.08597532041,
        99778.53439845197,
        3135.88798966699,
        87722.78483034433,
        2655.8404129786127,
        4067.548879011448,
        71583.78398800883,
        80480.5438803016
    ],
    'gaze_std': [
        0.010101152431444825,
        0.011213037927546807,
        0.016895081555496176,
        0.015107109620048447,
        0.012640083799283774,
        0.01461043801255117,
        0.01599184351455052,
        0.01780569830867164,
        0.010848758750081483,
        0.009713719881618923,
        0.01239760723051959,
        0.010683433863253248,
        0.010193936300467025,
        0.013376659433486439,
        0.019103793234118906,
        0.01194004737632296,
        0.012154765751067805,
        0.012422883072455681,
        0.010797759985148002
    ],
    'log_pupil_mean': [
        None, None, None, None, None, None,
        1710841379.7298098,
        1710859458.4487002,
        1711021383.8964255,
        1711032498.1677978,
        1711100165.706887,
        1711109586.6234353,
        1711115745.0770855,
        1711380580.6615686,
        1711465166.2651808,
        1711530420.2930207,
        1711549938.8896227,
        1711617454.089709,
        1711626350.8593812
    ],
    'log_pupil_std': [
        None, None, None, None, None, None,
        0.002654590308012348,
        0.0019606005221617867,
        0.002596132207761823,
        0.0016050413399801406,
        0.008953334304134418,
        0.002252080743393519,
        0.001826714344806407,
        0.02498242118477865,
        0.006965123911033478,
        0.005961296218748378,
        0.002292924818227741,
        0.0019064997533042869,
        0.0016124437080255506
    ],
    'log_gaze_mean': [
        1710323060.2391326,
        1710331381.5050497,
        1710407899.148977,
        1710417003.592421,
        1710504954.71339,
        1710772775.5559084,
        1710841379.6769013,
        1710859458.3949537,
        1711021383.84353,
        1711032498.1141572,
        1711100165.6529558,
        1711109586.5698,
        1711115745.0236366,
        1711380580.6074257,
        1711465166.2607026,
        1711530420.2390208,
        1711549938.835953,
        1711617454.0360806,
        1711626350.8057556
    ],
    'log_gaze_std': [
        0.009267704577657755,
        0.009865290472380293,
        0.010213497413320831,
        0.010608973953347407,
        0.009436665703948553,
        0.009804471214128545,
        0.013381547240806944,
        0.009739741282879723,
        0.0095273627454879,
        0.009402437996693207,
        0.012820757323793652,
        0.009573894656712597,
        0.009919684910694278,
        0.026339731913368473,
        0.009384245142926977,
        0.009558716130033616,
        0.009923130672874592,
        0.009438752643090517,
        0.009601663696196626
    ],
}

offset_df = pd.DataFrame(offset_data)
offset_df


Unnamed: 0,participant_id,pupil_mean,pupil_std,gaze_mean,gaze_std,log_pupil_mean,log_pupil_std,log_gaze_mean,log_gaze_std
0,1,,,-672862.825469,0.010101,,,1710323000.0,0.009268
1,2,,,-664541.582602,0.011213,,,1710331000.0,0.009865
2,4,,,-588022.887283,0.016895,,,1710408000.0,0.010213
3,5,,,-578918.46277,0.015107,,,1710417000.0,0.010609
4,6,,,82154.650989,0.01264,,,1710505000.0,0.009437
5,7,,,349976.709503,0.01461,,,1710773000.0,0.009804
6,8,4109.257553,0.001554,4109.203127,0.015992,1710841000.0,0.002655,1710841000.0,0.013382
7,9,22187.977037,0.000957,22187.922875,0.017806,1710859000.0,0.001961,1710859000.0,0.00974
8,10,5416.922639,0.001019,5416.868721,0.010849,1711021000.0,0.002596,1711021000.0,0.009527
9,11,16531.296071,0.000893,16531.242376,0.009714,1711032000.0,0.001605,1711032000.0,0.009402


In [44]:
# Get best methods
std_columns = ['pupil_std', 'gaze_std', 'log_pupil_std', 'log_gaze_std']
min_columns = offset_df[std_columns].idxmin(axis=1)
offset_df['best_method'] = min_columns.str.replace('_std', '')

# Add the best mean value by appending '_mean' to the best method
offset_df['best_offset'] = offset_df.apply(lambda row: row[row['best_method'] + '_mean'], axis=1)
offset_df


Unnamed: 0,participant_id,pupil_mean,pupil_std,gaze_mean,gaze_std,log_pupil_mean,log_pupil_std,log_gaze_mean,log_gaze_std,best_method,best_offset
0,1,,,-672862.825469,0.010101,,,1710323000.0,0.009268,log_gaze,1710323000.0
1,2,,,-664541.582602,0.011213,,,1710331000.0,0.009865,log_gaze,1710331000.0
2,4,,,-588022.887283,0.016895,,,1710408000.0,0.010213,log_gaze,1710408000.0
3,5,,,-578918.46277,0.015107,,,1710417000.0,0.010609,log_gaze,1710417000.0
4,6,,,82154.650989,0.01264,,,1710505000.0,0.009437,log_gaze,1710505000.0
5,7,,,349976.709503,0.01461,,,1710773000.0,0.009804,log_gaze,1710773000.0
6,8,4109.257553,0.001554,4109.203127,0.015992,1710841000.0,0.002655,1710841000.0,0.013382,pupil,4109.258
7,9,22187.977037,0.000957,22187.922875,0.017806,1710859000.0,0.001961,1710859000.0,0.00974,pupil,22187.98
8,10,5416.922639,0.001019,5416.868721,0.010849,1711021000.0,0.002596,1711021000.0,0.009527,pupil,5416.923
9,11,16531.296071,0.000893,16531.242376,0.009714,1711032000.0,0.001605,1711032000.0,0.009402,pupil,16531.3


In [60]:
# Preparation
convertion_df_list = []
info_player_names = {
    1: 'p01',
    2: 'p02',
    3: '-',
    4: 'p04',
    5: 'p05',
    6: 'p06',
    7: 'p07',
    8: 'p08',
    9: 'p09',
    10: 'p10',
    11: 'p11',
    12: 'p12',
    13: 'p13',
    14: 'p14',
    15: 'p15',
    16: 'p16',
    17: 'p17',
    18: 'p18',
    19: 'p19',
    20: 'p20',
}

def convert_gui_timestamps(dataf, offset):
    timetstamp_columns = ['start_timestamp', 'clicked_timestamp', 'end_timestamp']
    unix_dataf = dataf.copy()
    for col in timetstamp_columns:
        unix_dataf[col] = unix_dataf[col].apply(lambda x: float(x) + offset)
    unix_dataf['block_data'] = unix_dataf['block_data'].apply(
    lambda lst: [lst[0] + offset, lst[1], lst[2] + offset]
    )

    return unix_dataf

In [61]:
# Pupil method
pupil_offset_df = offset_df[offset_df['best_method'] == 'pupil']
pupil_participant_ids = pupil_offset_df['participant_id'].tolist()

for pid in pupil_participant_ids:
    participant_df = df.query(f"participant_id == {pid}")
    participant_offset_to_pupil = offset_df.query(f"participant_id == {pid}")['best_offset']

    with open(f"./info_players/{info_player_names[pid]}.json") as file:
        meta_info = json.load(file)
    start_timestamp_diff = meta_info["start_time_system_s"] - meta_info["start_time_synced_s"]
    participant_offset_to_unix = participant_offset_to_pupil + start_timestamp_diff  
    
    unix_participant_df = convert_gui_timestamps(participant_df, participant_offset_to_unix)
    convertion_df_list.append(unix_participant_df)

In [62]:
# Gaze method
gaze_offset_df = offset_df[offset_df['best_method'] == 'gaze']
gaze_participant_ids = gaze_offset_df['participant_id'].tolist()

for pid in gaze_participant_ids:
    participant_df = df.query(f"participant_id == {pid}")
    participant_offset_to_pupil = offset_df.query(f"participant_id == {pid}")['best_offset']

    with open(f"./info_players/{info_player_names[pid]}.json") as file:
        meta_info = json.load(file)
    start_timestamp_diff = meta_info["start_time_system_s"] - meta_info["start_time_synced_s"]
    participant_offset_to_unix = participant_offset_to_pupil + start_timestamp_diff  
    
    unix_participant_df = convert_gui_timestamps(participant_df, participant_offset_to_unix)
    convertion_df_list.append(unix_participant_df)

In [63]:
# Log-Pupil method
log_pupil_offset_df = offset_df[offset_df['best_method'] == 'log_pupil']
log_pupil_participant_ids = log_pupil_offset_df['participant_id'].tolist()

for pid in log_pupil_participant_ids:
    participant_df = df.query(f"participant_id == {pid}")
    participant_offset_to_unix = offset_df.query(f"participant_id == {pid}")['best_offset']
    
    unix_participant_df = convert_gui_timestamps(participant_df, participant_offset_to_unix)
    convertion_df_list.append(unix_participant_df)

In [64]:
# Log-Gaze method
log_gaze_offset_df = offset_df[offset_df['best_method'] == 'log_gaze']
log_gaze_participant_ids = log_gaze_offset_df['participant_id'].tolist()

for pid in log_gaze_participant_ids:
    participant_df = df.query(f"participant_id == {pid}")
    participant_offset_to_unix = offset_df.query(f"participant_id == {pid}")['best_offset']
    
    unix_participant_df = convert_gui_timestamps(participant_df, participant_offset_to_unix)
    convertion_df_list.append(unix_participant_df)

In [65]:
new_gui_df = pd.concat(convertion_df_list).sort_values(by=['participant_id', 'block_id', 'task_id']).reset_index(drop=True)

In [67]:
# Check synchronization
pupil_ts = data['all_pupil_positions'].loc[:, ['participant_id', 'pupil_timestamp_unix', 'pupil_timestamp_dt']].dropna().compute()
# gui_ts = data['all_gui_data'].loc[:, ['participant_id', 'start_timestamp', 'end_timestamp']].dropna().compute()
gui_ts = new_gui_df.loc[:, ['participant_id', 'start_timestamp', 'end_timestamp']].dropna()

for pid in participant_ids:
    print(f"PARTICIPANT {pid}")
    
    # Compute minimum and maximum pupil timestamps
    p_pupil_ts = pupil_ts.query(f"participant_id == {pid}")
    min_pupil = p_pupil_ts['pupil_timestamp_dt'].min()
    max_pupil = p_pupil_ts['pupil_timestamp_dt'].max()
    print(f"Pupil timestamp\nmin: {min_pupil}, max: {max_pupil}")

    # Compute minimum and maximum GUI timestamps
    p_gui_ts = gui_ts.query(f"participant_id == {pid}")
    p_gui_start_ts = pd.to_numeric(p_gui_ts['start_timestamp'], errors='coerce')
    p_gui_end_ts = pd.to_numeric(p_gui_ts['end_timestamp'], errors='coerce')
    min_gui = pd.to_datetime(p_gui_ts['start_timestamp'], unit='s').min()
    max_gui = pd.to_datetime(p_gui_ts['end_timestamp'], unit='s').max()
    min_gui = min_gui.tz_localize('UTC').tz_convert('Europe/Berlin')
    max_gui = max_gui.tz_localize('UTC').tz_convert('Europe/Berlin')
    
    print(f"GUI timestamp\nmin: {min_gui}, max: {max_gui}")


PARTICIPANT 1
Pupil timestamp
min: 2024-03-13 10:44:40.346851072+01:00, max: 2024-03-13 11:52:03.755685120+01:00
GUI timestamp
min: 2024-03-13 10:45:24.146390528+01:00, max: 2024-03-13 11:49:45.223203584+01:00
PARTICIPANT 2
Pupil timestamp
min: 2024-03-13 13:02:53.695904256+01:00, max: 2024-03-13 13:53:57.768719360+01:00
GUI timestamp
min: 2024-03-13 13:03:59.757712640+01:00, max: 2024-03-13 13:51:54.275290624+01:00
PARTICIPANT 4
Pupil timestamp
min: 2024-03-14 10:22:40.364917504+01:00, max: 2024-03-14 11:15:02.561565440+01:00
GUI timestamp
min: 2024-03-14 10:22:48.371818752+01:00, max: 2024-03-14 11:13:16.324943872+01:00
PARTICIPANT 5
Pupil timestamp
min: 2024-03-14 12:50:09.019525888+01:00, max: 2024-03-14 13:51:57.872077824+01:00
GUI timestamp
min: 2024-03-14 12:50:58.195889408+01:00, max: 2024-03-14 13:47:45.510671104+01:00
PARTICIPANT 6
Pupil timestamp
min: 2024-03-15 13:16:02.288979200+01:00, max: 2024-03-15 14:10:15.483073280+01:00
GUI timestamp
min: 2024-03-15 13:16:39.32403302

In [106]:
# Check synchronization
pupil_ts = data['all_pupil_positions'].loc[:, ['participant_id', 'pupil_timestamp_unix', 'pupil_timestamp_dt']].dropna().compute()
gui_ts = data['all_gui_data'].loc[:, ['participant_id', 'start_timestamp', 'end_timestamp']].dropna().compute()
# gui_ts = new_gui_df.loc[:, ['participant_id', 'start_timestamp', 'end_timestamp']].dropna()

for pid in participant_ids:
    print(f"PARTICIPANT {pid}")
    
    # Compute minimum and maximum pupil timestamps
    p_pupil_ts = pupil_ts.query(f"participant_id == {pid}")
    min_pupil = p_pupil_ts['pupil_timestamp_dt'].min()
    max_pupil = p_pupil_ts['pupil_timestamp_dt'].max()
    print(f"Pupil timestamp\nmin: {min_pupil}, max: {max_pupil}")

    # Compute minimum and maximum GUI timestamps
    p_gui_ts = gui_ts.query(f"participant_id == {pid}")
    p_gui_start_ts = pd.to_numeric(p_gui_ts['start_timestamp'], errors='coerce')
    p_gui_end_ts = pd.to_numeric(p_gui_ts['end_timestamp'], errors='coerce')
    min_gui = pd.to_datetime(p_gui_ts['start_timestamp'], unit='s').min()
    max_gui = pd.to_datetime(p_gui_ts['end_timestamp'], unit='s').max()
    min_gui = min_gui.tz_localize('UTC').tz_convert('Europe/Berlin')
    max_gui = max_gui.tz_localize('UTC').tz_convert('Europe/Berlin')
    
    print(f"GUI timestamp\nmin: {min_gui}, max: {max_gui}")

PARTICIPANT 1
Pupil timestamp
min: 2024-03-13 10:44:40.346851072+01:00, max: 2024-03-13 11:52:03.755685120+01:00
GUI timestamp
min: 2024-03-13 10:45:24.146390528+01:00, max: 2024-03-13 11:49:45.223203584+01:00
PARTICIPANT 2
Pupil timestamp
min: 2024-03-13 13:02:53.695904256+01:00, max: 2024-03-13 13:53:57.768719360+01:00
GUI timestamp
min: 2024-03-13 13:03:59.757712640+01:00, max: 2024-03-13 13:51:54.275290624+01:00
PARTICIPANT 4
Pupil timestamp
min: 2024-03-14 10:22:40.364917504+01:00, max: 2024-03-14 11:15:02.561565440+01:00
GUI timestamp
min: 2024-03-14 10:22:48.371818752+01:00, max: 2024-03-14 11:13:16.324943872+01:00
PARTICIPANT 5
Pupil timestamp
min: 2024-03-14 12:50:09.019525888+01:00, max: 2024-03-14 13:51:57.872077824+01:00
GUI timestamp
min: 2024-03-14 12:50:58.195889664+01:00, max: 2024-03-14 13:47:45.510671104+01:00
PARTICIPANT 6
Pupil timestamp
min: 2024-03-15 13:16:02.288979200+01:00, max: 2024-03-15 14:10:15.483073280+01:00
GUI timestamp
min: 2024-03-15 13:16:39.32403302

### Synchronization computation (complicated)

In [8]:
# Supporting variables

warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)
method_to_use = {
    1: 'log_gaze',
    2: 'log_gaze',
    4: 'log_gaze',
    5: 'log_gaze',
    6: 'log_gaze',
    7: 'log_gaze',
    8: "pupil",
    9: "pupil",
    10: "pupil",
    11: "pupil",
    12: "pupil",
    13: "pupil",
    14: "pupil",
    15: "pupil",
    16: "pupil",
    17: "pupil",
    18: "log_pupil",
    19: "pupil",
    20: "log_pupil",
}
log_json_names = {
    1: 'p01',
    2: 'p02',
    4: 'p04',
    5: 'p05',
    6: 'p06',
    7: 'p07',
    18: 'p18',
    20: 'p20',
}
gui_file_names =  {
        1: '001_vp01',
        2: '001_vp02',
        4: '002_vp03',
        5: '003_vp04',
        6: '004_vp05',
        7: '005_vp06',
        8: '006_vp07',
        9: '007_vp08',
        10: '008_vp09',
        11: '009_vp10',
        12: '010_vp11',
        13: '011_vp12',
        14: '012_vp13',
        15: '013_vp14',
        16: '014_vp15',
        17: '015_vp16',
        18: '016_vp17',
        19: '017_vp18',
        20: '018_vp19',
}
log_column_names = ["UTC_time", "markerName"]
log_column_names.extend([f"Column{i+1}" for i in range(3, 20)])
pupil_required_columns = ['Timestamp', 'timestamp_Pupil.3', 'confidence.3', 'diameter']
gaze_required_columns = ['Timestamp', 'timestamp_Pupil.2', 'confidence.2', 'norm_pos_x.1', 'norm_pos_y.1']
pupil_new_col_names = {
    'Timestamp': 'gui_timestamp',
    'timestamp_Pupil.3': 'pupil_timestamp',
    'confidence.3': 'confidence'
    }
gaze_new_col_names = {
'Timestamp': 'gui_timestamp',
'timestamp_Pupil.2': 'gaze_timestamp',
'confidence.2': 'confidence',
'norm_pos_x.1': 'norm_pos_x',
'norm_pos_y.1': 'norm_pos_y',
}
marker_names = [
    'Start_Task',
    'Task',
    'on_item_clicked',
    'view6',
    'view2',
    'button_click2',
    'Result',
    'view1',
    'view3',
    'view4',
    'view5',
    'button_click3',
    'button_click1',
    'Stop_Tasks'
    ]
pupil_tolerances = {
    18: 0.03,
    19: 0.03,
}
gaze_tolerances = {
    1: 0.1,
    2: 0.1,
    4: 0.1,
    5: 0.1,
    6: 0.1,
    7: 0.05
}

In [12]:
# Define computation functions

def compute_by_pupil(pid):
    # Prepare gui data
    gui_file_path = f"./iMotions/exports/{gui_file_names[pid]}.csv"
    with open(gui_file_path, 'r') as f:
        lines = f.readlines()
    start_idx = next(i for i, line in enumerate(lines) if line.startswith("Row,"))
    raw_gui_df = pd.read_csv(gui_file_path, skiprows=start_idx)
    gui_df = raw_gui_df[pupil_required_columns].dropna()
    gui_df['Timestamp'] = gui_df['Timestamp'] / 1000
    gui_df = gui_df.rename(columns=pupil_new_col_names)

    # Prepare pupil data
    pupil_df = data['all_pupil_positions'].query(f"participant_id == {pid}").loc[:, ['pupil_timestamp', 'confidence', 'diameter']].compute().dropna()

    # Trim and sort
    common_min = max(gui_df['pupil_timestamp'].min(), pupil_df['pupil_timestamp'].min())
    common_max = min(gui_df['pupil_timestamp'].max(), pupil_df['pupil_timestamp'].max())
    gui_df_trimsort = gui_df[(gui_df['pupil_timestamp'] >= common_min) & (gui_df['pupil_timestamp'] <= common_max)].sort_values(by='pupil_timestamp')
    pupil_df_trimsort = pupil_df[(pupil_df['pupil_timestamp'] >= common_min) & (pupil_df['pupil_timestamp'] <= common_max)].sort_values(by='pupil_timestamp')

    # Merge as of
    merged_df = pd.merge_asof(
        gui_df_trimsort,
        pupil_df_trimsort,
        on='pupil_timestamp',
        by=['confidence', 'diameter'],
        tolerance=0.0015,
        direction='nearest'
        )
    na_values = merged_df['gui_timestamp'].isna().sum()

    # Compute offset
    mean = np.mean(merged_df['pupil_timestamp'] - merged_df['gui_timestamp'])
    std = np.std(merged_df['pupil_timestamp'] - merged_df['gui_timestamp'])

    # Print merge results
    print(f"PARTICIPANT {pid}")
    print(f"Length of merge_df: {len(merged_df)}")
    print(f"Matching rows: {len(merged_df) - na_values}")
    print(f"Offset mean: {mean}")
    print(f"Offset std: {std}")
    print()

    return mean

def compute_by_log_gaze(pid):
    return

def compute_by_log_pupil(pid):
    return

In [13]:
# Run computation

# for pid in participant_ids:
for pid in [8]:
    # Read files 
    
    
    # Compute according to method
    mtd = method_to_use[pid]
    if mtd == "pupil":
        offset = compute_by_pupil(pid)

    elif mtd == "log_gaze":
        offset = compute_by_log_gaze(pid)


    elif mtd == "log_pupil":
        offset = compute_by_log_pupil(pid)



PARTICIPANT 8
Length of merge_df: 1587380
Matching rows: 1587380
Offset mean: 4109.257554936803
Offset std: 0.001551655542518456

