# This notebook is used to extract fixations for each participant

### Import libraries

In [7]:
import pandas as pd
import numpy as np
import os

# Enable interactive Matplotlib plots in the notebook
%matplotlib qt5

import matplotlib.pyplot as plt
from matplotlib import cm
import os
import astropy.convolution as krn
import scipy.stats as stats
import sys

import matplotlib.pyplot as plt
from matplotlib.widgets import Button



### Preprocess, extract fixations and add them to the dataframe

In [8]:
def _to_numeric_or_keep(column):
    """Return `column` converted with `pd.to_numeric`, or unchanged if conversion fails.

    Mirrors the former `pd.to_numeric(..., errors='ignore')` behaviour without
    relying on that deprecated option.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def extract_fixations(df, path):
    """Clean one participant's samples, detect fixations and annotate the samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw samples. The columns read here are `fName`, `frameNr`, `sampTime`,
        `user_pred_px_x`, `user_pred_px_y` and `event`.
    path : str
        Path of the participant's original CSV file. It is handed to `runI2MC`
        and also determines where the annotated file is written.

    Returns
    -------
    pandas.DataFrame
        The annotated samples restricted to rows where `event == 'target_on'`.

    Side effects
    ------------
    Writes the annotated (and filtered) samples to `<path without extension>_extra.csv`.

    Notes
    -----
    Added columns: `FixXPos`, `FixYPos`, `FixStartEnd` ('fix_start' / 'fix_end' / ''),
    `FixDur`, `DistFromPrevFix`, `PrevFixSampTime`, `PrevFixXPos`, `PrevFixYPos`.
    Samples are matched to fixations by comparing `sampTime` with the `FixStart` /
    `FixEnd` values returned by `runI2MC`; start/end labels require an exact match.
    """
    # I2MC is imported from a sibling folder; register it only once per session
    if './FixationDetection' not in sys.path:
        sys.path.append('./FixationDetection')
    from I2MC import runI2MC

    # --- Clean the samples -------------------------------------------------
    # Work on a copy so the column assignment below never writes into a slice
    df = df[df.fName.notna()].copy()
    # Non-numeric frame numbers become NaN and are filtered out right after
    df['frameNr'] = pd.to_numeric(df['frameNr'], errors='coerce')
    df = df[df.frameNr.notna()]

    df = df[df.sampTime.notna()]
    df = df[df.user_pred_px_x.notna()]
    df = df[df.user_pred_px_y.notna()]
    # Convert string columns to numbers where possible; leave the others as they are
    df = df.apply(_to_numeric_or_keep)

    # Order by frame and drop duplicate samples (same sampTime).
    # ignore_index=True leaves a 0..n-1 index, so positions below match the rows.
    df = df.sort_values('frameNr')
    df = df.reset_index(drop=True)
    df = df.drop_duplicates(subset=['sampTime'], ignore_index=True)

    # --- Detect fixations on the participant's original data file ----------
    fixDF = runI2MC(path, plotData = False)

    # Convert the fixation columns once instead of once per sample
    fix_start = np.asarray(fixDF.FixStart)
    fix_end = np.asarray(fixDF.FixEnd)
    fix_x = np.asarray(fixDF.XPos)
    fix_y = np.asarray(fixDF.YPos)
    fix_dur = np.asarray(fixDF.FixDur)
    n_fix = fixDF.shape[0]

    # --- Per-sample output columns -----------------------------------------
    n_samples = df.shape[0]
    FixXPos = np.zeros(n_samples)
    FixYPos = np.zeros(n_samples)
    FixStartEnd = np.full(n_samples, '', dtype='U10')
    FixDur = np.zeros(n_samples)
    DistFromPrevFix = np.zeros(n_samples)
    PrevFixXPos = np.zeros(n_samples)
    PrevFixYPos = np.zeros(n_samples)
    PrevFixSampTime = np.zeros(n_samples)

    # Position and end time of the most recently ended fixation.
    # None (not False/0) marks "no fixation ended yet", so a fixation located
    # at coordinate 0 is still recognised as a valid previous fixation.
    prev_fix_x = None
    prev_fix_y = None
    prev_fix_sampTime = 0

    idx = 0  # index of the current fixation in fixDF
    for pos, samp_time in enumerate(df['sampTime'].to_numpy()):

        # Skip every fixation that ended before this sample
        while idx < n_fix and samp_time > fix_end[idx]:
            idx += 1

        # No fixations left: the remaining samples keep their default values
        if idx >= n_fix:
            break

        # Samples inside the current fixation receive its position
        if fix_start[idx] <= samp_time <= fix_end[idx]:
            FixXPos[pos] = fix_x[idx]
            FixYPos[pos] = fix_y[idx]

        # Label the samples on which the fixation starts and ends
        if samp_time == fix_start[idx]:
            FixStartEnd[pos] = 'fix_start'

            if prev_fix_x is not None:
                PrevFixXPos[pos] = prev_fix_x
                PrevFixYPos[pos] = prev_fix_y
                # Euclidean distance between this and the previous fixation
                DistFromPrevFix[pos] = np.sqrt((fix_x[idx] - prev_fix_x)**2
                                               + (fix_y[idx] - prev_fix_y)**2)
                PrevFixSampTime[pos] = prev_fix_sampTime

        elif samp_time == fix_end[idx]:
            FixStartEnd[pos] = 'fix_end'
            FixDur[pos] = fix_dur[idx]

            # Remember where and when this fixation ended
            prev_fix_x = fix_x[idx]
            prev_fix_y = fix_y[idx]
            prev_fix_sampTime = samp_time

    # --- Add the fixation columns to the sample dataframe ------------------
    df['FixXPos'] = FixXPos
    df['FixYPos'] = FixYPos
    df['FixStartEnd'] = FixStartEnd
    df['FixDur'] = FixDur
    df['DistFromPrevFix'] = DistFromPrevFix
    df['PrevFixSampTime'] = PrevFixSampTime
    df['PrevFixXPos'] = PrevFixXPos
    df['PrevFixYPos'] = PrevFixYPos

    # Keep only rows with strictly positive fixation and gaze coordinates.
    # FixXPos/FixYPos default to 0, so this also drops every sample that was
    # not assigned to a fixation.
    df = df[(df['FixXPos'] > 0) & (df['FixYPos'] > 0) & (df['user_pred_px_x'] > 0) & (df['user_pred_px_y'] > 0)]

    # Save the pre-processed dataframe next to the original file
    df.to_csv((os.path.splitext(path)[0] + '_extra.csv'), index=False)

    # Return only the samples recorded while the target was presented
    df = df[df.event=='target_on']

    return df
    

# # # Label trials with too few data points
# # a = df.groupby('trialNr').count().reset_index()
# # a = a[['trialNr', 'sampTime']]
# # # 3) rename the columns so they would be added
# # a.columns = ['trialNr', 'samplesPerTrial']
# # df = pd.merge(df, a, on="trialNr")


### For each participant, fixations are extracted, added to the original data file, and saved as '[participant_folder]_record_extra.csv'

In [9]:
# Path to data folders
# path_to_data = 'D:/Dropbox/Appliedwork/CognitiveSolutions/Projects/DeepEye/TechnicalReports/TechnicalReport1/Test_Spaak/data'
path_to_data = 'C:/Users/artem/Dropbox/Appliedwork/CognitiveSolutions/Projects/DeepEye/TechnicalReports/TechnicalReport1/Test_Spaak/data/approved/data'

# Every entry in the data directory is treated as one participant folder
folder_names = os.listdir(path_to_data)

# Read and process the original data file of each participant.
# The file is expected at <path_to_data>/<folder>/<folder>_record.csv
for fn in folder_names:
    path_to_file = os.path.join(path_to_data, fn, fn+'_record.csv')

    print(f'Processing participant {fn}...')

    # Read the file and skip the bad rows
    try:
        df = pd.read_csv(path_to_file, on_bad_lines='skip')
    except FileNotFoundError:
        print('File does not exist: ' + path_to_file)
        continue
    except Exception as err:
        # Stay best-effort across participants, but report the real cause
        # instead of claiming the file is missing
        print(f'Could not read {path_to_file}: {type(err).__name__}: {err}')
        continue

    # Annotate the samples with fixations; this also writes '<..._record>_extra.csv'
    df1 = extract_fixations(df, path_to_file)
    
   

Processing participant 2024_06_12_10_08_09...



Importing and processing: "C:/Users/artem/Dropbox/Appliedwork/CognitiveSolutions/Projects/DeepEye/TechnicalReports/TechnicalReport1/Test_Spaak/data/approved/data\2024_06_12_10_08_09\2024_06_12_10_08_09_record.csv"
	Searching for valid interpolation windows
	Replace interpolation windows with Steffen interpolation
	2-Means clustering started for averaged signal
	Determining fixations based on clustering weight mean for averaged signal and separate eyes + 2*std


  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)




I2MC took 4.922480583190918s to finish!
