In [8]:
import pandas as pd
import numpy as np

# Enable interactive Matplotlib plots in the notebook
# %matplotlib qt5


import matplotlib.pyplot as plt
from matplotlib import cm
import os
# import astropy.convolution as krn
# import scipy.stats as stats
import sys

import matplotlib.pyplot as plt
from matplotlib.widgets import Button
import ast   
import re
import matplotlib.patches as patches
import matplotlib.image as mpimg  
import matplotlib.colors as mcolors



# Get latency of all fixations and their order in a trial

In [9]:
# Get fixation latency

def getFixationLatency(df):
    """
    Add target-onset time, fixation latency and fixation order to fixation events.

    The first row of every trial in `df` is treated as the moment the target
    appeared. For every 'fix_start' event the latency is the time elapsed since
    that moment, and the order is the rank of that latency within the trial.

    Parameters:
    - df: DataFrame with (at least) the columns 'trialNr', 'sampTime' and
          'FixStartEnd'. Rows are assumed to be in chronological order within
          each trial, so the first row of a trial is its first sample.

    Returns:
    - A new DataFrame (fresh 0..n-1 index) holding only the 'fix_start' and
      'fix_end' rows of `df`, with three added columns:
        'targSampTime'  - sampTime of the first row of the trial
        'FixLatency'    - sampTime - targSampTime for 'fix_start' rows, NaN otherwise
        'FixationOrder' - rank of FixLatency within the trial (ties share the
                          average rank), NaN for 'fix_end' rows
      The input DataFrame is not modified.
    """

    # Timestamp of the first sample of every trial (= target onset)
    targ_onset = (
        df.drop_duplicates(subset=['trialNr'], keep='first', ignore_index=True)
        [['trialNr', 'sampTime']]
        .rename(columns={'sampTime': 'targSampTime'})
    )

    # Attach the onset time to every row of the trial (one time per trial)
    df = pd.merge(df, targ_onset, on='trialNr')

    # Keep fixation events only; copy so the new columns are written to an
    # independent frame rather than to a slice of `df`
    events = df[df['FixStartEnd'].isin(['fix_start', 'fix_end'])].copy()

    # Latency is defined for fixation starts only; 'fix_end' rows stay NaN
    is_start = events['FixStartEnd'] == 'fix_start'
    events['FixLatency'] = (events['sampTime'] - events['targSampTime']).where(is_start)

    # Rank the latencies within each trial. NaN latencies keep a NaN rank, so
    # only 'fix_start' rows take part in the ranking. Because the values are
    # computed on the event rows themselves (not merged back on 'sampTime'),
    # two events that share a timestamp can never duplicate rows.
    events['FixationOrder'] = events.groupby('trialNr')['FixLatency'].rank()

    return events.reset_index(drop=True)
    
# df_modified = getFixationLatency(df1)




In [10]:
def handle_carryover_fixations_and_merge(df, max_event_duration, verbose=True):
    """
    Repair fixations that cross a trial boundary and collapse start/end events
    into one row per fixation.

    STEP 1 walks over the trials and handles the two carryover cases:
      1) the trial starts with a 'fix_end' event (the fixation began before the
         trial): the event is relabelled 'fix_end_carryover_inserted_start', its
         FixDur is recomputed as sampTime - targSampTime, and a matching
         'fix_start_carryover_inserted_start' event (FixDur 0, FixLatency 0) is
         inserted in front of it. FixationOrder is then re-ranked for the trial.
      2) the trial ends with a 'fix_start' event (the fixation outlasts the
         trial): the event is relabelled 'fix_start_carryover_inserted_end' and a
         'fix_end_carryover_inserted_end' event is appended whose FixDur is
         (targSampTime + max_event_duration) - sampTime.

    STEP 2 joins every start event with the FixDur of its end event, separately
    for regular, inserted-start and inserted-end fixations, and returns all of
    them sorted by 'frameNr'.

    Parameters:
    - df: DataFrame of fixation events ('fix_start' / 'fix_end' rows) with the
          columns 'trialNr', 'frameNr', 'sampTime', 'targSampTime',
          'FixStartEnd', 'FixXPos', 'FixYPos', 'FixDur', 'FixLatency' and
          'FixationOrder'. It is not modified.
    - max_event_duration: maximum duration of the event (same unit as sampTime),
          used to close fixations that are still running when the trial ends.
    - verbose: when True (default) a message is printed for every trial that
          needed a carryover correction.

    Returns:
    - DataFrame with one row per fixation: the columns of the start event plus
      the 'FixDur' of the end event, sorted by 'frameNr'.

    Note: start and end events are matched on (trialNr, FixXPos, FixYPos). Two
    fixations of the same trial with exactly identical coordinates would still
    be cross-matched.
    """

    # trialNr is part of the key so that fixations from different trials that
    # happen to share coordinates are never paired with each other
    pair_keys = ['trialNr', 'FixXPos', 'FixYPos']

    def _pair_events(events, start_label, end_label):
        """Join each `start_label` row with the FixDur of its `end_label` row."""
        # FixDur of a start event carries no information, the end event owns it
        starts = events[events['FixStartEnd'] == start_label].drop('FixDur', axis=1)
        ends = events.loc[events['FixStartEnd'] == end_label, pair_keys + ['FixDur']]
        return pd.merge(starts, ends, on=pair_keys)

    fixcarryover_groups = []

    #*** STEP 1: Identify, label and correct carryover fixations ***
    for name, group in df.groupby('trialNr'):
        # Work on an independent copy: values are written with .iloc[row, col]
        # (not chained indexing), which also works under pandas Copy-on-Write
        group = group.copy()

        # Trial starts with a fixation end -> insert the missing fixation start
        if group['FixStartEnd'].iloc[0] == 'fix_end':
            if verbose:
                print(f'Trial {name} starts with fix_end')

            # Relabel the event and count its duration from target onset
            group.iloc[0, group.columns.get_loc('FixStartEnd')] = 'fix_end_carryover_inserted_start'
            group.iloc[0, group.columns.get_loc('FixDur')] = (
                group['sampTime'].iloc[0] - group['targSampTime'].iloc[0]
            )

            # The inserted start is a copy of the end event with a smaller
            # index label, so sorting by index puts it in front
            first_row = group.iloc[[0]].copy()
            first_row.index = [-1]
            first_row['FixStartEnd'] = 'fix_start_carryover_inserted_start'
            first_row['FixDur'] = 0
            first_row['FixLatency'] = 0

            group = pd.concat([first_row, group]).sort_index().reset_index(drop=True)

            # A fixation was added at the beginning, so the order must be re-ranked
            group['FixationOrder'] = group['FixLatency'].rank()

        # Trial ends with a fixation start -> append the missing fixation end
        if group['FixStartEnd'].iloc[-1] == 'fix_start':
            if verbose:
                print(f'Trial {name} ends with fix_start')

            group.iloc[-1, group.columns.get_loc('FixStartEnd')] = 'fix_start_carryover_inserted_end'
            group.iloc[-1, group.columns.get_loc('FixDur')] = 0

            # The inserted end is a copy of the start event with the next index label
            last_row = group.iloc[[-1]].copy()
            last_row.index = last_row.index + 1
            last_row['FixStartEnd'] = 'fix_end_carryover_inserted_end'
            # The fixation is cut off at the end of the event
            last_row['FixDur'] = (last_row['targSampTime'] + max_event_duration) - last_row['sampTime']
            last_row['FixLatency'] = 0
            last_row['FixationOrder'] = 0

            group = pd.concat([group, last_row]).sort_index().reset_index(drop=True)

        fixcarryover_groups.append(group)

    if fixcarryover_groups:
        fc_df = pd.concat(fixcarryover_groups)
    else:
        # No trials at all: continue with an empty frame that keeps the columns
        fc_df = df.iloc[0:0].copy()

    #*** STEP 2: Collapse the events so that each row describes one fixation ***

    # 1. Fixations that start and end within the trial
    df_merged = _pair_events(fc_df, 'fix_start', 'fix_end')

    # 2. Carryover fixations that missed the fix_start event
    df_merged_insert_start = _pair_events(
        fc_df, 'fix_start_carryover_inserted_start', 'fix_end_carryover_inserted_start')

    # 3. Carryover fixations that missed the fix_end event
    df_merged_insert_end = _pair_events(
        fc_df, 'fix_start_carryover_inserted_end', 'fix_end_carryover_inserted_end')

    # 4. All carryover fixations, sorted by frameNr
    df_carryover = pd.concat([df_merged_insert_start, df_merged_insert_end], ignore_index=True)
    df_carryover = df_carryover.sort_values(by='frameNr')

    # 5. Carryover fixations together with the within-trial fixations
    df_final = pd.concat([df_merged, df_carryover], ignore_index=True)
    df_final = df_final.sort_values(by='frameNr')

    return df_final

# df_final = handle_carryover_fixations_and_merge(df_modified, max_event_duration=4000)
    

# Calculating AOI for each fixation

In [12]:
def addAOI(df, verbose=True):
    """
    Assign every fixation to the area of interest (AOI) it falls into.

    Parameters:
    - df: DataFrame that contains only fixation events (no raw samples) with the
          columns 'padding', 'bboxes', 'bboxesNames', 'FixXPos' and 'FixYPos'.
          'bboxes' holds one box [left, top, width, height] or a list of such
          boxes; 'bboxesNames' holds the matching names. Both may also be the
          string representation of such a list. The frame is not modified.
    - verbose: when True (default) the assignment of every fixation is printed.

    Returns:
    - A copy of `df` with a fresh 0..n-1 index and two added columns:
        'AOI_bbox' - index of the first box containing the fixation, or the
                     string 'None' when it is outside all boxes
        'AOI_stim' - name of that box, or the string 'None'
    """

    def is_point_in_box(point, box):
        """
        Determine if a point is within a bounding box.

        Parameters:
        - point: A tuple (x, y) representing the point.
        - box: A tuple ((x1, y1), (x2, y2)) representing the bounding box,
               where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner.

        Returns:
        - True if the point is within the box (edges included), False otherwise.
        """
        px, py = point
        (x1, y1), (x2, y2) = box

        return x1 <= px <= x2 and y1 <= py <= y2

    def get_bounding_box_assignment(boxes, point):
        """
        Determine the bounding box a point belongs to.

        Parameters:
        - boxes: A list of bounding boxes, each defined as ((x1, y1), (x2, y2)).
        - point: A tuple (x, y) representing the point.

        Returns:
        - The index of the first bounding box containing the point, or the
          string 'None' if it doesn't belong to any box.
        """
        for i, box in enumerate(boxes):
            if is_point_in_box(point, box):
                return i

        return 'None'

    def _as_list(value):
        """Parse a string-encoded list (e.g. read back from CSV); pass lists through."""
        return ast.literal_eval(value) if isinstance(value, str) else value

    # numpy scalars are not instances of int, so they are listed explicitly
    scalar_types = (int, float, np.integer, np.floating)

    bbox_assignments = []
    stim_assignments = []

    # Work on a copy so the caller's frame does not silently gain columns
    aoi_df = df.copy()

    for _, row in aoi_df.iterrows():

        padding = row['padding']  # padding around the bounding box

        # Offset of the scene on the screen; zero because the box coordinates
        # are used as screen coordinates here
        offset_left = 0
        offset_top = 0

        # Names and coordinates of all boxes for this fixation
        bboxesNames = _as_list(row['bboxesNames'])
        bboxes_coords = _as_list(row['bboxes'])

        # A single flat box [left, top, width, height] becomes a list of one box.
        # An empty list stays empty (no boxes -> fixation is outside all AOIs).
        if len(bboxes_coords) > 0 and all(isinstance(coord, scalar_types) for coord in bboxes_coords):
            bboxes_coords = [bboxes_coords]

        bounding_boxes = []
        for coord in bboxes_coords:
            # Corners of the box, grown by `padding` on every side. The right and
            # bottom edges are left/top + width/height + padding, which matches
            # the rectangle drawn by draw_bbox().
            x1 = offset_left + coord[0] - padding  # left
            y1 = offset_top + coord[1] - padding  # top
            x2 = offset_left + coord[0] + coord[2] + padding  # right
            y2 = offset_top + coord[1] + coord[3] + padding  # bottom
            bounding_boxes.append([(x1, y1), (x2, y2)])

        # Fixation coordinates and the index of the AOI they fall into
        point = (row['FixXPos'], row['FixYPos'])
        assignment = get_bounding_box_assignment(bounding_boxes, point)

        # Name of the stimulus shown in that box, 'None' when outside all boxes
        if assignment != 'None':
            bboxName = bboxesNames[assignment]
        else:
            bboxName = 'None'

        if verbose:
            print(f'The point {point} belongs to bounding box: {assignment}, bboxName:{bboxName}')

        bbox_assignments.append(assignment)
        stim_assignments.append(bboxName)

    aoi_df['AOI_bbox'] = bbox_assignments
    aoi_df['AOI_stim'] = stim_assignments

    # Reset index
    aoi_df = aoi_df.reset_index(drop=True)

    return aoi_df


### Plotting function

In [13]:
# import matplotlib.patches as patches
# import ast
# import matplotlib.image as mpimg  
# import matplotlib.colors as mcolors

def draw_bbox(ax, bboxes_coords, colormap='viridis', offset_left=0, offset_top=0, padding=0):
    """
    Outline one or more bounding boxes on a Matplotlib axes.

    Parameters:
    - ax: axes the rectangles are added to.
    - bboxes_coords: one box [left, top, width, height], a list of such boxes,
                     or the string representation of either.
    - colormap: name of the Matplotlib colormap the edge colours are sampled from.
    - offset_left, offset_top: shift applied to the left/top of every box.
    - padding: amount by which each rectangle is grown on every side.

    Returns:
    - The string 'done'.
    """
    # Boxes stored as text are turned back into Python lists first
    boxes = ast.literal_eval(bboxes_coords) if isinstance(bboxes_coords, str) else bboxes_coords

    # A flat sequence of numbers describes a single box: wrap it in a list
    is_single_box = all(isinstance(value, (int, float)) for value in boxes)
    if is_single_box:
        boxes = [boxes]

    # One colour per box, spread evenly over the colormap
    palette = plt.colormaps[colormap](np.linspace(0, 1, len(boxes)))

    for box, edge_color in zip(boxes, palette):
        left = offset_left + float(box[0])  # adjust based on the scene location
        top = offset_top + float(box[1])  # adjust based on the scene location
        width = float(box[2])
        height = float(box[3])

        # Unfilled rectangle, enlarged by the padding on each side
        outline = patches.Rectangle(
            (left - padding, top - padding),
            width + padding*2,
            height + padding*2,
            fill=False,
            edgecolor=edge_color,
            linewidth=2,
        )
        ax.add_patch(outline)

    return 'done'

def draw_stimuli(ax, img_paths, img_coords, path_to_analysis, img_width=480, img_height=480):
    """
    Draw stimulus images on a Matplotlib axes at the given positions.

    Parameters:
    - ax: axes the images are drawn on.
    - img_paths: image paths, relative to the parent directory of `path_to_analysis`.
    - img_coords: one (left, top) pair per image, in data coordinates.
                  Images and coordinates are paired in order; surplus entries
                  of the longer sequence are ignored.
    - path_to_analysis: analysis directory; its parent is used as the root the
                        image paths are resolved against.
    - img_width, img_height: size at which every image is drawn, in data
                             coordinates (default 480 x 480).

    Returns:
    - The string 'done'.
    """
    # Image paths are relative to the folder that contains the analysis directory
    root_dir = os.path.dirname(path_to_analysis)

    for img_path, img_coord in zip(img_paths, img_coords):
        image = mpimg.imread(os.path.join(root_dir, img_path))

        left = img_coord[0]
        top = img_coord[1]

        # extent is (left, right, bottom, top); bottom is top + height because
        # the values are given for a y axis that grows downwards
        extent = [left, left + img_width, top + img_height, top]

        ax.imshow(image, extent=extent)

    return 'done'

def plot2d(df, fn, path_to_analysis, bboxes=True, stimuli=True):
    """
    Save one figure per trial showing the raw gaze samples and the fixations.

    Parameters:
    - df: DataFrame with the columns 'trialNr', 'locStudiedImage', 'imageLeft',
          'imageRight', 'leftX', 'rightX', 'Y', 'padding', 'bboxes',
          'user_pred_px_x', 'user_pred_px_y', 'FixXPos', 'FixYPos', 'resX'
          and 'resY'.
    - fn: participant identifier, used in the title and in the file name.
    - path_to_analysis: directory the figures are written to.
    - bboxes: draw the AOI bounding boxes when True (default).
    - stimuli: draw the stimulus images when True (default).

    Returns:
    - None. Each figure is saved as '<fn>_<locStudiedImage>_<trialNr>' in
      `path_to_analysis` and closed afterwards.
    """
    # one figure per trial
    for name, group in df.groupby('trialNr'):

        # Apply the style while the figure is created, so every figure
        # (including the first one) looks the same and the global Matplotlib
        # style is left untouched
        with plt.style.context('ggplot'):
            fig, ax = plt.subplots()
            try:
                ax.grid(False)
                n = group.locStudiedImage.iloc[0]
                ax.set_title(f'{fn}_{n}_trial{name}')

                # draw stimuli
                if stimuli:
                    draw_stimuli(
                        ax,
                        [group.imageLeft.iloc[0], group.imageRight.iloc[0]],
                        [(group.leftX.iloc[0], group.Y.iloc[0]), (group.rightX.iloc[0], group.Y.iloc[0])],
                        path_to_analysis,
                    )

                # draw bounding boxes, grown by the AOI padding
                if bboxes:
                    padding = group.padding.iloc[0]  # padding around the bounding box
                    draw_bbox(ax, group.bboxes.iloc[0], colormap='viridis', padding=padding)

                # Plot the raw samples
                raw_h = ax.scatter(group.user_pred_px_x, group.user_pred_px_y,
                                   c='orange', alpha=0.5, edgecolors='black')

                # Samples without a fixation are coded as zeros. x and y must be
                # filtered with the same mask, otherwise the two arrays can end
                # up with different lengths and scatter() raises ValueError.
                is_fixation = (group.FixXPos > 0) & (group.FixYPos > 0)
                fix_h = ax.scatter(group.FixXPos[is_fixation], group.FixYPos[is_fixation],
                                   c='blue', alpha=0.5, edgecolors='black')

                # Screen coordinates: origin in the top-left corner, y grows downwards
                ax.set_xlim(0, df.resX.iloc[0])
                ax.set_ylim(df.resY.iloc[0], 0)

                ax.set_xlabel('Horizontal eye position (pixels)')
                ax.set_ylabel('Vertical eye position (pixels)')

                ax.legend((raw_h, fix_h), ('raw samples', 'fixations'), scatterpoints=1)

                # save figure
                fig.savefig(os.path.join(path_to_analysis, f'{fn}_{n}_{name}'), dpi=300, pad_inches=0)
            finally:
                # Always release the figure, also when drawing or saving failed
                plt.close(fig)
            


### Read in the data

In [None]:
# Machine the notebook runs on; selects the data location below
WHERE = 'Office'  # 'Office' 'Home'

# Root folder of the approved pilot data. Anything other than 'Home' falls back
# to the office location.
# NOTE(review): both locations are absolute, machine-specific paths
path = (
    'C:/Users/aby600/Dropbox/Appliedwork/CognitiveSolutions/Projects/DeepEye/TechnicalReports/TechnicalReport1/Test_PreferentialViewing/Pilot_PreferentialViewing/Young/Approved'
    if WHERE == 'Home'
    else 'D:/Dropbox/Appliedwork/CognitiveSolutions/Projects/DeepEye/TechnicalReports/TechnicalReport1/Test_PreferentialViewing/Pilot_PreferentialViewing/Young/Approved'
)

# AOI padding in pixels, applied on each side; used in plot2D() and addAOI()
padding = 0

def create_directory_if_not_exists(directory_path):
    """
    Create `directory_path` (including missing parent directories) and report
    on stdout whether it was created or was already there.

    Parameters:
    - directory_path: path of the directory to create.

    Returns:
    - None.
    """
    try:
        os.makedirs(directory_path)
    except FileExistsError:
        # Nothing to do, the path is already present
        message = f"Directory '{directory_path}' already exists."
    else:
        message = f"Directory '{directory_path}' was created."

    print(message)

# define data analysis directories and create them if they don't exist yet
path_to_data = os.path.join(path, 'data')
path_to_analysis = os.path.join(path, 'analysis_new')
create_directory_if_not_exists(path_to_analysis)

output_dfs = []
output_dfs_resp = []

# get all folder names; sorted so participants are processed (and written) in
# the same order on every run and every machine
folder_names = sorted(os.listdir(path_to_data))

# read and process datafile with fixations (_extra) for each participant
for fn in folder_names:
    path_to_file = os.path.join(path_to_data, fn, fn+'_record_extra.csv')

    print(f'Processing participant {fn}...')

    try:
        df = pd.read_csv(path_to_file)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as err:
        # Skip participants whose file is missing or unreadable, but report the
        # actual reason. Any other error is a real problem and is not hidden.
        print(f'Skipping {fn}: could not read {path_to_file} ({err})')
        continue

    # Extract only samples when the target was presented.
    # Copy, because new columns are added to the selection below.
    df1 = df[df.event == 'target_on'].copy()
    if df1.empty:
        print(f'Skipping {fn}: no target_on samples')
        continue

    df1['padding'] = padding
    # Add bboxes coordinates and bboxes names to the dataframe:
    # one 480 x 480 box for the left image and one for the right image
    df1['bboxes'] = [
        [[left_x, y, 480, 480], [right_x, y, 480, 480]]
        for left_x, right_x, y in zip(df1['leftX'], df1['rightX'], df1['Y'])
    ]
    df1['bboxesNames'] = [['left', 'right'] for _ in range(len(df1))]

    # Plotting 2D fixations
    # try:
    #     plot2d(df1, fn, path_to_analysis)
    # except ValueError:
    #     continue

    df1 = getFixationLatency(df1)
    df1 = handle_carryover_fixations_and_merge(df1, max_event_duration=4000) # specify the maximum duration of the event in ms
    df1 = addAOI(df1)

    # Accumulate analyzed data across participants
    # NOTE(review): no participant column is added here; presumably the raw
    # file already identifies the participant - confirm
    output_dfs.append(df1)

if output_dfs:
    output_df = pd.concat(output_dfs)
    output_df.to_csv(os.path.join(path_to_analysis, 'allSubjects_PV_Young.csv'), index=False)
else:
    # pd.concat([]) would raise; nothing was processed, so nothing is written
    output_df = pd.DataFrame()
    print(f'No participant data could be processed in {path_to_data}; no output file written.')


## Calculate novelty index

In [None]:
def novelty_index(df, fn, image_width=480):
    """
    Compute the per-trial novelty preference in the test phase.

    Only 'fix_end' events of the 'test' phase are used. Fixations that do not
    reach one of the two images horizontally are dropped, every remaining
    fixation is labelled 'left'/'right' by its side of the screen centre and
    'old'/'novel' by comparing that side with 'locStudiedImage'.

    Parameters:
    - df: DataFrame with the columns 'phase', 'FixStartEnd', 'resX',
          'distBetweenImages', 'FixXPos', 'FixDur', 'locStudiedImage',
          'trialNr', 'frameNr', 'sampTime', 'user_pred_px_x' and
          'user_pred_px_y'. It is not modified.
          NOTE(review): the frame returned by
          handle_carryover_fixations_and_merge() carries 'fix_start...' labels
          in 'FixStartEnd', so this function presumably expects the
          un-collapsed event data - confirm.
    - fn: participant identifier, stored in the 'deepeye-id' column.
    - image_width: width of a stimulus image in pixels (default 480).

    Returns:
    - DataFrame with one row per trial (first retained fixation of the trial,
      at most the first 20 remaining columns) plus:
        'noveltyIdx_fixCountProp' - share of fixations on the novel image
        'noveltyIdx_fixDurProp'   - share of fixation time on the novel image
        'deepeye-id'              - `fn`
      The frame is empty when there are no usable test-phase fixations.
    """
    # Test-phase fixation ends; copy because columns are added below
    test_fix = df[(df['phase'] == 'test') & (df['FixStartEnd'] == 'fix_end')].copy()

    if test_fix.empty:
        # Nothing to analyse: the steps below then yield an empty result
        screen_center_x = float('nan')
    else:
        screen_center_x = test_fix['resX'].iloc[0] / 2
        # Horizontal distance from the screen centre to the inner image border
        dist_to_image_border = test_fix['distBetweenImages'].iloc[0] / 2 - image_width / 2

        # filter out fixations not reaching the image box
        reaches_image = (
            (test_fix['FixXPos'] < (screen_center_x - dist_to_image_border))
            | (test_fix['FixXPos'] > (screen_center_x + dist_to_image_border))
        )
        test_fix = test_fix[reaches_image].copy()

    # label the fixation on left or right side, and as old or novel
    test_fix['FixatedImage'] = np.where(test_fix['FixXPos'] < screen_center_x, 'left', 'right')
    test_fix['FixatedNovel'] = np.where(
        test_fix['FixatedImage'] == test_fix['locStudiedImage'], 'old', 'novel')

    # Per-trial proportions, indexed by trialNr
    is_novel = test_fix['FixatedNovel'] == 'novel'
    per_trial = test_fix.assign(
        _isNovel=is_novel,
        _novelDur=test_fix['FixDur'].where(is_novel),  # NaN (ignored by sum) for 'old'
    ).groupby('trialNr')
    count_prop = per_trial['_isNovel'].mean()
    dur_prop = per_trial['_novelDur'].sum() / per_trial['FixDur'].sum()

    # Prepare output df
    output_df = test_fix.drop_duplicates(subset=['trialNr'], ignore_index=True)  # one trial per row
    # Remember the trial of every output row before columns are removed
    trial_ids = output_df['trialNr']
    output_df = output_df.drop(['frameNr', 'sampTime', 'user_pred_px_x', 'user_pred_px_y'], axis=1)  # drop columns by name
    output_df = output_df.iloc[:, :20].copy()  # drop columns by index

    # Log the novelty indices per trial. The values are looked up by trial
    # number, so they stay with the right trial even when the trials do not
    # appear in ascending order in the data.
    output_df['noveltyIdx_fixCountProp'] = trial_ids.map(count_prop).to_numpy()
    output_df['noveltyIdx_fixDurProp'] = trial_ids.map(dur_prop).to_numpy()

    # Add subject number based on deepeye id
    output_df['deepeye-id'] = fn

    return output_df


