Import libraries

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import os
import astropy.convolution as krn
import scipy.stats as stats

In [18]:
def makeHeat(screenRes, xPos, yPos, kernelPar=50):
    """Build a normalised, Gaussian-smoothed gaze heat map.

    Parameters
    ----------
    screenRes : sequence
        Screen size; ``screenRes[0]`` is used as the x extent and
        ``screenRes[1]`` as the y extent of the map.
    xPos, yPos : array-like
        Gaze coordinates of equal length. Lists, tuples, numpy arrays and
        pandas Series are accepted. Only samples strictly inside the screen
        (``0 < x < screenRes[0]`` and ``0 < y < screenRes[1]``) are used;
        NaN samples fail these comparisons and are dropped as well.
    kernelPar : float, optional
        Value forwarded unchanged as the single argument of
        ``krn.Gaussian2DKernel``. Defaults to 50, the value that used to be
        hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(int(screenRes[1]), int(screenRes[0]))`` (the
        internal ``[x, y]`` map is transposed before returning), divided by
        its maximum so that the peak equals 1.
    """
    xMax, yMax = screenRes[0], screenRes[1]
    xMin, yMin = 0, 0

    # Work on plain float arrays so every array-like input behaves the same
    xPos = np.asarray(xPos, dtype=float)
    yPos = np.asarray(yPos, dtype=float)

    # Input handling: keep only the samples that fall strictly inside the screen
    onScreen = (xPos > xMin) & (xPos < xMax) & (yPos > yMin) & (yPos < yMax)

    # Truncate to whole pixels and rescale the positions (matters only if
    # xMin or yMin != 0)
    dataX = np.floor(xPos[onScreen]) - xMin
    dataY = np.floor(yPos[onScreen]) - yMin

    # Initiate the map (indexed [x, y]) with a small non-zero baseline, and
    # the Gaussian kernel
    gazeMap = np.zeros([int(xMax - xMin), int(yMax - yMin)]) + 0.0001
    gausKernel = krn.Gaussian2DKernel(kernelPar)

    # Count the samples per pixel and write the counts into the map; pixels
    # without samples keep the baseline value
    if dataX.size:
        xy = np.column_stack((dataX, dataY)).astype(int)
        uniqueXY, counts = np.unique(xy, axis=0, return_counts=True)
        gazeMap[uniqueXY[:, 0], uniqueXY[:, 1]] = counts

    # Convolve the gaze map with the Gaussian kernel and normalise to a peak of 1
    heatMap = np.transpose(krn.convolve_fft(gazeMap, gausKernel))
    heatMap = heatMap / np.max(heatMap)

    return heatMap

def uniqueRows(x):
    """Find the distinct rows of a 2-D array.

    Each row is reinterpreted as one opaque block of bytes so that
    ``np.unique`` can treat a whole row as a single item. Rows are therefore
    compared byte-wise (e.g. ``0.0`` and ``-0.0`` count as different), and
    the order of the returned rows follows that byte-wise sort rather than
    numeric order.

    Parameters
    ----------
    x : array-like, 2-D
        Input rows. Anything ``np.asarray`` accepts (ndarray, nested list,
        DataFrame, ...).

    Returns
    -------
    uniques : numpy.ndarray
        The distinct rows, taken from the input at ``idx``.
    idx : numpy.ndarray
        Index of the first occurrence of each distinct row in the input.
    counts : numpy.ndarray
        Number of times each distinct row occurs.
    """
    # Coerce once so that .dtype / .shape / fancy indexing below also work
    # for non-ndarray input
    x = np.asarray(x)
    rows = np.ascontiguousarray(x)
    # One void item per row: itemsize * number of columns bytes each
    packed = rows.view(np.dtype((np.void, rows.dtype.itemsize * rows.shape[1])))
    _, idx, counts = np.unique(packed, return_index=True, return_counts=True)
    uniques = x[idx]
    return uniques, idx, counts


def np_euclidean_distance(y_true, y_pred):
    """Euclidean distance between paired points, taken along the last axis.

    Both arguments are converted with ``np.array`` and subtracted
    element-wise; the squared differences are summed over the last axis and
    the square root of that sum is returned (one distance per point for
    ``(n, k)`` inputs).
    """
    truth = np.array(y_true)
    guess = np.array(y_pred)

    offsets = guess - truth
    squared_length = np.sum(offsets * offsets, axis=-1)
    return np.sqrt(squared_length)

def dot_error(y_true, y_pred):
    """Summarise prediction error per unique target dot.

    Parameters
    ----------
    y_true : array-like, shape (n, 2)
        Target position of every sample (columns 0 and 1 are read as x, y).
    y_pred : array-like, shape (n, 2)
        Predicted position of every sample.

    Returns
    -------
    mean_dot_error : float
        Mean over dots of the per-dot median Euclidean error.
    df : pandas.DataFrame
        One row per sample with columns ``unique_dot`` (integer label of the
        distinct ``y_true`` row), ``eucl_distance``, ``true_x``, ``true_y``,
        ``pred_x`` and ``pred_y``.
    std_dot_error : float
        Standard deviation (pandas default, ``ddof=1``) over dots of the
        per-dot median error; NaN when there is only one dot.
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    eucl_dist = np_euclidean_distance(y_true, y_pred)

    # Label every sample with the index of its unique dot position.
    # NumPy 2.0.0 returned the inverse with shape (n, 1) when `axis` is
    # given; flatten so the DataFrame column is always 1-D.
    _, indices = np.unique(y_true, axis=0, return_inverse=True)
    indices = np.ravel(indices)

    # One row per sample: unique dot label, error distance and coordinates
    df = pd.DataFrame({
        'unique_dot': indices,
        'eucl_distance': eucl_dist,
        'true_x': y_true[:, 0],
        'true_y': y_true[:, 1],
        'pred_x': y_pred[:, 0],
        'pred_y': y_pred[:, 1],
    })

    # Median error per dot (computed once), then mean / std across dots
    median_per_dot = df.groupby('unique_dot').eucl_distance.median()
    mean_dot_error = median_per_dot.mean()
    std_dot_error = median_per_dot.std()

    return float(mean_dot_error), df, float(std_dot_error)


In [34]:
# Folder that holds one sub-folder per participant; every sub-folder is
# expected to contain '<folder>/<folder>_test_all.csv'.
# NOTE(review): machine-specific absolute path - point it at your own copy of the data.
path_to_folders = 'C:/Users/artem/Dropbox/Appliedwork/CognitiveSolutions/Projects/DeepEye/TechnicalReports/TechnicalReport1/online/complete'
# path_to_folders = 'D:/Dropbox/Appliedwork/CognitiveSolutions/Projects/DeepEye/TechnicalReports/TechnicalReport1/online'


def _to_numeric_where_possible(column):
    """Convert a column to a numeric dtype; return it unchanged if a value cannot be parsed."""
    # Explicit replacement for pd.to_numeric(..., errors='ignore'), which is deprecated.
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# get all folder names (sorted so participants are processed in a reproducible order)
folder_names = sorted(os.listdir(path_to_folders))

pp_list = []
for fn in folder_names:
    path = os.path.join(path_to_folders, fn, fn+'_test_all.csv')

    df = pd.read_csv(path)

    # Find the repeated header rows via duplicates and use them to split the
    # file into datasets.
    # NOTE(review): duplicated(keep=False) flags every row that has an identical
    # twin, so this relies on repeated header rows being the only duplicated rows.
    mask_dup = df.duplicated(keep=False)
    idx_dup = df.index[mask_dup].tolist()
    idx_dup[:0] = [-1] # add lower index
    idx_dup.extend([df.shape[0]]) # add upper index

    # Use indices to parse datasets: dataset i spans the rows strictly
    # between boundary i and boundary i+1
    df_list = []
    count_datasets = 0
    last_numCalibDots = []
    for i in range(len(idx_dup) - 1):
        a = df.iloc[idx_dup[i]+1:idx_dup[i+1]]
        if a.empty:
            # two boundary rows directly after each other: nothing to parse
            continue

        # when header is written twice, some floats are str, fix this
        a = a.apply(_to_numeric_where_possible)
        a['dataset_num'] = count_datasets
        a['eucl_dist_px_orig'] = np_euclidean_distance(np.array(a[['x','y']]), np.array(a[['user_pred_px_x','user_pred_px_y']]))
        # cm per pixel of this participant's own display
        a['eucl_dist_cm_orig'] = a.eucl_dist_px_orig * (a.scrW_cm/a.resX)

        if pd.api.types.is_string_dtype(a.sona_pp_id):
            a['platform'] = 'PROLIFIC'
        else:
            a['platform'] = 'SONA'

        # Label 25-dot conditions based on the preceding calibration dataset.
        # Look back to the most recent dataset that is not itself a 25-dot
        # one, so that a repeated (recalibrated) 25-dot dataset is labelled too.
        if a.numCalibDots.iloc[0] == 25:
            prev_calib = next((n for n in reversed(last_numCalibDots) if n != 25), None)
            if prev_calib is None:
                raise ValueError(f'{path}: 25-dot dataset without a preceding calibration dataset')
            print(f'last: {prev_calib}')
            # '25_9' after a 9-dot dataset, '25_13' after a 13-dot dataset
            a['condition'] = f'25_{int(prev_calib)}'
        else:
            a['condition'] = a.numCalibDots.astype(str)

        last_numCalibDots.append(a.numCalibDots.iloc[-1]) # log last value

        # Accumulate all dataset per subject
        df_list.append(a)
        count_datasets += 1

    # if there are more than 4 datasets, remove the recalibrated ones: within
    # a run of consecutive datasets with the same numCalibDots keep the last one
    last_numCalibDots = pd.Series(last_numCalibDots)
    idx_good_datasets = last_numCalibDots.loc[last_numCalibDots.shift(-1) != last_numCalibDots] # shift dataset by one row and get indices
    df_list = [df_list[i] for i in list(idx_good_datasets.index)] # pick only the 4 datasets
    assert len(df_list) == 4, f'{fn}: expected 4 datasets, found {len(df_list)}'

    # Concatenate all datasets per subject
    b = pd.concat(df_list)

    # Add a subj_nr column
    b['subj_nr'] = fn

    # Accumulate datasets across subjects
    pp_list.append(b)

# Concatenate all subjects in one df
df_all = pd.concat(pp_list)



# Every display resolution is scaled to this one since all dots are drawn in % display size in px
target_resX = 1280.0
target_resY = 800.0

# To do:
# How many attempts
# Filter out failed last attempts
# How to deal with missing data for some dots (e.g. for '2023_04_13_13_53_28')

df_all = df_all.reset_index()


# Select subset
# Note: '2023_04_15_11_42_39' - amazing performance, but did not do 25_9
# df_all = df_all[df_all.numCalibDots == 9]
# df_all = df_all[(df_all.subj_nr == '2023_04_15_12_22_19')]
# Exclude subjects
pilot_subjects = ['2023_04_07_13_59_57', '2023_04_07_13_45_47'] # my pilot data
# .copy() so that the column assignments below write to an independent frame
# instead of a filtered view (avoids SettingWithCopyWarning)
df_all = df_all[~df_all.subj_nr.isin(pilot_subjects)].copy()


# Rescale predictions from each participant's own resolution to the target resolution
# user_predictions_px = np.array(df_all[['user_pred_px_x', 'user_pred_px_y']])
df_all['user_pred_px_x_scaled'] = df_all.user_pred_px_x/df_all.resX * target_resX
df_all['user_pred_px_y_scaled'] = df_all.user_pred_px_y/df_all.resY * target_resY
# Rescale the ground-truth dot positions the same way and round to whole pixels
# ground_truths_px = np.array(df_all[['x','y']])
df_all['x_scaled'] = np.round(df_all.x/df_all.resX * target_resX)
df_all['y_scaled'] = np.round(df_all.y/df_all.resY * target_resY)

# cm per pixel for every sample, and its mean over all samples
df_all['scale_cm_in_px'] = df_all.scrW_cm.astype(float)/df_all.resX.astype(float)
scale_cm_in_px = df_all.scale_cm_in_px.mean() # average scaling factor
# scale_cm_in_px = df_all.scrW_cm.astype(float)[0]/df_all.resX.astype(float)[0]

# Get indices of unique dot positions (unique rows)
u, indices = np.unique(np.array([df_all.x_scaled, df_all.y_scaled]).T, axis=0, return_inverse=True)
# NumPy 2.0.0 returned the inverse with shape (n, 1) when `axis` is given; keep it 1-D
indices = np.ravel(indices)
df_all['unique_dot'] = indices

last: 13
last: 9
last: 13
last: 9
last: 9
last: 13
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 9
last: 13
last: 9
last: 13
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9
last: 9
last: 13
last: 13
last: 9


In [38]:
"""
Plotting mean E.d. and SD per condition
"""

# df_all = df_all[df_all.condition == '9']

# One column of axes per condition: row 0 shows the offset, row 1 the SD.
# squeeze=False keeps `ax` two-dimensional even with a single condition.
n_conditions = max(1, df_all.condition.nunique())
fig, ax = plt.subplots(nrows=2, ncols=n_conditions, squeeze=False)
fig.set_size_inches((8.5, 11), forward=False)


# Loop thru each condition
count_plots = 0
summary_df_all = []

for name, cond_df in df_all.groupby('condition'):

    summary_df = []

    # Loop thru each subject and unique dot
    for _, dot_df in cond_df.groupby(['subj_nr', 'unique_dot']):

        # Work on a copy so that adding columns does not write into a view of df_all
        dot_df = dot_df.copy()

        # Get median gaze for each unique dot in pixels
        dot_df['median_pred_x'] = dot_df.user_pred_px_x_scaled.median()
        dot_df['median_pred_y'] = dot_df.user_pred_px_y_scaled.median()

        # Get euclidean distance from each gaze sample to median gaze for each dot
        dot_df['eucl_dist_gaze_to_median_px'] = np_euclidean_distance(
            np.array([dot_df.user_pred_px_x_scaled, dot_df.user_pred_px_y_scaled]).T,
            np.array([dot_df.median_pred_x, dot_df.median_pred_y]).T)
        dot_df['eucl_dist_gaze_to_median_cm'] = dot_df.eucl_dist_gaze_to_median_px * scale_cm_in_px

        # Get euclidean distance from median gaze to ground truth (accuracy)
        dot_df['offset_px'] = np_euclidean_distance(
            np.array([dot_df.median_pred_x, dot_df.median_pred_y]).T,
            np.array([dot_df.x_scaled, dot_df.y_scaled]).T)
        dot_df['offset_cm'] = dot_df.offset_px * scale_cm_in_px

        summary_df.append(dot_df)

    summary_df = pd.concat(summary_df)

    # Get STD (mean distance of gaze_to_median per subject)
    agg_SD = summary_df.groupby(['subj_nr'])[['eucl_dist_gaze_to_median_cm']].mean().reset_index()

    # Get accuracy
    agg_OFFSET = summary_df.groupby(['subj_nr'])[['offset_cm']].mean().reset_index()

    # Get subj nr for the largest error
    max_offset_subj = agg_OFFSET.loc[agg_OFFSET.offset_cm == agg_OFFSET.offset_cm.max(), 'subj_nr']
    print(f'Maximum offset is: {agg_OFFSET.offset_cm.max(), max_offset_subj}')
    # numeric_only=True: the frames also hold the string column 'subj_nr',
    # which cannot be averaged
    print('\nMean offset:')
    print(agg_OFFSET.mean(numeric_only=True))
    print('\nStandard deviation:')
    print(agg_SD.mean(numeric_only=True))

    # Plot euclidean distances per subject (all at x = 1), then their mean on top
    ax[0, count_plots].title.set_text(f'Condition:{cond_df.condition.iloc[0]}\nOffset')
    ax[0, count_plots].set_ylim(0,4.5)
    ax[0, count_plots].scatter(np.ones(agg_OFFSET.offset_cm.size),agg_OFFSET.offset_cm)
    ax[0, count_plots].scatter(1,agg_OFFSET.offset_cm.mean())

    # Plot SD per subject (all at x = 1), then their mean on top
    ax[1, count_plots].title.set_text(f'Condition:{cond_df.condition.iloc[0]}\nSD')
    ax[1, count_plots].set_ylim(0,4.5)
    ax[1, count_plots].scatter(np.ones(agg_SD.eucl_dist_gaze_to_median_cm.size),agg_SD.eucl_dist_gaze_to_median_cm)
    ax[1, count_plots].scatter(1,agg_SD.eucl_dist_gaze_to_median_cm.mean())

    count_plots += 1



Maximum offset is: (2.5723908273220304, 14    2023_04_13_18_25_35
Name: subj_nr, dtype: object)

Mean offset:
                subj_nr  offset_cm
0   2023_04_12_11_19_28   1.111627
1   2023_04_12_12_14_51   1.782335
2   2023_04_12_13_00_41   1.553751
3   2023_04_12_13_20_33   1.895637
4   2023_04_12_21_03_47   1.699054
5   2023_04_12_21_07_32   1.013709
6   2023_04_12_23_30_03   0.945613
7   2023_04_13_07_33_30   1.777390
8   2023_04_13_08_53_13   1.730888
9   2023_04_13_09_27_35   1.154181
10  2023_04_13_13_32_18   1.233027
11  2023_04_13_13_53_28   2.274451
12  2023_04_13_14_57_48   2.226403
13  2023_04_13_17_54_55   1.131968
14  2023_04_13_18_25_35   2.572391
15  2023_04_13_19_29_32   1.800768
16  2023_04_13_20_23_24   1.774295
17  2023_04_13_20_46_16   0.811727
18  2023_04_13_21_04_58   1.660197
19  2023_04_13_21_32_06   0.851520
20  2023_04_14_11_18_47   2.091864
21  2023_04_14_12_03_32   1.632563
22  2023_04_14_13_51_14   1.496785
23  2023_04_14_14_52_27   2.078053
24  2023_04_14_