In [4]:

class Preprocessing():
    """Turn a raw recording of input events into a table of replayable actions.

    The input frame is expected to carry the columns ``event``, ``timestamp``,
    ``px``, ``py`` and ``trajectory`` (these are the columns the methods below
    read). Rows whose event contains ``'pressed'`` are kept as actions; left
    button press/release pairs may be re-labelled as drags and two quick left
    presses at the same place may be merged into a double click.

    The thresholds decide what is replayed as a drag or a double click, so they
    should be calibrated against the recording setup. Several methods copy the
    frame, which is simple but not memory-efficient.
    """

    def __init__(self, length_th, minpixels_th, dt_th, maxpixels_th):
        """Store the detection thresholds.

        Args:
            length_th: minimum number of trajectory points recorded on a left
                button release for it to count as the end of a drag.
            minpixels_th: minimum displacement (along x or along y) between the
                press row and the release row for a drag.
            dt_th: maximum timestamp difference between two left presses for a
                double click (same unit as the ``timestamp`` column --
                presumably seconds, confirm against the recorder).
            maxpixels_th: maximum displacement allowed along each axis between
                the two presses of a double click.
        """
        # Drags
        self.length_th = length_th
        self.minpixels_th = minpixels_th
        # Double clicks
        self.dt_th = dt_th
        self.maxpixels_th = maxpixels_th

    def run(self, sample):
        """Run the full preprocessing pipeline and return the actions table.

        Args:
            sample: raw events DataFrame (see class docstring for the columns).

        Returns:
            A new DataFrame with one row per action, the ``'pressed '`` prefix
            removed from ``event``, the ``drag2px``/``drag2py`` columns added by
            :meth:`replace_drags`, a ``delay`` column holding the time until the
            next action (0.0 for the last one) and a fresh 0..n-1 index.
        """
        # NOTE(review): replace_hotkeys is not defined in this class as shown here;
        # it has to be provided elsewhere (subclass / patched in), otherwise this
        # line raises AttributeError. Its behaviour cannot be verified from here.
        sample = self.replace_hotkeys(sample)                                   # Detect hotkey events
        # assign() builds a new frame, so the caller's DataFrame keeps its raw
        # string trajectories and run() can be called again on the same input.
        sample = sample.assign(trajectory=sample['trajectory'].map(self.string2list))
        sample = self.replace_drags(sample)                                     # Detect drag events
        pressed_mask = sample['event'].map(lambda x: 'pressed' in x).astype(bool)
        actions = sample[pressed_mask].copy()                                   # Select pressed events only
        actions = self.replace_doubleclicks(actions)                            # Detect double clicks
        actions['event'] = actions['event'].map(lambda x: x.replace('pressed ', ''))
        # delay[i] = timestamp[i+1] - timestamp[i]; the last action has no
        # successor and gets 0.0. Sizing the array from the row count keeps this
        # valid when there are zero or one actions.
        timestamps = actions['timestamp'].to_numpy()
        delays = np.zeros(len(timestamps))
        delays[:-1] = np.diff(timestamps)
        actions['delay'] = delays
        return actions.reset_index(drop=True)

    def string2list(self, string):
        """Convert a trajectory string into a list of ``(time, px, py)`` tuples.

        Args:
            string: text such as ``"[(0.1, 5, 6), (0.2, 7, 8)]"``. A list or
                tuple (an already parsed trajectory) is returned as a list
                unchanged; any other non-string value (e.g. NaN) yields ``[]``.

        Returns:
            List of ``(float, int, int)`` tuples, possibly empty.

        Raises:
            ValueError: if a point does not consist of exactly three fields
                convertible to ``float``, ``int`` and ``int``.
        """
        if isinstance(string, (list, tuple)):
            # Already parsed: keep the data instead of discarding it.
            return list(string)
        if not isinstance(string, str):
            return []
        chunks = string.strip().strip('][').split('), (')
        if chunks[0] == '':
            return []
        fields = [chunk.strip(')(').split(', ') for chunk in chunks]
        return [(float(time), int(px), int(py)) for time, px, py in fields]

    def _find_drag_pairs(self, sample):
        """Return ``(press_position, release_position)`` pairs that form drags.

        Positions are 0-based row positions, so the result does not depend on
        the index labels being consecutive integers.
        """
        events = sample['event'].tolist()
        trajectories = sample['trajectory'].tolist()
        xs = sample['px'].tolist()
        ys = sample['py'].tolist()
        pairs = []
        # Start at 1: a release in the very first row has no preceding row.
        for pos in range(1, len(events)):
            if 'released Button.left' not in events[pos]:
                continue
            # The row right before the release is taken as the press
            # (assumption inherited from the original logic; it is not checked).
            long_enough = len(trajectories[pos]) >= self.length_th
            moved_enough = (abs(xs[pos - 1] - xs[pos]) >= self.minpixels_th
                            or abs(ys[pos - 1] - ys[pos]) >= self.minpixels_th)
            if long_enough and moved_enough:
                pairs.append((pos - 1, pos))
        return pairs

    def replace_drags(self, sample):
        """Return a copy of ``sample`` with drag events encoded.

        Adds the columns ``drag2px`` and ``drag2py`` (``None`` by default). For
        every detected drag the press row's event becomes
        ``'pressed Button.left.drag'`` and its ``drag2px``/``drag2py`` receive
        the coordinates of the release row. ``sample`` itself is not modified.
        """
        samplecopy = sample.copy()
        samplecopy['drag2px'] = len(samplecopy)*[None]
        samplecopy['drag2py'] = len(samplecopy)*[None]
        for press_pos, release_pos in self._find_drag_pairs(sample):
            press = sample.index[press_pos]
            samplecopy.loc[press, 'event'] = 'pressed Button.left.drag'
            samplecopy.loc[press, 'drag2px'] = sample['px'].iloc[release_pos]
            samplecopy.loc[press, 'drag2py'] = sample['py'].iloc[release_pos]
        return samplecopy

    def find_drags(self, sample):
        """Return the index labels of the rows that start a drag.

        A drag is a ``'released Button.left'`` row whose trajectory has at least
        ``length_th`` points and whose position differs from the previous row by
        at least ``minpixels_th`` along x or y. A release in the first row is
        ignored because it has no preceding row.
        """
        return [sample.index[press_pos] for press_pos, _ in self._find_drag_pairs(sample)]

    def replace_doubleclicks(self, sample):
        """Return a copy of ``sample`` with double clicks encoded.

        For each detected pair the first press is renamed to
        ``'pressed Button.left.double'`` and the second press row is removed.
        """
        samplecopy = sample.copy()
        pairs = self.find_doubleclicks(sample)
        for first, _ in pairs:
            samplecopy.loc[first, 'event'] = 'pressed Button.left.double'
        return samplecopy.drop(index=[second for _, second in pairs])

    def find_doubleclicks(self, sample):
        """Return ``(first_label, second_label)`` pairs of double-click presses.

        Consecutive left presses are paired when they are at most ``dt_th``
        apart in time and at most ``maxpixels_th`` apart along each axis. A
        press belongs to at most one pair. Presses already marked as drags
        (``'pressed Button.left.drag'``) are not candidates, so a drag is never
        merged into, or dropped by, a double click.
        """
        candidates = [
            label for label, event in zip(sample.index, sample['event'])
            if 'pressed Button.left' in event and 'Button.left.drag' not in event
        ]
        dclicks_indexes = []
        cursor = 0
        while cursor + 1 < len(candidates):
            ix0, ix1 = candidates[cursor], candidates[cursor + 1]
            dt = sample.loc[ix1, 'timestamp'] - sample.loc[ix0, 'timestamp']
            dx = abs(sample.loc[ix0, 'px'] - sample.loc[ix1, 'px'])
            dy = abs(sample.loc[ix0, 'py'] - sample.loc[ix1, 'py'])
            if (dt <= self.dt_th) and (dx <= self.maxpixels_th) and (dy <= self.maxpixels_th):
                dclicks_indexes.append((ix0, ix1))
                cursor += 2   # both presses are consumed by this double click
            else:
                cursor += 1
        return dclicks_indexes


from glob import glob 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Position (within the glob result) of the recording to preprocess.
ix_sample = -1

# Collect every recorded CSV under the testing data folder and list them.
# NOTE(review): glob does not guarantee any ordering, so which file
# ix_sample selects can differ between machines -- consider sorting.
sample_paths = glob('../testing_data/*/*.csv')
for listed_path in sample_paths:
    print(listed_path)

# Load the selected recording; the first CSV column is used as the index.
sample_path = sample_paths[ix_sample]
print('usamos este:', sample_path)
sample = pd.read_csv(sample_path, index_col=0)

# Thresholds: drags need >= 5 trajectory points and >= 1 pixel of movement;
# double clicks need presses <= 0.22 apart (timestamp units) at the same pixel.
processing = Preprocessing(
    length_th=5,
    minpixels_th=1,
    dt_th=0.22,
    maxpixels_th=0,
)
processing.run(sample)

../testing_data/sample1/rawdata_2024-03-14 14:28:22.728846.csv
../testing_data/sample2/rawdata_2024-03-14 14:33:33.201927.csv
../testing_data/sample3/rawdata_2024-03-14 14:33:55.827738.csv
../testing_data/sample6/rawdata_2024-03-19 09:34:51.268343.csv
../testing_data/sample_drag_doubleclick/rawdata_2024-03-17 07:20:28.158737.csv
../testing_data/sample5/rawdata_2024-03-18 11:28:38.545670.csv
../testing_data/vocab_sample/rawdata_2024-03-15 11:22:49.089336.csv
../testing_data/sample_onlyclick/rawdata_2024-03-17 07:48:26.088002.csv
../testing_data/sample4/rawdata_2024-03-18 11:10:21.221982.csv
usamos este: ../testing_data/sample4/rawdata_2024-03-18 11:10:21.221982.csv


Unnamed: 0,timestamp,img_path,px,py,event,trajectory,drag2px,drag2py,delay
0,2.306088,data/sample4/screen0000000001_2.27756337448954...,1756,44,Button.left,[],,,4.486895
1,6.792983,data/sample4/screen0000000003_6.74854980036616...,1839,53,Button.left,[],,,2.611831
2,9.404814,data/sample4/screen0000000005_9.36418135836720...,1819,223,Button.left,[],,,1.655704
3,11.060518,data/sample4/screen0000000007_11.0348400855436...,1835,50,Button.left,[],,,3.754843
4,14.815361,data/sample4/screen0000000009_14.7809658115729...,31,133,Button.left,[],,,5.822582
5,20.637943,data/sample4/screen0000000011_20.5917414315044...,995,546,Button.left,[],,,2.198972
6,22.836915,data/sample4/screen0000000013_22.8019207669422...,1099,717,Button.left,[],,,3.216257
7,26.053172,data/sample4/screen0000000015_26.0273382784798...,1050,131,Button.left,[],,,2.884017
8,28.937189,data/sample4/screen0000000017_28.8982620472088...,1047,129,Button.left,[],,,3.191536
9,32.128725,data/sample4/screen0000000019_32.1026295982301...,803,127,Button.left,[],,,5.082636
