In [9]:
# A simpler approach would be to skip this preprocessing and replay the raw events, releases included,
# which would avoid having to detect hotkeys, double clicks and drags.
class Preprocessing:
    """Turn a raw recorded event table into a table of replayable actions.

    The input DataFrame is expected to carry the columns used below:
    'timestamp', 'px', 'py', 'event' and 'trajectory'. Press/release pairs of
    the left mouse button are collapsed into drag and double-click actions,
    and a 'delay' column (time until the next action) is added.

    NOTE: the drag and double-click thresholds decide how the task is
    replayed, so they may need calibration.
    """

    def __init__(self, length_th, minpixels_th, dt_th, maxpixels_th):
        """Store the detection thresholds.

        Args:
            length_th: minimum number of trajectory points stored on the
                release row for a press/release pair to count as a drag.
            minpixels_th: minimum displacement (on either axis) between the
                press and the release for the pair to count as a drag.
            dt_th: maximum timestamp difference between two consecutive left
                presses for them to count as a double click (same units as
                the 'timestamp' column).
            maxpixels_th: maximum displacement (on each axis) between the two
                presses of a double click.
        """
        # Drags
        self.length_th = length_th
        self.minpixels_th = minpixels_th
        # Double clicks
        self.dt_th = dt_th
        self.maxpixels_th = maxpixels_th

    def run(self, sample):
        """Run the whole pipeline on `sample` and return the actions table.

        The input frame is not modified. The order of the steps matters:
        drags are encoded before double clicks are searched for.

        Returns:
            A DataFrame with a fresh 0..n-1 index and an extra 'delay' column
            holding the time to the next row (0.0 for the last row).
        """
        s1 = sample.copy()
        # Trajectories come as the string form of a list; parse them.
        s1['trajectory'] = s1['trajectory'].map(self.string2list)
        # capsnumlocks_conversion is deliberately not used (see its docstring).
        s1 = self.numlocks_conversion(s1)
        s1 = self.replace_drags(s1)
        s1 = self.replace_doubleclicks(s1)

        timestamps = s1['timestamp'].to_numpy()
        if len(timestamps):
            # Delay of each row = next timestamp - own timestamp; last row gets 0.
            delays = np.append(timestamps[1:] - timestamps[:-1], 0.0)
        else:
            # An empty sample has no trailing row to pad with 0.0.
            delays = np.array([], dtype=float)
        s1['delay'] = delays
        return s1.reset_index(drop=True)

    def capsnumlocks_conversion(self, sample):
        """This function is only partially correct, don't use it.

        It is not prepared for the special case where the user keeps
        caps_lock pressed while typing; that behaviour is not understood yet,
        so `run` does not call this method.
        """
        samplecopy = sample.reset_index(drop=True)
        numlock_flag = False
        capslock_flag = False
        for i, event in enumerate(samplecopy['event']):
            if event == 'pressed Key.caps_lock':
                capslock_flag = not capslock_flag
            elif event == 'pressed Key.num_lock':
                numlock_flag = not numlock_flag
            elif '<65437>' in event:  # Special case in Ubuntu only
                replacement = '5' if numlock_flag else ''
                samplecopy.loc[i, 'event'] = event.replace('<65437>', replacement)
            elif len(event.replace('pressed ', '')) == 1 or len(event.replace('released ', '')) == 1:
                # Single-character key: force its case from the caps_lock state.
                last = event[-1].upper() if capslock_flag else event[-1].lower()
                samplecopy.loc[i, 'event'] = event[:-1] + last
        return samplecopy

    def numlocks_conversion(self, sample):
        """Convert the event <65437> into 5 or None for Ubuntu.

        The num_lock state starts as off and is toggled by every
        'pressed Key.num_lock' row. Returns a re-indexed copy of `sample`.
        """
        samplecopy = sample.reset_index(drop=True)
        numlock_flag = False
        for i, event in enumerate(samplecopy['event']):
            if event == 'pressed Key.num_lock':
                numlock_flag = not numlock_flag
            elif '<65437>' in event:  # Special case in Ubuntu only
                replacement = '5' if numlock_flag else 'None'
                samplecopy.loc[i, 'event'] = event.replace('<65437>', replacement)
        return samplecopy

    def string2list(self, string):
        """Convert the trajectory string into an actual python list.

        Expects text such as '[(0.1, 10, 20), (0.2, 11, 21)]' and returns a
        list of (float, int, int) tuples. Non-string input (e.g. NaN) and the
        empty list '[]' both give [].
        """
        if not isinstance(string, str):
            return []
        chunks = string.strip('][').split('), (')
        if chunks[0] == '':
            return []
        trajectory = []
        for chunk in chunks:
            time, px, py = chunk.strip(')(').split(', ')
            trajectory.append((float(time), int(px), int(py)))
        return trajectory

    def replace_drags(self, sample):
        """Transform the dataframe to encode drag events.

        For every drag found by `find_drags`, the release row takes the press
        coordinates and the event name 'Button.left.drag', and the press row
        is removed. Returns a re-indexed copy with the press rows dropped
        (the remaining rows keep their labels from that re-indexing).
        """
        samplecopy = sample.reset_index(drop=True)
        drags_indexes = self.find_drags(samplecopy)
        for ix in drags_indexes:
            samplecopy.loc[ix + 1, 'px'] = samplecopy.at[ix, 'px']
            samplecopy.loc[ix + 1, 'py'] = samplecopy.at[ix, 'py']
            samplecopy.loc[ix + 1, 'event'] = 'Button.left.drag'
        if drags_indexes:
            samplecopy = samplecopy.drop(index=drags_indexes)
        return samplecopy

    def find_drags(self, sample):
        """Return the labels of the press rows that start a drag.

        A drag is a 'pressed Button.left' row immediately followed by a
        'released Button.left' row whose trajectory has at least `length_th`
        points and whose position differs from the press by at least
        `minpixels_th` on either axis. Assumes consecutive integer labels.
        """
        events = sample['event']
        drags_indexes = []
        for ix in sample.index[events == 'released Button.left']:
            # A release on the very first row has no preceding press to pair with.
            if (ix - 1) not in events.index:
                continue
            if events.at[ix - 1] != 'pressed Button.left':
                continue
            length = len(sample.at[ix, 'trajectory'])
            dx = abs(sample.at[ix - 1, 'px'] - sample.at[ix, 'px'])
            dy = abs(sample.at[ix - 1, 'py'] - sample.at[ix, 'py'])
            moved = (dx >= self.minpixels_th) or (dy >= self.minpixels_th)
            if (length >= self.length_th) and moved:
                drags_indexes.append(ix - 1)
        return drags_indexes

    def replace_doubleclicks(self, sample):
        """Transform the dataframe to encode double-click events.

        For each pair (ix0, ix1) from `find_doubleclicks`, row ix0 becomes
        'Button.left.double' and takes the timestamp of row ix1+1 (the row
        right after the second press); rows ix0+1, ix1 and ix1+1 are removed.
        If the second press is the last recorded row, its own timestamp is
        used and only the rows that exist are removed.
        """
        samplecopy = sample.reset_index(drop=True)
        last_ix = len(samplecopy) - 1
        to_drop = []
        for ix0, ix1 in self.find_doubleclicks(samplecopy):
            end_ix = min(ix1 + 1, last_ix)
            samplecopy.loc[ix0, 'event'] = 'Button.left.double'
            samplecopy.loc[ix0, 'timestamp'] = samplecopy.at[end_ix, 'timestamp']
            to_drop.extend(ix for ix in (ix0 + 1, ix1, ix1 + 1) if ix <= last_ix)
        if to_drop:
            # Labels are stable until here, so one drop at the end is enough.
            samplecopy = samplecopy.drop(index=to_drop)
        return samplecopy

    def find_doubleclicks(self, sample):
        """Return (first_press, second_press) label pairs forming double clicks.

        Two consecutive 'pressed Button.left' rows form a double click when
        their timestamps differ by at most `dt_th` and their positions differ
        by at most `maxpixels_th` on each axis. A press used in one pair is
        not reused in the next one.
        """
        pBleft_indexes = sample.index[sample['event'] == 'pressed Button.left'].tolist()
        dclicks_indexes = []
        pos = 0
        while pos + 1 < len(pBleft_indexes):
            ix0, ix1 = pBleft_indexes[pos], pBleft_indexes[pos + 1]
            dt = sample.at[ix1, 'timestamp'] - sample.at[ix0, 'timestamp']
            dx = abs(sample.at[ix0, 'px'] - sample.at[ix1, 'px'])
            dy = abs(sample.at[ix0, 'py'] - sample.at[ix1, 'py'])
            if (dt <= self.dt_th) and (dx <= self.maxpixels_th) and (dy <= self.maxpixels_th):
                dclicks_indexes.append((ix0, ix1))
                pos += 2  # both presses are consumed by this double click
            else:
                pos += 1
        return dclicks_indexes

from glob import glob 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Index (into the sorted list of recordings) of the sample to preprocess.
ix_sample = -1
# glob gives no ordering guarantee, so sort to make `ix_sample` pick the
# same file on every run.
sample_paths = sorted(glob('/home/ezamorag/Datasets/dataprueba/data/*/raw_pcdata.csv'))
for path in sample_paths:
    print(path)
sample_path = sample_paths[ix_sample]
print('usamos este:',sample_path)
sample = pd.read_csv(sample_path)

# Thresholds: drags need >= 5 trajectory points and >= 1 pixel of movement;
# double clicks need presses <= 0.22 apart (timestamp units) at the same pixel.
processing = Preprocessing(length_th = 5, minpixels_th = 1, dt_th = 0.22, maxpixels_th = 0)
a = processing.run(sample)
print(a.to_string())

/home/ezamorag/Datasets/dataprueba/data/sample001/raw_pcdata.csv
usamos este: /home/ezamorag/Datasets/dataprueba/data/sample001/raw_pcdata.csv
     timestamp                                                img_path    px   py                   event                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                