In [76]:
# A simpler approach would be to skip this preprocessing and replay the raw events, releases included,
# which would avoid having to detect hotkeys, double clicks and drags.
class Preprocessing():
    """Convert a raw recording of mouse/keyboard events into replayable actions.

    The input is a DataFrame with (at least) the columns ``event``,
    ``timestamp``, ``px``, ``py`` and ``trajectory``.  ``run`` detects drags,
    double clicks, shift-renamed releases and hotkeys, keeps only the pressed
    and scroll rows, and attaches to each action the delay until the next one.

    Most steps locate neighbouring rows with ``ix - 1`` / ``ix + 1`` label
    arithmetic, so they expect a default ``0..n-1`` integer index.
    Many steps copy the DataFrame, so this is not memory efficient.
    """

    def __init__(self, length_th, minpixels_th, dt_th, maxpixels_th):
        """Store the detection thresholds.

        The thresholds decide which clicks are replayed as drags or double
        clicks, so a bad calibration changes the replayed task.

        Args:
            length_th: minimum number of trajectory points of a release for it
                to count as a drag.
            minpixels_th: minimum |dx| or |dy| between press and release for a
                drag.
            dt_th: maximum timestamp difference between two left presses for a
                double click.
            maxpixels_th: maximum |dx| and |dy| between two left presses for a
                double click.
        """
        # Drags
        self.length_th = length_th
        self.minpixels_th = minpixels_th
        # Double clicks
        self.dt_th = dt_th
        self.maxpixels_th = maxpixels_th

    def run(self, sample):
        """Run the whole pipeline and return the actions DataFrame.

        The returned frame has a fresh index, the ``event`` column without the
        ``'pressed '`` prefix and a ``delay`` column holding the time to the
        next action (0.0 for the last one).  ``sample`` is not modified.
        """
        # The order of the following steps is important.
        # A fresh 0..n-1 index is required by the ix +/- 1 lookups below.
        s1 = sample.reset_index(drop=True)
        s1['trajectory'] = s1['trajectory'].map(self.string2list)   # String -> actual list
        s1 = self.replace_drags(s1)                                 # Detect drag events
        s1 = self.replace_doubleclicks(s1)                          # Detect double clicks
        s1 = self.converts_shifted_events(s1)                       # Fix releases renamed by shift
        s1 = self.replace_hotkeys(s1)                               # Detect hotkeys and combinations
        # Keep only pressed and scroll events
        is_action = s1['event'].map(lambda x: ('pressed' in x) or ('Scroll.' in x)).astype(bool)
        s1 = s1[is_action].copy()
        s1['event'] = s1['event'].map(lambda x: x.replace('pressed ', ''))
        # Delay of an action = time until the next one; the last action gets 0.0.
        # The slice keeps the lengths equal when no action is left at all.
        timestamps = s1['timestamp'].to_numpy()
        s1['delay'] = np.append(np.diff(timestamps), 0.0)[:len(timestamps)]
        return s1.reset_index(drop=True)

    def converts_shifted_events(self, sample):
        """Rename releases whose symbol differs from its press because of shift.

        For every row containing ``Key.shift`` the ``delta + 1`` following rows
        are scanned for single-character releases.  If the shift-counterpart of
        that character was pressed in the ``delta`` rows before the shift row
        (and the first row of that window was not released right away), the
        release is rewritten to match the press.  Returns a re-indexed copy.
        """
        delta = 2  # 1 or 2, other values have not been tested
        # Assuming this conversion is valid for all keyboards
        no_shifted = "|1234567890'¿qwertyuiop´+asdfghjklñ{}<zxcvbnm,.-"
        shifted =    '°!"#$%&/()=?¡QWERTYUIOP¨*ASDFGHJKLÑ[]>ZXCVBNM;:_'

        def converts(c):
            """Return the shift-counterpart of ``c`` or None when unknown."""
            if c in no_shifted:
                return shifted[no_shifted.index(c)]
            if c in shifted:
                return no_shifted[shifted.index(c)]
            print(f'This character was not found in shifted strings: {c}')
            return None

        samplecopy = sample.reset_index(drop=True)
        # 'Key.shift' is a substring of 'Key.shift_l' and 'Key.shift_r'
        ixs_shift = [i for i, e in samplecopy['event'].items() if 'Key.shift' in e]
        for ix in ixs_shift:
            after = samplecopy.loc[ix+1:ix+1+delta, 'event']
            ix_releases = [i for i, e in after.items() if 'released' in e]
            # Clamp so a shift event in the first rows does not look up a
            # negative (missing) label.
            start = max(ix - delta, 0)
            for ixR in ix_releases:
                noshifted_key = samplecopy.loc[ixR, 'event'].replace('released ', '')
                if len(noshifted_key) != 1:     # Only single characters are renamed by shift
                    continue
                shifted_key = converts(noshifted_key)
                if shifted_key is None:         # Unknown symbol: nothing to correct
                    continue
                before = samplecopy.loc[start:ix, 'event']
                if not (before == 'pressed ' + shifted_key).any():
                    continue
                r1 = samplecopy.loc[start, 'event'].replace('pressed', 'released')
                if samplecopy.loc[start + 1, 'event'] != r1:
                    print(f'Correction for the shift event at {ixR}. ', samplecopy.loc[ixR,'event'], ' --> ' 'released ' + shifted_key)
                    samplecopy.loc[ixR, 'event'] = 'released ' + shifted_key
        return samplecopy

    def replace_hotkeys(self, sample):
        """Collapse every hotkey span into ``'pressed k1+k2+...'`` rows.

        Key insight: consecutive pressed events form a hotkey, and the number
        of releases (``di``) between two groups of consecutive presses tells
        how many of the most recent keys were let go; the remaining ones
        (``basepressed``) are still held and prefix the next group.  All other
        rows of the span, releases included, are removed.
        Returns a re-indexed-then-filtered copy (labels keep their gaps).
        """
        samplecopy = sample.reset_index(drop=True)
        for ix, ixN in self.find_hotkeys(samplecopy):
            span = samplecopy.loc[ix:ixN, 'event']
            ixs_pressed = [i for i, e in span.items() if 'pressed' in e]
            pgroups = self.group_consecutive(ixs_pressed)
            keep_ixs = set()
            basepressed = []
            for i, group in enumerate(pgroups):
                pgroup = basepressed + group
                keys = [samplecopy.loc[k, 'event'].replace('pressed ', '') for k in pgroup]
                samplecopy.loc[pgroup[-1], 'event'] = 'pressed ' + '+'.join(keys)
                keep_ixs.add(pgroup[-1])
                if i + 1 < len(pgroups):
                    # Groups are never adjacent, so di >= 1 and the row just
                    # rewritten is never reused as a base key.
                    di = pgroups[i+1][0] - pgroup[-1] - 1
                    basepressed = pgroup[:-di]
            remove_ixs = [i for i in range(ix, ixN + 1) if i not in keep_ixs]
            samplecopy = samplecopy.drop(remove_ixs)
        return samplecopy

    def group_consecutive(self, numbers):
        """Split a list of integers into runs of consecutive values.

        ``[1, 2, 3, 7, 8]`` -> ``[[1, 2, 3], [7, 8]]``; an empty list gives
        an empty list.
        """
        groups = []
        for number in numbers:
            if groups and number == groups[-1][-1] + 1:
                groups[-1].append(number)
            else:
                groups.append([number])
        return groups

    def find_hotkeys(self, sample):
        """Return ``(ix, ixN)`` spans that are hotkey candidates.

        ``ix`` is a pressed row and ``ixN`` the first later row releasing the
        same key.  A span is a candidate when it holds at least one row in
        between, its pressed keys are not all plain characters/space and are
        not one key repeated.  A press without any release is reported and
        skipped instead of aborting the whole search.
        """
        events = sample['event']
        ixs_pressed = [i for i, e in events.items() if 'pressed' in e]
        hotkeys_indexes = []
        while len(ixs_pressed) >= 2:  # There must be at least 2 pressed events
            ix = ixs_pressed[0]
            released1 = events.loc[ix].replace('pressed', 'released')
            later = events.loc[ix:]
            matches = later.index[(later == released1).to_numpy()]
            if len(matches) == 0:
                print(f'There is no release event for {events.loc[ix]} at the line = {ix}')
                ixs_pressed = ixs_pressed[1:]
                continue
            ixN = int(matches[0])
            if (ixN - ix >= 2
                    and not self.are_all_single_chars(ix, ixN, sample)
                    and not self.are_all_thesamekey(ix, ixN, sample)):
                hotkeys_indexes.append((ix, ixN))
            # Presses inside the span belong to it; continue after the release.
            ixs_pressed = [i for i in ixs_pressed if not (ix <= i < ixN)]
        return hotkeys_indexes

    def _pressed_keys(self, ix, ixN, sample):
        """Key names of the pressed events at positions ``ix`` .. ``ixN - 1``."""
        span = sample['event'].iloc[ix:ixN]
        return [e.replace('pressed ', '') for e in span if 'pressed' in e]

    def are_all_single_chars(self, ix, ixN, sample):
        """Check if all pressed keys in the span are characters or Key.space."""
        # Hotkeys must have at least one non-character key other than Key.space
        return all(len(key) == 1 or key == 'Key.space'
                   for key in self._pressed_keys(ix, ixN, sample))

    def are_all_thesamekey(self, ix, ixN, sample):
        """Check if all pressed keys in the span are the same key."""
        # In this case it is not a hotkey, but a key held down continuously
        return len(set(self._pressed_keys(ix, ixN, sample))) <= 1

    def string2list(self, string):
        """Convert the trajectory string into an actual python list.

        ``'[(0.1, 10, 20), (0.2, 11, 21)]'`` -> ``[(0.1, 10, 20), (0.2, 11, 21)]``.
        Anything that is not a string (e.g. NaN) and the empty list string
        give ``[]``.
        """
        if not isinstance(string, str):
            return []
        chunks = string.strip('][').split('), (')
        if chunks[0] == '':
            return []
        trajectory = []
        for chunk in chunks:
            time, px, py = chunk.strip(')(').split(', ')
            trajectory.append((float(time), int(px), int(py)))
        return trajectory

    def replace_drags(self, sample):
        """Return a copy where detected drags are encoded in place.

        The press row becomes ``'pressed Button.left.drag'`` and receives the
        release position (``drag2px``/``drag2py``) and the release trajectory;
        the following row becomes ``'released Button.left.drag'`` with an
        empty trajectory.
        """
        samplecopy = sample.copy()
        samplecopy['drag2px'] = len(samplecopy)*[None]
        samplecopy['drag2py'] = len(samplecopy)*[None]
        for ix in self.find_drags(sample):
            samplecopy.loc[ix, 'event'] = 'pressed Button.left.drag'
            samplecopy.loc[ix+1, 'event'] = 'released Button.left.drag'
            samplecopy.loc[ix, 'drag2px'] = sample.loc[ix+1, 'px']
            samplecopy.loc[ix, 'drag2py'] = sample.loc[ix+1, 'py']
            samplecopy.at[ix, 'trajectory'] = sample.loc[ix+1, 'trajectory']
            samplecopy.at[ix+1, 'trajectory'] = []
        return samplecopy

    def find_drags(self, sample):
        """Return the labels of the rows that start a drag.

        A ``'released Button.left'`` row is a drag end when its trajectory has
        at least ``length_th`` points and it moved at least ``minpixels_th``
        pixels in x or y with respect to the previous row (presumably its
        press).  A release with no previous row is ignored.
        """
        releases = [i for i, e in sample['event'].items() if e == 'released Button.left']
        drags_indexes = []
        for ix in releases:
            if (ix - 1) not in sample.index:    # Recording starts with a release
                continue
            length = len(sample.loc[ix, 'trajectory'])
            dx = abs(sample.loc[ix-1, 'px'] - sample.loc[ix, 'px'])
            dy = abs(sample.loc[ix-1, 'py'] - sample.loc[ix, 'py'])
            if (length >= self.length_th) and ((dx >= self.minpixels_th) or (dy >= self.minpixels_th)):
                drags_indexes.append(ix-1)
        return drags_indexes

    def replace_doubleclicks(self, sample):
        """Return a copy where each double click is a single press/release pair.

        The first press becomes ``'pressed Button.left.double'``, the row after
        the second press becomes ``'released Button.left.double'`` and the two
        rows in between (first release, second press) are dropped.
        """
        samplecopy = sample.copy()
        to_drop = set()
        for ix0, ix1 in self.find_doubleclicks(sample):
            samplecopy.loc[ix0, 'event'] = 'pressed Button.left.double'
            # Without this guard a recording ending on the second press would
            # get a new, otherwise empty, row appended.
            if (ix1 + 1) in samplecopy.index:
                samplecopy.loc[ix1+1, 'event'] = 'released Button.left.double'
            to_drop.update((ix0 + 1, ix1))
        if to_drop:
            samplecopy = samplecopy.drop(sorted(to_drop))
        return samplecopy

    def find_doubleclicks(self, sample):
        """Return ``(ix0, ix1)`` pairs of left presses forming a double click.

        Two successive ``'pressed Button.left'`` rows form a double click when
        they are at most ``dt_th`` apart in time and at most ``maxpixels_th``
        apart in both x and y.  A press is used in at most one pair.
        """
        presses = [i for i, e in sample['event'].items() if e == 'pressed Button.left']
        dclicks_indexes = []
        pos = 0
        while pos + 1 < len(presses):
            ix0, ix1 = presses[pos], presses[pos + 1]
            dt = sample.loc[ix1, 'timestamp'] - sample.loc[ix0, 'timestamp']
            dx = abs(sample.loc[ix0, 'px'] - sample.loc[ix1, 'px'])
            dy = abs(sample.loc[ix0, 'py'] - sample.loc[ix1, 'py'])
            if (dt <= self.dt_th) and (dx <= self.maxpixels_th) and (dy <= self.maxpixels_th):
                dclicks_indexes.append((ix0, ix1))
                pos += 2    # Both presses are consumed
            else:
                pos += 1
        return dclicks_indexes
    
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Pick one recorded sample, preprocess it and save the resulting actions.
ix_sample = 6
# Alternative location: glob('../data/*/raw_pcdata*.csv')
sample_paths = sorted(glob('/home/ezamorag/Datasets/pccloner/*/*/*/raw_pcdata*.csv'))
for found_path in sample_paths:
    print(found_path)
# Fail with an explicit message when the dataset folder holds fewer files than expected.
if ix_sample >= len(sample_paths):
    raise IndexError(f'ix_sample={ix_sample} but only {len(sample_paths)} raw_pcdata files were found')
sample_path = sample_paths[ix_sample]
print('usamos este:',sample_path)
sample = pd.read_csv(sample_path)

# Thresholds: drag needs >= 5 trajectory points and >= 1 pixel of movement;
# double click needs <= 0.22 between presses and no pixel displacement.
processing = Preprocessing(length_th = 5, minpixels_th = 1, dt_th = 0.22, maxpixels_th = 0)
a = processing.run(sample)
a.to_csv('a.csv')

/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample1/raw_pcdata_2024-04-10_10-42-29.csv
/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample2/raw_pcdata_2024-04-10_13-18-08.csv
/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample3/raw_pcdata_2024-04-11_11-55-55.csv
/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample4/raw_pcdata_2024-04-15_11-43-00.csv
/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample5/raw_pcdata_2024-04-17_10-47-51.csv
/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample6/raw_pcdata_2024-04-18_11-24-46.csv
/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample7/raw_pcdata_2024-04-19_10-15-17.csv
/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample8/raw_pcdata_2024-04-19_11-36-17.csv
/home/ezamorag/Datasets/pccloner/saulo-pccloner15/data_ubuntu2004/sample9/raw_pcdata_2024-04-19_11-41-36.csv
/home/ezamorag/Data

In [52]:
def check(sample):
    """Print sanity diagnostics about the press/release pairing of a sample.

    Reports (on stdout) whether the first event is a release, every pressed
    event that has no later matching release, and every press whose release
    comes more than 10 rows later (a hotkey candidate), followed by the number
    of such candidates.  Reported line numbers are row positions, which equal
    the labels for a default index.  Returns None.
    """
    # Work on positions so that a filtered or re-labelled frame is handled
    # the same way as a freshly loaded one.
    events = sample['event'].reset_index(drop=True)

    # First event must always be a pressed event
    if len(events) > 0 and 'released' in events.iloc[0]:
        print('The first event is a release!!! ...') 

    # Check that every pressed key has its release event
    c = 0
    for i, event in enumerate(events):
        if 'pressed' not in event:
            continue
        released_event = event.replace('pressed', 'released')
        later = events.iloc[i:]
        ir = later.index[(later == released_event).to_numpy()]
        if len(ir) == 0:
            print(f'\nThere is no release event for {event} at the line = {i}\n')
            continue
        iN = ir[0]
        if iN - i > 10:
            print(i, iN - i, ' : ', events.iloc[i], '  ', events.iloc[iN])
            c += 1
    print(f'# of hoykeys candidates = {c}')

# Print the press/release diagnostics for the sample held in `sample`
check(sample)

# of hoykeys candidates = 0


In [72]:
# Sorted vocabulary of distinct actions produced by the preprocessing
x = sorted(a['event'].unique().tolist())
print(len(x))
x

19


['Button.left',
 'Key.backspace',
 'Key.enter',
 'Key.shift_r+C',
 'Key.space',
 'a',
 'b',
 'd',
 'e',
 'g',
 'i',
 'j',
 'l',
 'n',
 'o',
 'r',
 's',
 't',
 'u']

In [17]:

"""
The following issues caused false-positive hotkey detections.  SOLVED!
    They occur because holding Key.shift changes the reported key name between the press and the release.
    pressed # at the line = 341 ->  change 3 to # in the next "released 3"
    pressed C at the line = 3019 -> change c to C in the next "released c"
    2302 809  :  pressed =    released =  -> change "released 0" to "released =" at 2304
    4036 1161  :  pressed {    released {  -> change "released [" to "released {" at 4038
    3118 45  :  pressed T    released T  -> change "released t" to "released T" at 3120

    

Scroll and Button events can occur while a key is held down continuously.  The preprocessing is not prepared for this situation.

'Key.shift_r',   Key.shift issues!
'Key.shift_r+(',
'Key.shift_r+)',
'Key.shift_r+:',
'Key.shift_r+=',
'Key.shift_r+?',
'Key.shift_r+B',
'Key.shift_r+C',
'Key.shift_r+F',
'Key.shift_r+G',
'Key.shift_r+Key.ctrl',
'Key.shift_r+Key.ctrl+Z',
'Key.shift_r+Key.enter',
'Key.shift_r+Key.space',
'Key.shift_r+None',
'Key.shift_r+O',
'Key.shift_r+O+D',
'Key.shift_r+R',
'Key.shift_r+T',
'Key.shift_r+[',
'Key.shift_r+]',
'Key.shift_r+_',

'Key.space+c',   they can be treated like all_single_samecharacter, SOLVED!
'Key.space+d',
'Key.space+e',

'None',            Check mapping!!
'Button.button8',  He uses a special mouse with two additional buttons and horizontal scroll
 
'e+Button.left',  line 404   No problem executing these weird combinations! They just increase the vocabulary unnecessarily 
'Button.left+Key.space',
'{+Key.shift_r'  line 4038  People make mistakes?
'Key.enter+}',
'Key.enter+Key.ctrl',
'Key.ctrl_r+Key.shift_r',
'Key.backspace+z',
'Key.up+Key.tab', is this correct?

There are more issues in Jordi data and Donaldo data!!!
"""

# Key.caps_lock does not change the value of pressed keys 

# Prepare the collector to save horizontal scroll

# Prepare the collector to save more than 1 scroll step 

308 18  :  pressed Key.shift_r    released Key.shift_r

There is no release event for pressed # at the line = 341

351 11  :  pressed Key.shift_r    released Key.shift_r
636 11  :  pressed Key.shift_r    released Key.shift_r
2020 12  :  pressed Key.ctrl    released Key.ctrl
2301 809  :  pressed =    released =

There is no release event for pressed C at the line = 3019

3117 45  :  pressed T    released T
4035 1161  :  pressed {    released {
4117 16  :  pressed Key.backspace    released Key.backspace
4118 15  :  pressed Key.backspace    released Key.backspace
4119 14  :  pressed Key.backspace    released Key.backspace
4120 13  :  pressed Key.backspace    released Key.backspace
4121 12  :  pressed Key.backspace    released Key.backspace
4122 11  :  pressed Key.backspace    released Key.backspace
4765 24  :  pressed Key.ctrl    released Key.ctrl
5114 15  :  pressed Key.ctrl    released Key.ctrl
# of hoykeys candidates = 15


"\n'Key.shift_r',   Key.shift issues!\n'Key.shift_r+(',\n'Key.shift_r+)',\n'Key.shift_r+:',\n'Key.shift_r+=',\n'Key.shift_r+?',\n'Key.shift_r+B',\n'Key.shift_r+C',\n'Key.shift_r+F',\n'Key.shift_r+G',\n'Key.shift_r+Key.ctrl',\n'Key.shift_r+Key.ctrl+Z',\n'Key.shift_r+Key.enter',\n'Key.shift_r+Key.space',\n'Key.shift_r+None',\n'Key.shift_r+O',\n'Key.shift_r+O+D',\n'Key.shift_r+R',\n'Key.shift_r+T',\n'Key.shift_r+[',\n'Key.shift_r+]',\n'Key.shift_r+_',\n\n'Key.space+c',   they can be treated like all_single_samecharacter, SOLVED!\n'Key.space+d',\n'Key.space+e',\n\n'None',            Check mapping!!\n'Button.button8',  He uses a special mouse with two additional buttons and horizontal scroll\n \n'e+Button.left',  line 404   No problem to execute these weird combinations! It just increases the vocabulary unnecessary \n'Button.left+Key.space',\n'{+Key.shift_r'  line 4038  People make mistakes?\n'Key.enter+}',\n'Key.enter+Key.ctrl',\n'Key.ctrl_r+Key.shift_r',\n'Key.backspace+z',\n'Key.up+Key.t

In [None]:
### Version 2 
    def replace_hotkeys(self, sample):
        """Replace hotkey spans by combined 'pressed k1+k2+...' events (version 2).

        For every (ix, ixN) span returned by ``self.find_hotkeys`` two layouts
        are handled; any other layout is left untouched.  Returns a modified
        copy; ``sample`` itself is only read.
        """
        samplecopy = sample.copy()
        for ix, ixN in self.find_hotkeys(samplecopy):
            # Indexes of the rows in sample[ix:ixN] whose event contains 'pressed'
            ixs_pressed = sample[ix:ixN].index[sample['event'][ix:ixN].map(lambda x: 'pressed' in x)]
            # The combined event always starts with the first pressed key of the span
            newevent = 'pressed ' + samplecopy.loc[ixs_pressed[0], 'event'].replace('pressed ', '')
            # event1 + ... + eventN
            # All presses are on adjacent rows: join them into the last pressed row,
            # then drop the other pressed rows and every released row up to ixN.
            if self.are_pressedkeys_consecutive(ixs_pressed.tolist()): 
                for ix_pressed in ixs_pressed[1:]: 
                    newevent += '+' + samplecopy.loc[ix_pressed, 'event'].replace('pressed ', '')
                samplecopy.loc[ixs_pressed[-1], 'event'] = newevent
                remove_ixs = ixs_pressed[:-1].tolist() + sample[ix:ixN+1].index[sample['event'][ix:ixN+1].map(lambda x: 'released' in x)].tolist()
                samplecopy = samplecopy.drop(remove_ixs)
            # event1 + {event2 + ... + eventN}
            # The presses after the first one are two rows apart: each of them becomes
            # 'first+key', the row right after it is dropped, and finally the first
            # pressed row and row ixN are dropped.
            elif self.are_sequentialhotkeys(ixs_pressed[1:].tolist()):
                for ix_pressed in ixs_pressed[1:]:
                    samplecopy.loc[ix_pressed, 'event'] = newevent + '+' + samplecopy.loc[ix_pressed, 'event'].replace('pressed ', '')
                    samplecopy = samplecopy.drop([ix_pressed+1])
                samplecopy = samplecopy.drop([ixs_pressed[0], ixN])
            # Other cases are not included, e.g. event1 + event2 + {event3 + ... + eventN}
            else:
                # Nothing is done for them: the rows of the span stay as they are.
                pass
        return samplecopy 
    
    def are_sequentialhotkeys(self, numbers_sorted):
        """Tell whether every value is exactly 2 above its predecessor.

        Lists with fewer than two values trivially qualify.
        """
        neighbours = zip(numbers_sorted, numbers_sorted[1:])
        return all(prev + 2 == nxt for prev, nxt in neighbours)
    
    def are_pressedkeys_consecutive(self, numbers_sorted):
        """Tell whether every value is exactly 1 above its predecessor.

        Lists with fewer than two values trivially qualify.
        """
        neighbours = zip(numbers_sorted, numbers_sorted[1:])
        return all(prev + 1 == nxt for prev, nxt in neighbours)
    
    def find_hotkeys(self, sample):
        """Return (ix, ixN) spans that are hotkey candidates (version 2).

        ``ix`` is a pressed row and ``ixN`` the first later row that releases
        the same key.  The span is kept when ``ixN - ix >= 3``.  A press with
        no later release is reported and skipped instead of raising.
        """
        events = sample['event']
        ixs_pressed = [i for i, e in events.items() if 'pressed' in e]
        hotkeys_indexes = []
        while len(ixs_pressed) >= 2:  # To not include the end Key.esc
            ix = ixs_pressed[0]
            released1 = events.loc[ix].replace('pressed', 'released')
            later = events.loc[ix:]
            matches = later.index[(later == released1).to_numpy()]
            if len(matches) == 0:
                print(f'There is no release event for {events.loc[ix]} at the line = {ix}')
                ixs_pressed = ixs_pressed[1:]
                continue
            ixN = int(matches[0])
            # 3 avoids detecting hotkeys during plain typing.  Careful: this is a weak condition.
            if ixN - ix >= 3:
                hotkeys_indexes.append((ix, ixN))
            # Presses inside the span belong to it; continue after the release.
            ixs_pressed = [i for i in ixs_pressed if not (ix <= i < ixN)]
        return hotkeys_indexes

In [None]:
### Version 1


In [None]:
        # An action containing '+' is treated as a hotkey: its parts are the keys to hold together.
        if '+' in action: 
            # classify_keystroke returns a sequence; only its first element is used for each part
            hotkeys = [self.classify_keystroke(hotkey)[0] for hotkey in action.split('+')] 
            # Press the keys in the written order ...
            for key in hotkeys:
                self.keyboard.press(key)
            # ... and release them in the reverse order
            for key in hotkeys[::-1]:
                self.keyboard.release(key)

In [2]:
    def run(self, action, position, endposition, delay):
        """Execute one action and then sleep for ``delay``.

        * ``'a+b+...'``: every part is handled in order (mouse parts through
          ``mouse_mapping``, keyboard parts are pressed), then the keyboard
          parts are released in reverse order.
        * a mouse action: dispatched through ``mouse_mapping`` (``none_mouse``
          when unknown) with ``position`` and ``endposition``.
        * anything else: pushed as a single key when ``classify_keystroke``
          yields exactly one key, otherwise reported and not executed.
        """
        if '+' in action:
            # Hotkey: hold phase
            parts = action.split('+')
            for part in parts:
                if not self.is_mouse(part):
                    self.keyboard.press(self.classify_keystroke(part)[0])
                else:
                    handler = self.mouse_mapping.get(part, self.none_mouse)
                    handler(position, endposition)
            # Hotkey: release phase, last pressed first; mouse parts have nothing to release
            for part in reversed(parts):
                if self.is_mouse(part):
                    continue
                self.keyboard.release(self.classify_keystroke(part)[0])
        elif self.is_mouse(action):
            # Plain mouse action
            handler = self.mouse_mapping.get(action, self.none_mouse)
            handler(position, endposition)
        else:
            # Plain single key
            keys = self.classify_keystroke(action)
            if len(keys) != 1:
                print('This keystroke was not execute ->', action)
            else:
                self.push(keys[0])
        time.sleep(delay)

['o']

In [None]:
    def find_continouskeys(self, sample):
        """Return (ix1, ixN) spans where one key is held down and auto-repeats.

        A span is a run of two or more adjacent pressed rows that all carry the
        same event and are immediately followed by the release of that key.
        A run that reaches the end of the sample has no following row and is
        not reported.
        """
        events = sample['event']
        ixs_pressed = [i for i, e in events.items() if 'pressed' in e]
        if not ixs_pressed:
            return []
        runs = [g for g in self.group_consecutive(ixs_pressed) if len(g) > 1]
        ck_indexes = []
        for run_ixs in runs:
            ix1, ixN = run_ixs[0], run_ixs[-1]
            if (ixN + 1) not in events.index:   # Nothing follows the run
                continue
            same_event = events.loc[ix1]
            Nevents_cond = (events.loc[ix1:ixN] == same_event).sum() == len(run_ixs)
            end_cond = events.loc[ixN+1] == same_event.replace('pressed', 'released')
            if Nevents_cond and end_cond:
                ck_indexes.append((ix1, ixN))
        return ck_indexes