In [26]:
# A simpler approach would be to skip this preprocessing and replay the raw events (including releases),
# which would avoid the hotkey, double-click and drag detection altogether.
class Preprocessing():
    """Convert a raw table of mouse/keyboard events into replayable actions.

    The input is a DataFrame with (at least) the columns ``timestamp``,
    ``px``, ``py``, ``event`` and ``trajectory``.  ``run`` applies, in order:
    trajectory parsing, caps/num-lock conversion, drag detection,
    double-click detection and delay computation.

    The detection helpers address rows by integer label and assume that the
    frame they receive has a default ``0..n-1`` index; the ``replace_*``
    methods and ``capsnumlocks_conversion`` reset the index of their own copy
    before doing any work.  Most steps copy the frame, which favours clarity
    over efficiency.
    """

    def __init__(self, length_th, minpixels_th, dt_th, maxpixels_th):
        """Store the detection thresholds.

        The quality of the replay depends on how well these are calibrated.

        Args:
            length_th: Minimum number of trajectory points recorded on the
                release row for a press/release pair to count as a drag.
            minpixels_th: Minimum absolute displacement (on either axis)
                between press and release for a drag.
            dt_th: Maximum timestamp difference between two left presses for
                a double click (same unit as the ``timestamp`` column).
            maxpixels_th: Maximum absolute displacement (on each axis)
                between the two presses of a double click.
        """
        # Drags
        self.length_th = length_th
        self.minpixels_th = minpixels_th
        # Double clicks
        self.dt_th = dt_th
        self.maxpixels_th = maxpixels_th

    def run(self, sample):
        """Run the full pipeline and return the actions with a ``delay`` column.

        ``delay`` holds the time between each action and the next one; the
        last action gets ``0.0``.  The input frame is not modified.
        """
        # The order of the following steps matters.
        s1 = sample.copy()
        s1['trajectory'] = s1['trajectory'].map(self.string2list)  # text -> list of tuples
        s1 = self.capsnumlocks_conversion(s1)
        s1 = self.replace_drags(s1)
        s1 = self.replace_doubleclicks(s1)

        timestamps = s1['timestamp'].to_numpy()
        if len(timestamps) > 0:
            delays = np.append(timestamps[1:] - timestamps[:-1], 0.0)
        else:
            # An empty recording has no delays; appending 0.0 would create a
            # length mismatch with the (empty) frame.
            delays = np.array([], dtype=float)
        s1['delay'] = delays
        return s1.reset_index(drop=True)

    def capsnumlocks_conversion(self, sample):
        """Rewrite key events according to the caps-lock and num-lock state.

        Both locks are assumed to be off at the start and are toggled by
        their ``pressed`` events.  Single-character key events are upper- or
        lower-cased following caps lock; the ``<65437>`` code is turned into
        ``5`` when num lock is on and removed otherwise.
        """
        samplecopy = sample.copy().reset_index(drop=True)
        numlock_on = False
        capslock_on = False
        # Iterate over a snapshot so that writes to the frame cannot affect
        # the iteration.
        for i, event in enumerate(samplecopy['event'].tolist()):
            if event == 'pressed Key.caps_lock':
                capslock_on = not capslock_on
            elif event == 'pressed Key.num_lock':
                numlock_on = not numlock_on
            elif '<65437>' in event:  # Special case in Ubuntu only
                replacement = '5' if numlock_on else ''
                samplecopy.loc[i, 'event'] = event.replace('<65437>', replacement)
            elif len(event.replace('pressed ', '')) == 1 or len(event.replace('released ', '')) == 1:
                last_char = event[-1].upper() if capslock_on else event[-1].lower()
                samplecopy.loc[i, 'event'] = event[:-1] + last_char
        return samplecopy

    def string2list(self, string):
        """Convert the trajectory string into an actual python list.

        ``'[(t, x, y), ...]'`` becomes ``[(float(t), int(x), int(y)), ...]``.
        Anything that is not a string (e.g. NaN for a missing cell) and the
        empty list ``'[]'`` both yield ``[]``.
        """
        if not isinstance(string, str):
            return []
        chunks = string.strip('][').split('), (')
        if chunks[0] == '':
            return []
        fields = [chunk.strip(')(').split(', ') for chunk in chunks]
        return [(float(time), int(px), int(py)) for time, px, py in fields]

    def replace_drags(self, sample):
        """Transform the dataframe to encode drag events.

        For every detected drag the release row becomes a single
        ``Button.left.drag`` row located at the press position, and the
        press row is removed.
        """
        samplecopy = sample.copy().reset_index(drop=True)
        drags_indexes = self.find_drags(samplecopy)
        for ix in drags_indexes:
            samplecopy.loc[ix + 1, 'px'] = samplecopy.at[ix, 'px']
            samplecopy.loc[ix + 1, 'py'] = samplecopy.at[ix, 'py']
            samplecopy.loc[ix + 1, 'event'] = 'Button.left.drag'
        return samplecopy.drop(drags_indexes)

    def find_drags(self, sample):
        """Return the labels of the press rows that start a drag.

        A left release is a drag end when the row right before it is a left
        press, its trajectory has at least ``length_th`` points and the
        pointer moved at least ``minpixels_th`` pixels on either axis.
        """
        is_release = (sample['event'] == 'released Button.left').to_numpy()
        drags_indexes = []
        for ix in sample.index[is_release]:
            # A release on the very first row has no preceding press row.
            if (ix - 1) not in sample.index:
                continue
            if sample.at[ix - 1, 'event'] != 'pressed Button.left':
                continue
            length = len(sample.at[ix, 'trajectory'])
            dx = abs(sample.at[ix - 1, 'px'] - sample.at[ix, 'px'])
            dy = abs(sample.at[ix - 1, 'py'] - sample.at[ix, 'py'])
            if length >= self.length_th and (dx >= self.minpixels_th or dy >= self.minpixels_th):
                drags_indexes.append(ix - 1)
        return drags_indexes

    def replace_doubleclicks(self, sample):
        """Transform the dataframe to encode double-click events.

        The first press row becomes ``Button.left.double`` and takes the
        timestamp of the row after the second press (its release).  The row
        after the first press, the second press and the row after the second
        press are removed; each press is assumed to be immediately followed
        by its release.
        """
        samplecopy = sample.copy().reset_index(drop=True)
        for ix0, ix1 in self.find_doubleclicks(samplecopy):
            samplecopy.loc[ix0, 'event'] = 'Button.left.double'
            if (ix1 + 1) in samplecopy.index:
                end_timestamp = samplecopy.at[ix1 + 1, 'timestamp']
            else:
                # The recording stopped before the second release was
                # captured: fall back to the time of the second press.
                end_timestamp = samplecopy.at[ix1, 'timestamp']
            samplecopy.loc[ix0, 'timestamp'] = end_timestamp
            # errors='ignore' tolerates the missing trailing release row.
            samplecopy = samplecopy.drop([ix0 + 1, ix1, ix1 + 1], errors='ignore')
        return samplecopy

    def find_doubleclicks(self, sample):
        """Return ``(first_press, second_press)`` label pairs of double clicks.

        Two successive left presses form a double click when they are at
        most ``dt_th`` apart in time and at most ``maxpixels_th`` pixels
        apart on each axis.  A press belongs to at most one pair.
        """
        is_press = (sample['event'] == 'pressed Button.left').to_numpy()
        presses = sample.index[is_press].tolist()
        dclicks_indexes = []
        cursor = 0
        while cursor + 1 < len(presses):
            ix0, ix1 = presses[cursor], presses[cursor + 1]
            dt = sample.at[ix1, 'timestamp'] - sample.at[ix0, 'timestamp']
            dx = abs(sample.at[ix0, 'px'] - sample.at[ix1, 'px'])
            dy = abs(sample.at[ix0, 'py'] - sample.at[ix1, 'py'])
            if dt <= self.dt_th and dx <= self.maxpixels_th and dy <= self.maxpixels_th:
                dclicks_indexes.append((ix0, ix1))
                cursor += 2  # both presses are consumed by this pair
            else:
                cursor += 1
        return dclicks_indexes
    

    
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Pick one recording among the available samples (-1 selects the last path
# in sorted order) and run the preprocessing pipeline on it.
ix_sample = -1
sample_paths = glob('../data/*/raw_pcdata*.csv')  #glob('/home/ezamorag/Datasets/pccloner/*/*/*/raw_pcdata*.csv')   #
sample_paths.sort()
for path in sample_paths:  # plain loop: the prints are side effects, not list items
    print(path)
if not sample_paths:
    # Fail with an explicit message instead of a bare IndexError below.
    raise FileNotFoundError("No 'raw_pcdata*.csv' file found under '../data/*/'")
sample_path = sample_paths[ix_sample]
print('usamos este:',sample_path)
sample = pd.read_csv(sample_path)

# Thresholds: drag needs >= 5 trajectory points and >= 1 px of movement;
# double click needs <= 0.22 between presses and no movement at all.
processing = Preprocessing(length_th = 5, minpixels_th = 1, dt_th = 0.22, maxpixels_th = 0)
a = processing.run(sample)
a.to_csv('a_new.csv')
a

../data/sample001/raw_pcdata.csv
../data/sample002/raw_pcdata.csv
usamos este: ../data/sample002/raw_pcdata.csv


Unnamed: 0,timestamp,img_path,px,py,event,trajectory,delay
0,0.527965,data/sample002/screen0000000000_0.499760619999...,37,139,pressed Button.left,[],0.110092
1,0.638058,data/sample002/screen0000000001_0.593478916000...,37,139,released Button.left,[],1.635163
2,2.273221,data/sample002/screen0000000002_2.239227239000...,1763,46,pressed Button.left,"[(0.9911508719997073, 39, 139), (0.99127806899...",0.166711
3,2.439932,data/sample002/screen0000000003_2.411277170000...,1763,46,released Button.left,"[(2.4617967949998274, 1764, 46)]",2.143951
4,4.583883,data/sample002/screen0000000005_4.547473267999...,1647,45,Button.left.drag,"[(3.7002405540006293, 1647, 46), (3.7319796899...",1.545828
...,...,...,...,...,...,...,...
94,55.591541,data/sample002/screen0000000110_55.26556803899...,1477,499,Button.left.double,"[(54.82503558799908, 1493, 503), (54.841960753...",1.262001
95,56.853542,data/sample002/screen0000000114_56.82309956700...,1903,45,pressed Button.left,"[(55.827329129000645, 1477, 497), (55.82801311...",0.099342
96,56.952883,data/sample002/screen0000000115_56.92512702800...,1903,45,released Button.left,[],1.692543
97,58.645427,data/sample002/screen0000000116_58.61657802799...,1839,51,pressed Button.left,"[(57.87953064800058, 1902, 45), (57.8796400800...",0.162350


In [27]:
a[:60]

Unnamed: 0,timestamp,img_path,px,py,event,trajectory,delay
0,0.527965,data/sample002/screen0000000000_0.499760619999...,37,139,pressed Button.left,[],0.110092
1,0.638058,data/sample002/screen0000000001_0.593478916000...,37,139,released Button.left,[],1.635163
2,2.273221,data/sample002/screen0000000002_2.239227239000...,1763,46,pressed Button.left,"[(0.9911508719997073, 39, 139), (0.99127806899...",0.166711
3,2.439932,data/sample002/screen0000000003_2.411277170000...,1763,46,released Button.left,"[(2.4617967949998274, 1764, 46)]",2.143951
4,4.583883,data/sample002/screen0000000005_4.547473267999...,1647,45,Button.left.drag,"[(3.7002405540006293, 1647, 46), (3.7319796899...",1.545828
5,6.12971,data/sample002/screen0000000006_5.830347570999...,602,46,Button.left.double,"[(4.6756228060003195, 1652, 293), (4.691706916...",2.259673
6,8.389384,data/sample002/screen0000000011_8.359830383998...,586,86,Button.left.drag,"[(7.457379875999322, 588, 86), (7.574391292999...",1.037391
7,9.426775,data/sample002/screen0000000013_9.395220512999...,537,740,Button.left.drag,"[(8.993965821000529, 539, 740), (9.01535805700...",1.414464
8,10.841239,data/sample002/screen0000000015_10.81317797799...,1282,781,Button.left.drag,"[(10.302596000999984, 1283, 781), (10.31938606...",0.84814
9,11.689379,data/sample002/screen0000000016_11.41424321600...,1404,155,Button.left.double,"[(11.03412880399992, 1410, 156), (11.034253472...",3.181238


In [25]:
# Scratch check: the merged drag label does not contain the 'pressed ' prefix,
# so substring tests for 'pressed ' will not match 'Button.left.drag' rows.
'pressed ' in 'Button.left.drag'

False

In [79]:
def check(sample, max_gap=10):
    """Print a consistency report about press/release pairing in ``sample``.

    The report warns when the first event is a release, lists every pressed
    event that has no later matching release, and lists presses whose
    matching release is more than ``max_gap`` rows away (hotkey candidates).
    Row numbers in the report are positions in the frame, so the result does
    not depend on the frame's index labels.  An empty frame only prints the
    final count.

    Args:
        sample: DataFrame with an ``event`` column of strings.
        max_gap: A press/release pair is reported as a hotkey candidate when
            the release is more than this many rows after the press.

    Returns:
        None; everything is printed.
    """
    events = sample['event'].tolist()

    # First event must always be a pressed event
    if events and 'released' in events[0]:
        print('The first event is a release!!! ...') 

    # Check that every pressed key has its release event
    c = 0
    for i, event in enumerate(events):
        if 'pressed' not in event:
            continue
        released_event = event.replace('pressed', 'released')
        # Position of the first matching release after this press, if any.
        iN = next((j for j in range(i + 1, len(events)) if events[j] == released_event), None)
        if iN is None:
            print(f'\nThere is no release event for {event} at the line = {i}\n')
        elif iN - i > max_gap:
            print(i, iN - i, ' : ', event, '  ', events[iN])
            c += 1
    print(f'# of hoykeys candidates = {c}')

# Print the press/release report for `sample` (presumably the raw recording
# loaded in an earlier cell — verify which cell last assigned it).
check(sample)


There is no release event for pressed ¨ at the line = 19

# of hoykeys candidates = 0


In [80]:
x = a['event'].unique().tolist()
print(len(x))
x.sort()
x

7


['Button.left',
 'Button.middle',
 'Button.right',
 'Key.shift_r+¨',
 'Scroll.down',
 'Scroll.up',
 '´']

In [1]:
### Version 2 
    def replace_hotkeys(self, sample):
        """Merge the key presses of each detected hotkey into one '+'-joined event.

        Works on a copy of ``sample``; the (ix, ixN) spans come from
        ``self.find_hotkeys``.  Row lookups that locate pressed/released
        events read from the untouched ``sample``, while edits and drops are
        applied to the copy.

        NOTE(review): ``sample[ix:ixN]`` slices by position whereas ``.loc``
        and ``drop`` use labels, so this presumably expects a default
        0..n-1 index — confirm against the caller.

        Returns:
            The modified copy of ``sample``.
        """
        samplecopy = sample.copy()
        for ix, ixN in self.find_hotkeys(samplecopy):
            # Labels of the rows inside [ix, ixN) whose event contains 'pressed'.
            ixs_pressed = sample[ix:ixN].index[sample['event'][ix:ixN].map(lambda x: 'pressed' in x)]
            # Start the merged event with the first pressed key (keeps the 'pressed ' prefix).
            newevent = 'pressed ' + samplecopy.loc[ixs_pressed[0], 'event'].replace('pressed ', '')
            # event1 + ... + eventN
            if self.are_pressedkeys_consecutive(ixs_pressed.tolist()): 
                # All presses sit on adjacent rows: chain every key name with '+'.
                for ix_pressed in ixs_pressed[1:]: 
                    newevent += '+' + samplecopy.loc[ix_pressed, 'event'].replace('pressed ', '')
                # The merged event is stored on the last pressed row ...
                samplecopy.loc[ixs_pressed[-1], 'event'] = newevent
                # ... and the other pressed rows plus every row containing 'released' in [ix, ixN] are dropped.
                remove_ixs = ixs_pressed[:-1].tolist() + sample[ix:ixN+1].index[sample['event'][ix:ixN+1].map(lambda x: 'released' in x)].tolist()
                samplecopy = samplecopy.drop(remove_ixs)
            # event1 + {event2 + ... + eventN}
            elif self.are_sequentialhotkeys(ixs_pressed[1:].tolist()):
                # Every later press becomes its own 'first+key' event; the row
                # right after it (ix_pressed+1) is dropped.
                for ix_pressed in ixs_pressed[1:]:
                    samplecopy.loc[ix_pressed, 'event'] = newevent + '+' + samplecopy.loc[ix_pressed, 'event'].replace('pressed ', '')
                    samplecopy = samplecopy.drop([ix_pressed+1])
                # Finally drop the first press and the row at ixN.
                samplecopy = samplecopy.drop([ixs_pressed[0], ixN])
            # Other cases are not included, e.g. event1 + event2 + {event3 + ... + eventN}
            else:
                # Unsupported pattern: the rows are left unchanged.
                pass
        return samplecopy 
    
    def are_sequentialhotkeys(self, numbers_sorted):
        """Return True when every value is exactly 2 above the previous one.

        Lists with zero or one element trivially satisfy the condition.
        """
        neighbours = zip(numbers_sorted, numbers_sorted[1:])
        return all(current + 2 == following for current, following in neighbours)
    
    def are_pressedkeys_consecutive(self, numbers_sorted):
        """Return True when every value is exactly 1 above the previous one.

        Lists with zero or one element trivially satisfy the condition.
        """
        neighbours = zip(numbers_sorted, numbers_sorted[1:])
        return all(current + 1 == following for current, following in neighbours)
    
    def find_hotkeys(self, sample):
        """Return ``(ix, ixN)`` spans that look like hotkeys.

        ``ix`` is the row of a pressed event and ``ixN`` the row of its first
        matching released event at or after ``ix``.  A span is reported when
        the two rows are at least 3 apart.  Presses located inside a
        processed span are not examined again.  A press without any matching
        release is skipped instead of aborting the scan.

        Assumes ``sample`` has a default 0..n-1 index (labels are used as
        positions).
        """
        events = sample['event']
        is_pressed = events.map(lambda x: 'pressed' in x).to_numpy(dtype=bool)
        ixs_pressed = sample.index[is_pressed].tolist()
        hotkeys_indexes = []
        while len(ixs_pressed) >= 2:  # To not include the end Key.esc
            ix = ixs_pressed[0]
            released1 = events[ix].replace('pressed', 'released')
            tail = events[ix:]
            release_ixs = tail.index[(tail == released1).to_numpy()]
            if len(release_ixs) == 0:
                # No release was recorded for this press: it cannot open a
                # hotkey span, so discard it and keep scanning.
                ixs_pressed.pop(0)
                continue
            ixN = release_ixs[0]
            # 3 keeps ordinary typing from being reported as hotkeys.
            # Careful: this is a weak condition.
            if ixN - ix >= 3:
                hotkeys_indexes.append((ix, ixN))
            # Forget every press located inside [ix, ixN).
            ixs_pressed = [p for p in ixs_pressed if not (ix <= p < ixN)]
        return hotkeys_indexes

IndentationError: unexpected indent (3952036838.py, line 2)

In [None]:
### Version 1


In [None]:
        if '+' in action: 
            hotkeys = [self.classify_keystroke(hotkey)[0] for hotkey in action.split('+')] 
            for key in hotkeys:
                self.keyboard.press(key)
            for key in hotkeys[::-1]:
                self.keyboard.release(key)

In [2]:
    def run(self, action, position, endposition, delay): 
        """Execute one recorded action, then sleep for ``delay``.

        * ``'a+b+...'`` (hotkey): every part is performed in order — mouse
          parts through ``self.mouse_mapping``, keyboard parts by pressing
          the first code returned by ``self.classify_keystroke`` — and the
          pressed keys are released in reverse order.
        * mouse action: dispatched through ``self.mouse_mapping`` (falling
          back to ``self.none_mouse``).
        * single key: pushed when ``classify_keystroke`` returns exactly one
          code; otherwise a message is printed.

        A hotkey containing a part that cannot be classified is reported and
        skipped before any key is pressed, and keys that were pressed are
        always released even if a later step raises, so no key stays held.

        NOTE(review): ``is_mouse`` and ``classify_keystroke`` are now called
        once per part, before any key is pressed; this presumes they are
        side-effect free — confirm.
        """
        # Hotkeys
        if '+' in action:
            # Resolve every part first so a bad part cannot leave keys held.
            steps = []  # (is_mouse_part, key, keycode)
            executable = True
            for key in action.split('+'):
                if self.is_mouse(key): 
                    steps.append((True, key, None))
                    continue
                keys = self.classify_keystroke(key)
                if not keys:
                    executable = False
                    break
                steps.append((False, key, keys[0]))
            if executable:
                held = []
                try:
                    for is_mouse_part, key, keycode in steps:
                        if is_mouse_part:
                            self.mouse_mapping.get(key, self.none_mouse)(position, endposition)
                        else:
                            self.keyboard.press(keycode)
                            held.append(keycode)
                finally:
                    # Release in reverse press order, even after an error.
                    for keycode in reversed(held):
                        self.keyboard.release(keycode)
            else:
                print('This keystroke was not execute ->', action)
        # Mouse
        elif self.is_mouse(action):
            self.mouse_mapping.get(action, self.none_mouse)(position, endposition)
        # Single key
        else:
            keys = self.classify_keystroke(action)
            if len(keys) == 1:  
                self.push(keys[0])
            else:
                print('This keystroke was not execute ->', action)
        time.sleep(delay)

['o']

In [None]:
    def find_continouskeys(self, sample):
        """Return ``(ix1, ixN)`` spans where one key event repeats while held.

        The rows whose event contains 'pressed' are grouped by
        ``self.group_consecutive``; groups with more than one row are kept.
        A group is reported when all of its rows carry the same event and
        the row right after the group is the matching released event.  A
        group that reaches the last row of ``sample`` has no following row
        and is skipped.

        Assumes ``sample`` has a default 0..n-1 index (labels are used as
        positions).
        """
        events = sample['event']
        is_pressed = events.map(lambda x: 'pressed' in x).to_numpy(dtype=bool)
        ixs_pressed = sample.index[is_pressed].tolist()
        groups = [group for group in self.group_consecutive(ixs_pressed) if len(group) > 1]
        ck_indexes = []
        for group in groups:
            ix1, ixN = group[0], group[-1]
            same_event = events[ix1]
            if (events[ix1:ixN+1] == same_event).sum() != len(group):
                continue
            # The group may end on the last row, where no release can follow.
            if (ixN + 1) not in sample.index:
                continue
            if events[ixN+1] == same_event.replace('pressed', 'released'):
                ck_indexes.append((ix1, ixN))
        return ck_indexes