Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

A lot of refactoring and the start of selfstats

  • Loading branch information...
commit 113badaf2700cfe151a50c9f34aa3db508609c73 1 parent 7694185
David Fendrich authored
View
81 activity_store.py
@@ -1,8 +1,11 @@
+import zlib
+import json
import time
from datetime import datetime
NOW = datetime.now
import Xlib.error
+import cPickle #remove after mem profile
import sniff_x
import models
@@ -10,40 +13,22 @@
SKIP_SET = {'Shift_L', 'Shift_R'}
-"""
-Todo:
- optional crypto on Keys.text and Keys.timings
- timings in json
- compress text and timings (check size difference on existing db)
---
- simple utility for reading and stats
- take pw from default config file, if exists
---
- ask for pw in tk, if not command line
--
- README
--
- test map switch
- general testing
- no printing
- remove stdout and stderr from DaemonContext
--
-
-
----Later
- documentation, unittests, pychecker ;)
- replay key and mouse for process and time interval (maybe store as macro)
- word search
-
-"""
-
#Mouse buttons: left button: 1, middle: 2, right: 3, scroll up: 4, down:5
def pad(s, padnum):
    """Right-pad `s` with NUL characters to a multiple of `padnum`.

    `s` is returned unchanged when its length is already a multiple of
    `padnum` (this includes the empty string). Otherwise the minimum number
    of NUL characters needed to reach the next multiple is appended.

    Works for both byte strings and text strings: the filler is chosen to
    match the type of `s`, so compressed or encoded byte data can be padded
    without a type error.
    """
    remainder = len(s) % padnum
    if remainder == 0:
        return s
    # Match the filler type to the input so concatenation never mixes
    # bytes and text.
    filler = b'\0' if isinstance(s, bytes) else '\0'
    return s + filler * (padnum - remainder)
+
class ActivityStore:
- def __init__(self, db_name):
+ def __init__(self, db_name, encrypter=None):
self.session_maker = models.initialize(db_name)
self.session = None
+ if encrypter:
+ self.encrypter = encrypter
+
self.nrmoves = 0
self.latestx = 0
self.latesty = 0
@@ -52,6 +37,7 @@ def __init__(self, db_name):
self.curtext = u""
self.timings = []
+ self.last_key_time = time.time()
self.started = NOW()
self.cur_class = None
@@ -71,6 +57,17 @@ def run(self):
def close(self):
self.sniffer.cancel()
+ self.store_keys()
+
def maybe_encrypt(self, s):
    """Return `s` encrypted by the store's encrypter, or `s` unchanged.

    When an encrypter is configured, `s` is first padded to a multiple of
    8 with `pad` and then passed to `encrypter.encrypt`.
    NOTE(review): 8 is presumably the cipher's block size -- confirm
    against the cipher constructed by the caller.
    """
    # __init__ only assigns self.encrypter when an encrypter was supplied,
    # so the attribute may be missing entirely; treat that as "no encryption"
    # instead of raising AttributeError.
    encrypter = getattr(self, 'encrypter', None)
    if not encrypter:
        return s
    return encrypter.encrypt(pad(s, 8))
+
def timings_to_str(self):
    """Serialize `self.timings` to JSON, zlib-compress it and maybe encrypt it.

    Returns whatever `self.maybe_encrypt` returns for the compressed data.
    """
    serialized = json.dumps(self.timings)
    # zlib.compress needs a byte string; encode only when json.dumps gave
    # us text (on interpreters where it already returns bytes this is a no-op).
    if not isinstance(serialized, bytes):
        serialized = serialized.encode('utf8')
    return self.maybe_encrypt(zlib.compress(serialized))
def store_window(self):
cur_window = self.session.query(Window).filter_by(title=self.cur_name.decode('latin1'), process_id=self.cur_process_id).scalar()
@@ -99,14 +96,19 @@ def store_click(self, button, press):
def store_keys(self):
if self.timings:
self.maybe_end_specials()
- print 'keys', len(self.timings)
-
- self.session.add(Keys(self.curtext, self.timings, self.started, self.cur_win_id, self.cur_geo_id))
+
+ enc_timings = self.timings_to_str()
+ enc_curtext = self.maybe_encrypt(self.curtext.encode('utf8'))
+
+ self.session.add(Keys(enc_curtext, enc_timings, self.started, self.cur_win_id, self.cur_geo_id))
self.session.commit()
+ print 'keys', len(self.timings), len(cPickle.dumps(self.timings, 2)), len(enc_timings)
+
self.started = NOW()
self.curtext = u""
self.timings = []
+ self.last_key_time = time.time()
def get_cur_window(self):
i = 0
@@ -136,7 +138,18 @@ def get_cur_window(self):
def check_geometry(self):
- geo = self.cur_window.get_geometry()
+ i = 0
+ while True:
+ try:
+ geo = self.cur_window.get_geometry()
+ break
+ except Xlib.error.XError:
+ print 'Badwin in geo'
+ i += 1
+ if i >= 10:
+ print 'Really bad win in geo'
+ return
+
cur_geo = self.session.query(Geometry).filter_by(xpos=geo.x, ypos=geo.y, width=geo.width, height=geo.height).scalar()
if cur_geo is None:
cur_geo = Geometry(geo)
@@ -171,6 +184,7 @@ def log_cur_window(self):
self.check_geometry()
def got_key(self, keycode, state, s, press):
+ now = time.time()
self.log_cur_window()
if press:
if s not in SKIP_SET and not (s[0] == '[' and s[-1] == ']'):
@@ -186,7 +200,8 @@ def got_key(self, keycode, state, s, press):
self.specials_in_row += 1
self.lastspecial = s
if self.specials_in_row < 2:
- self.timings.append((s, time.time()))
+ self.timings.append((s, now - self.last_key_time))
+ self.last_key_time = now
def got_mouse_click(self, button, press):
self.log_cur_window()
View
23 check_password.py
@@ -0,0 +1,23 @@
+import os
+
+DIGEST_NAME = 'password.digest'
+MAGIC_STRING = '\xc5\x7fdh\x05\xf6\xc5=\xcfh\xafv\xc0\xf4\x13i*.O\xf6\xc2\x8d\x0f\x87\xdb\x9f\xc2\x88\xac\x95\xf8\xf0\xf4\x96\xe9\x82\xd1\xca[\xe5\xa32\xa0\x03\nD\x12\n\x1dr\xbc\x03\x9bE\xd3q6\x89Cwi\x10\x92\xdf(#\x8c\x87\x1b3\xd6\xd4\x8f\xde)\xbe\x17\xbf\xe4\xae\xb73\\\xcb\x7f\xd3\xc4\x89\xd0\x88\x07\x90\xd8N,\xbd\xbd\x93j\xc7\xa3\xec\xf3P\xff\x11\xde\xc9\xd6 \x98\xe8\xbc\xa0|\x83\xe90Nw\xe4=\xb53\x08\xf0\x14\xaa\xf9\x819,X~\x8e\xf7mB\x13\xe9;\xde\x9e\x10\xba\x19\x95\xd4p\xa7\xd2\xa9o\xbdF\xcd\x83\xec\xc5R\x17":K\xceAiX\xc1\xe8\xbe\xb8\x04m\xbefA8\x99\xee\x00\x93\xb4\x00\xb3\xd4\x8f\x00@Q\xe9\xd5\xdd\xff\x8d\x93\xe3w6\x8ctRQK\xa9\x97a\xc1UE\xdfv\xda\x15\xf5\xccA)\xec^]AW\x17/h)\x12\x89\x15\x0e#8"\x7f\x16\xd6e\x91\xa6\xd8\xea \xb9\xdb\x93W\xce9\xf2a\xe7\xa7T=q'
+
+
def check(data_dir, decrypter):
    """Verify the password digest in `data_dir`, creating it on first use.

    If the digest file (DIGEST_NAME inside `data_dir`) exists:
      * returns False when `decrypter` is None;
      * otherwise returns whether decrypting the file contents yields
        MAGIC_STRING.

    If the digest file does not exist:
      * when `decrypter` is not None, MAGIC_STRING is encrypted with it and
        written to the digest file;
      * returns True in either case.
    """
    fname = os.path.join(data_dir, DIGEST_NAME)

    if os.path.exists(fname):
        if decrypter is None:
            return False
        # `with` guarantees the handle is closed even if the read fails.
        with open(fname, 'rb') as f:
            s = f.read()
        return decrypter.decrypt(s) == MAGIC_STRING

    if decrypter is not None:
        s = decrypter.encrypt(MAGIC_STRING)
        with open(fname, 'wb') as f:
            f.write(s)
    return True
+
View
6 models.py
@@ -1,7 +1,7 @@
import datetime
from sqlalchemy.ext.declarative import declarative_base, declared_attr
-from sqlalchemy import Index, Column, Boolean, Integer, Unicode, UnicodeText, DateTime, PickleType, ForeignKey, create_engine
+from sqlalchemy import Index, Column, Boolean, Integer, Unicode, UnicodeText, DateTime, Binary, ForeignKey, create_engine
from sqlalchemy.orm import sessionmaker, relationship, backref
def initialize(fname):
@@ -89,7 +89,7 @@ def __repr__(self):
return "<Click (%d, %d), (%d, %d, %d)>" % (self.xpos, self.ypos, self.button, self.press, self.nrmoves)
class Keys(SpookMixin, Base):
- text = Column(UnicodeText, nullable=False)
+ text = Column(Binary, nullable=False)
started = Column(DateTime, nullable=False)
window_id = Column(Integer, ForeignKey('window.id'), nullable=False)
@@ -98,7 +98,7 @@ class Keys(SpookMixin, Base):
geometry_id = Column(Integer, ForeignKey('geometry.id'), nullable=False)
geometry = relationship("Geometry", backref=backref('keys'))
- timings = Column(PickleType)
+ timings = Column(Binary)
def __init__(self, text, timings, started, window_id, geometry_id):
self.text = text
View
20 password_dialog.py
@@ -0,0 +1,20 @@
+import sys
+import getpass
+
+from Tkinter import *
+import tkSimpleDialog
+
def get_password():
    """Ask the user for a password.

    Prompts with `getpass` when standard input is a terminal; otherwise
    falls back to the Tk dialog provided by `get_tk_password`.
    """
    if not sys.stdin.isatty():
        return get_tk_password()
    return getpass.getpass()
+
+
def get_tk_password():
    """Ask for the password in a Tk dialog and return the dialog's result.

    A Tk root window is created and hidden so that only the dialog is
    shown. The root is destroyed once the dialog closes, so the hidden
    window does not stay alive after this function returns.
    NOTE(review): the result is presumably None when the user cancels --
    confirm against the dialog's documentation.
    """
    root = Tk()
    root.withdraw()
    try:
        return tkSimpleDialog.askstring(title='Selfspy encryption password', prompt='Password', show='*', parent=root)
    finally:
        # Release the hidden root window even if the dialog raises.
        root.destroy()
+
+if __name__ == '__main__':
+ print get_password()
View
75 selfspy.py
@@ -13,11 +13,43 @@
import grp
import pwd
+import hashlib
+from Crypto.Cipher import Blowfish
+
from activity_store import ActivityStore
+from password_dialog import get_password
+import check_password
+
+"""
+
+Todo:
+ implement selfstats functionality
+--
+ allow not-text argument to avoid storing text at all. This makes the program never ask for passwords
+ remove guid and uid flags
+
+ periodic emails from selfspy (or perhaps just have note in the README on how this can be accomplished with cron, mail and selfstats?)
+-
+ README
+--
+ test map switch
+ general testing
+ no printing
+ remove stdout and stderr from DaemonContext
+ remove cPickle
+
+
+---Later
+ code documentation, unittests, pychecker ;)
+ replay key and mouse for process and time interval (maybe store as macro)
+ word search
+ calculate personal keymap
+
+"""
-DATA_DIR = '/var/lib/selfspy'
+DATA_DIR = '~/.selfspy'
DBNAME = 'selfspy.sqlite'
-LOCK_FILE = '/var/run/selfspy/selfspy.pid'
+LOCK_FILE = 'selfspy.pid'
def parse_config():
conf_parser = argparse.ArgumentParser(description=__doc__, add_help=False,
@@ -34,11 +66,11 @@ def parse_config():
parser = argparse.ArgumentParser(description='Monitor your computer activities and store them in an encrypted database for later analysis or disaster recovery.', parents=[conf_parser])
parser.set_defaults(**defaults)
- parser.add_argument('-p', '--password', help='Encryption password. If you want to keep your database unencrypted, specify -p "" here. If you don\'t specify a password in the command line arguments or (preferable) in a config file, a dialog will pop up, asking for the password each time this program is run. Usually when X starts.')
+ parser.add_argument('-p', '--password', help='Encryption password. If you want to keep your database unencrypted, specify -p "" here. If you don\'t specify a password in the command line arguments or in a config file, a dialog will pop up, asking for the password. The most secure is to not use either command line or config file and instead type it in on startup.')
parser.add_argument('-d', '--data-dir', help='Data directory for selfspy, where the database is stored. Remember that Selfspy must have read/write access. Default is %s' % DATA_DIR, default=DATA_DIR)
- parser.add_argument('-l', '--lock-file', help='Lock file. Default is %s' % LOCK_FILE, default=LOCK_FILE)
- parser.add_argument('-u', '--uid', help='User ID to switch process to on daemon start. You can specify either name or number. Default is to keep process uid.', default=os.getuid())
- parser.add_argument('-g', '--gid', help='Group ID to switch process to on daemon start. You can specify either name or number. Default is to keep process gid.', default=os.getgid())
+ #These are probably pointless, as the daemon should be run by the local user anyway
+ parser.add_argument('-u', '--uid', help='User ID to switch process to on daemon start. You can specify either name or number. Default is to keep process uid, which is probably what you want.', default=os.getuid())
+ parser.add_argument('-g', '--gid', help='Group ID to switch process to on daemon start. You can specify either name or number. Default is to keep process gid, which is probably what you want.', default=os.getgid())
return parser.parse_args()
@@ -55,14 +87,21 @@ def parse_config():
except ValueError:
args['uid'] = pwd.getpwnam(args['uid']).pw_gid
- print args
+ args['data_dir'] = os.path.expanduser(args['data_dir'])
+ print args #TODO: remove
- lock = lockfile.FileLock(args['lock_file'])
+ try:
+ os.makedirs(args['data_dir'])
+ except OSError:
+ pass
+
+ lockname = os.path.join(args['data_dir'], LOCK_FILE)
+ lock = lockfile.FileLock(lockname)
if lock.is_locked():
- print '%s is locked! I am probably already running.' % args['lock_file']
+ print '%s is locked! I am probably already running.' % lockname
print 'If you can find no selfspy process running, it is a stale lock and you can safely remove it.'
print 'Shutting down.'
- sys.exit()
+ sys.exit(1)
context = daemon.DaemonContext(
working_directory=args['data_dir'],
@@ -78,8 +117,22 @@ def parse_config():
signal.SIGHUP: 'terminate'
}
+
+ if args['password'] is None:
+ args['password'] = get_password()
+
+ if args['password'] == "":
+ encrypter = None
+ else:
+ encrypter = Blowfish.new(hashlib.md5(args['password']).digest())
+
+ if not check_password.check(args['data_dir'], encrypter):
+ print 'Password failed'
+ sys.exit(1)
+
with context:
- astore = ActivityStore(os.path.join(args['data_dir'], DBNAME))
+ astore = ActivityStore(os.path.join(args['data_dir'], DBNAME), encrypter)
+
try:
astore.run()
except SystemExit:
View
150 selfstats.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+
+import os
+import sys
+
+import argparse
+import ConfigParser
+
+from Crypto.Cipher import Blowfish
+import hashlib
+
+from selfspy import DATA_DIR, DBNAME
+from password_dialog import get_password
+import check_password
+
+"""
+ add database fields to allow more of the summaries without a password
+ note in the help which commands require password
+
+ add decryption and unpacking (and packing) to models.py
+ add lazy iterator to models.py
+
+ add filter to models.py
+
+ do_filter
+
+--
+ check_need_text
+ check_is_summary
+ get password
+
+ print listing
+
+ calc summary
+ print summary
+"""
class Selfstats:
    """Skeleton driver for selfstats.

    On construction it decides whether text is needed, runs the filter and
    then either calculates and shows a summary or shows the matching rows.
    Most steps are still placeholders (`pass`); this class only fixes the
    control flow so the skeleton can run.
    """

    def __init__(self, args):
        self.args = args
        # Placeholder result set so the row loops below have something to
        # iterate over. NOTE(review): presumably do_filter will populate
        # this once it is implemented -- confirm.
        self.rows = []

        self.check_need_text()
        if self.need_text:
            pass  # get password

        self.do_filter()

        if self.check_is_summary():
            self.calc_summary()
            self.show_summary()
        else:
            self.show_rows()

    def do_filter(self):
        """Placeholder: filtering is not implemented yet."""
        pass

    def show_rows(self):
        """Placeholder: iterate over the rows; nothing is printed yet."""
        # tabulate data
        for row in self.rows:
            # NOTE(review): the original tested an undefined name `text`;
            # need_text is the closest flag on this class -- confirm intent.
            if self.need_text:
                pass
            else:
                pass

    def calc_summary(self):
        """Placeholder: iterate over the rows; no summary is computed yet."""
        sumd = {}
        for row in self.rows:
            pass

    def show_summary(self):
        """Placeholder: summary output is not implemented yet."""
        pass

    def check_need_text(self):
        """Set `self.need_text` (currently always True) and return it."""
        self.need_text = True
        return self.need_text

    def check_is_summary(self):
        """Set `self.is_summary` (currently always True) and return it.

        The value is returned because __init__ branches on this call's
        result.
        """
        self.is_summary = True
        return self.is_summary
+
def parse_config():
    """Build the selfstats command line parser and return the parsed arguments.

    A first, help-less parser only extracts -c/--config. If a config file is
    given, the items of its "Defaults" section become the defaults of the
    full parser. Returns the argparse namespace from the full parser.

    NOTE(review): ACTIVE_SECONDS and DATA_DIR are read from module scope;
    ACTIVE_SECONDS is not defined or imported in the visible part of this
    file -- it must be provided before this function can run.
    """
    conf_parser = argparse.ArgumentParser(description=__doc__, add_help=False,
                                          formatter_class=argparse.RawDescriptionHelpFormatter)

    conf_parser.add_argument("-c", "--config",
                             help="""Config file with defaults. Command line parameters will override those given in the config file. Options to selfspy goes in the "[Defaults]" section, followed by [argument]=[value] on each line. Options specific to selfstats should be in the "[Selfstats]" section, though "password" and "data-dir" are still read from "[Defaults]".""", metavar="FILE")
    args, remaining_argv = conf_parser.parse_known_args()

    defaults = {}
    if args.config:
        config = ConfigParser.SafeConfigParser()
        config.read([args.config])
        defaults = dict(config.items('Defaults'))

    parser = argparse.ArgumentParser(description="""Calculate statistics on selfspy data. Per default it will show non-text information that matches the filter. Adding '-s' means also show text. Adding any of the summary options will show those summaries over the given filter instead of the listing. Multiple summary options can be given to print several summaries over the same filter. If you give arguments that need to access text / keystrokes, you will be asked for the decryption password.""", epilog="""See the README file or http://github.com/gurgeh/selfspy for examples.""", parents=[conf_parser])
    parser.set_defaults(**defaults)
    parser.add_argument('-p', '--password', help='Decryption password. Only needed if selfstats needs to access text / keystrokes data. If your database in not encrypted, specify -p="" here. If you don\'t specify a password in the command line arguments or in a config file, and the statistics you ask for require a password, a dialog will pop up asking for the password. If you give your password on the command line, remember that it will most likely be stored in plain text in your shell history.')
    parser.add_argument('-d', '--data-dir', help='Data directory for selfspy, where the database is stored. Remember that Selfspy must have read/write access. Default is %s' % DATA_DIR, default=DATA_DIR)

    parser.add_argument('-a', '--date', nargs='+', help='Which date to start the listing or summarizing from. If only one argument is given (--date 13) it is interpreted as the closest date in the past on that day. If two arguments are given (--date 03 13) it is interpreted as the closest date in the past on that month and that day, in that order. If three arguments are given (--date 2012 03 13) it is interpreted as YYYY MM DD')
    # '-c' is already taken by --config (inherited from conf_parser);
    # registering it twice makes argparse raise a conflict error, so the
    # clock option uses '-C'.
    parser.add_argument('-C', '--clock', type=str, help='Time to start the listing or summarizing from. Given in 24 hour format as --clock 13:25. If no --date is given, interpret the time as today if that results in sometimes in the past, otherwise as yesterday.')

    parser.add_argument('-i', '--id', type=int, help='Which row ID to start the listing or summarizing from. If --date and/or --clock is give, this option is ignored.')

    # '-p' is already taken by --password, so --period is long-form only.
    parser.add_argument('--period', help='--period <limit> [<unit>]. If no unit is given, period limits the number of rows in the result to <limit>. Otherwise the limit is a time period given by <unit>. <unit> is either "s" (seconds), "m" (minutes), "h" (hours), "d" (days) or "w" (weeks)', nargs='+', type=str)

    parser.add_argument('-m', '--min-keys', type=int, metavar='nr', help='Only allow entries with at least <nr> keystrokes')

    parser.add_argument('-T', '--title', type=str, metavar='regexp', help='Only allow entries where the title matches this <regexp>')
    parser.add_argument('-P', '--process', type=str, metavar='regexp', help='Only allow entries where the process name matches this <regexp>')
    parser.add_argument('-B', '--body', type=str, metavar='regexp', help='Only allow entries where the body matches this <regexp>')

    # Flags that take no value use action='store_true'; nargs=0 is rejected
    # by argparse for the default "store" action.
    parser.add_argument('-s', '--showtext', action='store_true', help='Also show the text column. This switch is ignored if at lesat one of the summary options are used.')

    parser.add_argument('--kcratio', action='store_true', help='Summarize the ratio between keystrokes and clicks (not scroll up or down) in the given period.')
    parser.add_argument('--karatio', action='store_true', help='Summarize the ratio between keystrokes and time active in the given period.')

    parser.add_argument('--keystrokes', action='store_true', help='Summarize number of keystrokes')
    parser.add_argument('--clicks', action='store_true', help='Summarize number of mouse button clicks for all buttons.')

    parser.add_argument('--key-freqs', action='store_true', help='Summarize a table of absolute and relative number of keystrokes for each used key during the time period.')

    parser.add_argument('--active', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='Summarize total time spent active during the period. The optional argument gives how many seconds after each mouse click (including scroll up or down) or keystroke that you are considered active. Default is %d' % ACTIVE_SECONDS)
    parser.add_argument('--periods', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='List active time periods. Optional argument works same as for --active')

    parser.add_argument('--pactive', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='List processes, sorted by time spent active in them. Optional argument works same as for --active')
    parser.add_argument('--tactive', type=int, metavar='seconds', nargs='?', const=ACTIVE_SECONDS, help='List window titles, sorted by time spent active in them. Optional argument works same as for --active')

    parser.add_argument('--pkeys', action='store_true', help='List processes sorted by number of keystrokes.')
    parser.add_argument('--tkeys', action='store_true', help='List window titles sorted by number of keystrokes.')

    return parser.parse_args()
+
+if __name__ == '__main__':
+ args = vars(parse_config())
+
+ args['data_dir'] = os.path.expanduser(args['data_dir'])
+
+ if need_decryption(args):
+ if args['password'] is None:
+ args['password'] = get_password()
+
+ if args['password'] == "":
+ decrypter = None
+ else:
+ decrypter = Blowfish.new(hashlib.md5(args['password']).digest())
+
+ if not check_password.check(args['data_dir'], decrypter):
+ print 'Password failed'
+ sys.exit(1)
+
+
+
Please sign in to comment.
Something went wrong with that request. Please try again.