In [1]:
import tmilib
reload(tmilib)
from tmilib import *

In [13]:
def get_focused_tab(data):
  """Return the URL of the active tab in the focused browser window.

  Scans data['windows'] in order; windows whose 'focused' flag is falsy are
  skipped. Within a focused window, the first tab whose 'highlighted' and
  'selected' flags are both truthy wins and its 'url' is returned.

  Returns None when no window is focused or no tab qualifies.
  """
  for win in data['windows']:
    if win['focused']:
      for candidate in win['tabs']:
        # 'selected' is only consulted for tabs that are already highlighted
        if candidate['highlighted'] and candidate['selected']:
          return candidate['url']
  return None

class SessionTracker(object):
  """Groups a stream of browser log events into per-URL browsing sessions.

  Feed events one at a time through process_input(); finished sessions
  accumulate in self.output. Each session is a dict with the keys:
    url:    URL that was focused during the session
    start:  timestamp of the first event in the session
    active: timestamp of the last event seen while the session was active
    end:    when the session is believed to have ended

  All timestamps are in milliseconds.
  """

  # A session is never assumed to last longer than this (in milliseconds)
  # past its last active event.
  MAX_INACTIVE_MS = 60*1000

  def __init__(self):
    self.output = []   # finished sessions, in the order they were closed
    self.curitem = {}  # currently open session; empty dict when none is open

  def _latest_end(self):
    """Latest end time allowed for the open session (requires an open session)."""
    return self.curitem['active'] + self.MAX_INACTIVE_MS

  def _close_session(self, end):
    """Stamp the open session with `end`, move it to self.output, and reset."""
    self.curitem['end'] = end
    self.output.append(self.curitem)
    self.curitem = {}

  def get_output(self):
    """Close any open session (see end_input) and return the session list.

    The returned list is self.output itself, not a copy.
    """
    self.end_input()
    return self.output

  def end_input(self):
    """Close the open session, if any, at the end of the event stream.

    There is no later event to bound the session, so its end is set to the
    last active time plus MAX_INACTIVE_MS.
    """
    if 'url' in self.curitem:
      self._close_session(self._latest_end())

  def end_session(self, curtime):
    """Close the open session, if any, because of an event at `curtime`.

    Ensures that end <= last active time + MAX_INACTIVE_MS. Does nothing
    when no session is open.
    """
    if 'url' in self.curitem:
      self._close_session(min(curtime, self._latest_end()))

  def start_session(self, url, curtime):
    """Close any open session and open a new one for `url` at `curtime`.

    Raises:
      ValueError: if url is None.
    """
    if url is None:
      # Raising a plain string is not allowed (it fails with an unrelated
      # TypeError and the message is lost), so raise a real exception.
      raise ValueError('start_session should not be called with url==None')
    self.end_session(curtime)
    self.curitem = {'url': url, 'start': curtime, 'active': curtime}

  def continue_session(self, url, curtime):
    """Record activity on `url` at `curtime`.

    Extends the open session when it is for the same URL; otherwise (no open
    session, or a different URL) starts a new session, closing the old one.

    Raises:
      ValueError: if url is None.
    """
    if url is None:
      raise ValueError('continue_session should not be called with url==None')
    if 'url' in self.curitem and self.curitem['url'] == url:
      # still on same site
      self.curitem['active'] = curtime
      return
    # No open session, or have gone to a different site. start_session
    # closes the previous session itself, so no separate end_session call
    # is needed here.
    self.start_session(url, curtime)

  def process_input(self, data):
    """Update session state from one logged event.

    Reads data['evt'] and data['time'] (timestamp in milliseconds) and passes
    `data` to get_focused_tab() to find the focused URL.
    """
    evt = data['evt']
    curtime = data['time'] # this is timestamp in milliseconds
    cururl = get_focused_tab(data)
    if cururl is None: # browser is not focused
      self.end_session(curtime)
      return
    if evt == 'idle_changed':
      self.process_idle_changed(data)
      return
    if evt == 'still_browsing': # ignore still_browsing events
      return
    self.continue_session(cururl, curtime)

  def process_idle_changed(self, data):
    """Handle an 'idle_changed' event.

    data['idlestate'] can be either idle, locked, or active. Going idle or
    locked closes the open session; becoming active starts a new session on
    the focused tab. Any other value is ignored.
    """
    idlestate = data['idlestate']
    curtime = data['time']
    if idlestate in ('idle', 'locked'):
      self.end_session(curtime)
    elif idlestate == 'active':
      cururl = get_focused_tab(data)
      if cururl is None:
        # Not reachable through process_input (which filters this case out
        # first); treat a direct call the same way process_input treats an
        # unfocused browser instead of failing inside start_session.
        self.end_session(curtime)
        return
      self.start_session(cururl, curtime)

# Smoke-run SessionTracker over the first log file only, and only when that
# file is at most 0.1 MB (1 MB counted as 1000*1000 bytes).
for filename in list_logfiles()[:1]:
  size_bytes = os.stat(filename).st_size
  size_megabytes = float(size_bytes) / (1000.0*1000.0)
  if size_megabytes <= 0.1:
    session_tracker = SessionTracker()
    for data in iterate_data(filename):
      session_tracker.process_input(data)
    # flush the session that is still open after the last event
    session_tracker.end_input()
    # to inspect the result: session_tracker.get_output()



In [None]:
def compute_url_focus(*args, **kwargs):
  """Placeholder for an unfinished helper.

  This cell previously held only a bare `def compute_url_focus` line with no
  parameter list or body, which is a SyntaxError and stops the notebook from
  running top to bottom.

  TODO: the intended parameters and behavior are not recorded in this
  notebook; replace this stub (and its catch-all signature) once decided.

  Raises:
    NotImplementedError: always.
  """
  raise NotImplementedError('compute_url_focus is not implemented yet')

In [15]:
def compute_for_key_on_data(key, data, name, function):
  """Compute function(data) and store it as JSON, unless already stored.

  The result is written via sdir_dumpjson to '<name>/<key>.json'. When
  sdir_exists reports that path as present, nothing is computed or written.
  Always returns None.
  """
  outfile = name + '/' + key + '.json'
  if not sdir_exists(outfile):
    sdir_dumpjson(outfile, function(data))



In [4]:
# Collect the distinct event types (the 'evt' field) that occur in every log
# file of at most 0.1 MB (1 MB counted as 1000*1000 bytes).
evt_types = set()
for filename in list_logfiles():
  size_bytes = os.stat(filename).st_size
  size_megabytes = float(size_bytes) / (1000.0*1000.0)
  if size_megabytes > 0.1:
    continue
  # Single-argument parenthesized print: same output as the Python 2 print
  # statement, and also valid as the Python 3 print function.
  print(filename)
  for data in iterate_data_compressed(filename):
    evt_types.add(data['evt'])

/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_Eq7EExfolE.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_3H5aGCb1Km.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_vbZrd5LD5J.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_pQiVSc5Ta5.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_gtg8G9PJk7.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_r9pox860Kx.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_ZIpD1khEqv.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_kEVg7v0LcS.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_cTggWOIxwe.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_epgLZz5GYr.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_NlUyZzFe88.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_uVep19IpQg.json
/home/gkovacs/tmi-data/local_2016-03-23_19:13:45-07:00/logs_JgGfKNPVdz.json
/home/gkovac

In [15]:
# Show the collected event types. The parenthesized single-argument form
# prints the same text under Python 2 and is also valid on Python 3.
print(evt_types)

set([u'tab_moved', u'tab_replaced', u'tab_zoomchange', u'browser_focus_changed', u'tab_activated', u'tab_created', u'idle_changed', u'tab_updated', u'window_focus_changed', u'tab_attached', u'window_created', u'tab_highlighted', u'window_closed', u'tab_removed', u'still_browsing', u'tab_detached'])
