Browse files

More improvements of storage format and rendering

  • Loading branch information...
1 parent 698fb1d commit af50c483b381132f4afdfb7c774793f60d8db697 @dcramer committed Mar 2, 2012
Showing with 299 additions and 204 deletions.
  1. +26 −18 htmlfiles/index.html
  2. +31 −69 peek/collector.py
  3. +13 −11 peek/reporter.py
  4. +161 −64 peek/tracer.py
  5. +22 −0 script.py
  6. +46 −42 tests/peek/collector/tests.py
View
44 htmlfiles/index.html
@@ -21,28 +21,36 @@
$results.empty();
self.renderRow($results, null, data);
+ // $.each(data, function(lineno, chunk){
+ // self.renderRow($results, null, chunk);
+ // });
// $results.Treeview({ speed: "fast", collapsed: true, control: "#treecontrol" });
};
this.renderRow = function($el, parent, data){
+ console.log(data);
+
+ if (data.lines.length === 0) return;
+
var $parent = $('<table></table>');
- var had_any = false;
- $.each(data, function(key, value){
- had_any = true;
- var label = value.s;
- if (label == '') {
- label = '&nbsp;';
- }
- var timing = ((Math.round((value.t / value.n) * 1000, 5) / 1000) || 0);
- var $row = $('<tr><td class="t">' + timing + 's</td><td class="l">' + value.l + '</td><td class="c"><pre>' + label + '</pre></tr>');
+ $.each(data.lines, function(lineno, line){
+ var label = line.source || '&nbsp;';
+ var children = data.children[lineno] || {};
+ var timing = Math.round(line.time_spent * 1000, 3) / 1000;
+ var $row = $('<tr><td class="t">' + timing + 's</td><td class="n">' + line.num_calls + '</td><td class="l">' + lineno + '</td><td class="c"><pre>' + label + '</pre></tr>');
+ // var timing = ((Math.round((value.time_spent / value.num_calls) * 1000, 5) / 1000) || 0);
+ // var $row = $('<tr><td class="t">' + timing + 's</td><td class="n">' + value.num_calls + '</td><td class="l">' + value.lineno + '</td><td class="c"><pre>' + label + '</pre></tr>');
+
+ var $rowc = $($row.find('td.c').get());
+ $.each(children, function(call_sig, child){
+ self.renderRow($rowc, data, child);
+ });
+
$parent.append($row);
- self.renderRow($row.find('td:last-child'), value, value.c);
});
- if (had_any) {
- $el.append($parent);
- }
+ $el.append($parent);
};
return this;
@@ -57,9 +65,6 @@
padding: 0;
margin: 0;
}
- li {
- margin-left: 10px;
- }
pre {
font-family: monaco, fixed;
font-size: inherit;
@@ -75,15 +80,18 @@
vertical-align: top;
font-family: monaco, fixed;
}
- td.t, td.l {
+ td.t, td.l, td.n {
padding-right: 10px;
text-align: right;
}
td.c {
color: #555;
text-align: left;
}
- </style>
+/* .c table table table table {
+ display: none;
+ }
+*/ </style>
</head>
<body>
<ol id="results"></ol>
View
100 peek/collector.py
@@ -8,124 +8,86 @@
__all__ = ('Collector',)
+import inspect
import sys
import threading
-# TODO: handle Python < 2.7
-from collections import OrderedDict
from peek.tracer import Tracer
class Collector(object):
def __init__(self, logger=None, tracer=Tracer):
- self._collectors = []
self._tracer_class = Tracer
self.logger = logger
- self.reset()
- def _start_tracer(self):
+ def _start_tracer(self, origin):
"""
Start a new Tracer object, and store it in self.tracers.
"""
tracer = self._tracer_class(logger=self.logger)
- tracer.cur_file_data = self.data
- fn = tracer.start()
+ tracer.data = self.data
+ fn = tracer.start(origin)
self.tracers.append(tracer)
return fn
- def _installation_trace(self, frame_unused, event_unused, arg_unused):
+ def _installation_trace(self, origin):
"""
Called on new threads, installs the real tracer.
"""
- # Remove ourselves as the trace function
- sys.settrace(None)
+ def _wrapped(frame_unused, event_unused, arg_unused):
+ # Remove ourselves as the trace function
+ sys.settrace(None)
- # Install the real tracer.
- fn = self._start_tracer()
+ # Install the real tracer.
+ fn = self._start_tracer(origin)
- # Invoke the real trace function with the current event, to be sure
- # not to lose an event.
- if fn:
- fn = fn(frame_unused, event_unused, arg_unused)
+ # Invoke the real trace function with the current event, to be sure
+ # not to lose an event.
+ if fn:
+ fn = fn(frame_unused, event_unused, arg_unused)
- # Return the new trace function to continue tracing in this scope.
- return fn
+ # Return the new trace function to continue tracing in this scope.
+ return fn
+ return _wrapped
def reset(self):
- """
- Clear collected data, and prepare to collect more.
- """
- # A dictionary mapping filenames to dicts with linenumber keys,
- # or mapping filenames to dicts with linenumber pairs as keys.
-
- # TODO: this is duplicated in the tracer to some extent
- self.data = {
- "n": 0,
- "c": 0,
- "c": OrderedDict(),
- }
-
- # Our active Tracers.
self.tracers = []
+ self.data = {}
def start(self):
"""
Start collecting trace information.
"""
- if self._collectors:
- self._collectors[-1].pause()
- self._collectors.append(self)
-
- # Check to see whether we had a fullcoverage tracer installed.
- traces0 = None
- if hasattr(sys, "gettrace"):
- fn0 = sys.gettrace()
- if fn0:
- tracer0 = getattr(fn0, '__self__', None)
- if tracer0:
- traces0 = getattr(tracer0, 'traces', None)
+ origin = inspect.stack()[1][0]
- # Install the tracer on this thread.
- fn = self._start_tracer()
+ self.reset()
- if traces0:
- for args in traces0:
- (frame, event, arg), lineno = args
- fn(frame, event, arg, lineno=lineno)
+ # Install the tracer on this thread.
+ self._start_tracer(origin)
# Install our installation tracer in threading, to jump start other
# threads.
- threading.settrace(self._installation_trace)
+ # threading.settrace(self._installation_trace(origin))
def pause(self):
"""
- Pause tracing, but be prepared to `resume`.
+ Pause tracing, but be prepared to resume.
"""
for tracer in self.tracers:
tracer.stop()
- stats = tracer.get_stats()
- if stats:
- print("\nCoverage.py tracer stats:")
- for k in sorted(stats.keys()):
- print("%16s: %s" % (k, stats[k]))
threading.settrace(None)
def stop(self):
"""
Stop collecting trace information.
"""
- assert self._collectors
- assert self._collectors[-1] is self
-
self.pause()
self.tracers = []
- # Remove this Collector from the stack, and resume the one underneath
- # (if any).
- self._collectors.pop()
- if self._collectors:
- self._collectors[-1].resume()
-
- def get_calls(self):
+ def get_results(self):
# the first key should be our trace, lets ignore it
- # the last key should be our stop call, lets ignore it
- return dict((k, v) for k, v in self.data['c'].items()[1:-1])
+ # the last key is our collector
+ return self.data
+ # calls = self.data['children']
+ # for key, call in calls.iteritems():
+ # print '%s:%s, line %d, %d calls' % (call.get('filename'), call.get('function'), call['lineno'], call['num_calls'])
+ # print ' ', call.get('source')
View
24 peek/reporter.py
@@ -9,6 +9,7 @@
__all__ = ('Reporter',)
import json
+import os
import os.path
@@ -18,7 +19,7 @@ def __init__(self, origin, collector, output=None):
self.collector = collector
if output is None:
- output = 'peekhtml'
+ output = os.path.join(os.getcwd(), 'peekhtml')
self.output = os.path.normpath(output)
@@ -33,16 +34,16 @@ class HTMLReporter(Reporter):
This actually just dumps some JSON to a file, which a static HTML page will
pull in and render.
"""
- def _get_origin(self, data, origin):
- if origin in data:
- return {origin: data[origin]}
+ # def _get_origin(self, data, origin):
+ # if origin in data:
+ # return {origin: data[origin]}
- for key, value in data.iteritems():
- if key.startswith(origin):
- return {key: value}
+ # for key, value in data.iteritems():
+ # if key.startswith(origin):
+ # return {key: value}
- for key, value in data.iteritems():
- return self._get_origin(value['c'], origin)
+ # for key, value in data.iteritems():
+ # return self._get_origin(value['children'], origin)
# def get_files(self, calls, files=None):
# if files is None:
@@ -55,10 +56,11 @@ def _get_origin(self, data, origin):
# return files
def report(self):
- calls = self._get_origin(self.collector.get_calls(), self.origin)
+ results = self.collector.get_results()
+ # calls = self._get_origin(self.collector.get_results(), self.origin)
if not os.path.exists(self.output):
os.makedirs(self.output)
with open(os.path.join(self.output, 'data.json'), 'w') as fp:
- fp.write('Peek.load(' + json.dumps(calls) + ');')
+ fp.write('Peek.load(' + json.dumps(results) + ');')
View
225 peek/tracer.py
@@ -2,135 +2,238 @@
peek.tracer
~~~~~~~~~~~
+This code is complicated as shit, but it basically recurses and captures
+execution times in a tree. Below you'll find an example output structure
+with expanded variable names.
Code is inspired and originally based on ``coverage.py``.
+::
+
+ {
+ "filename": "my_test.py",
+
+ "event": "call",
+
+ # module and function are optional
+ "module": "my.test",
+ "function": "main",
+ "lineno": 0,
+
+ "num_calls": 0, # same as len(calls)
+ "time_spent": 0,
+
+ "children": [
+ {
+ "event": "line",
+ "source": "def main():",
+
+ "num_calls": 0,
+ "time_spent": 0,
+ },
+ {
+ "event": "call",
+ "source": " __import__('foo')",
+
+ "filename": "builtins_or_some_shit.py",
+ "module": "__builtin__",
+ "function": "__import__",
+ "lineno": 5,
+
+ "caller": {
+ "lineno": 5,
+ }
+
+ "num_calls": 0,
+ "time_spent": 0,
+
+ "children": [
+ {
+ "event": "line",
+ "source": "def __import__(foo):",
+
+ "num_calls": 0,
+ "time_spent": 0,
+ },
+ {
+ "event": "line",
+ "source": " return sys.modules[foo]",
+
+ "num_calls": 0,
+ "time_spent": 0,
+ },
+ ],
+ },
+ {
+ "event": "return",
+ "source": " return True",
+
+ "num_calls": 0,
+ "time_spent": 0,
+ },
+ ],
+ }
+
+
:copyright: 2012 DISQUS.
:license: Apache License 2.0, see LICENSE for more details.
"""
__all__ = ('Tracer',)
+from collections import defaultdict
import inspect
import sys
import time
-from collections import OrderedDict
class Tracer(object):
"""
A tracer which records timing information for every executed line.
- Statistics generated from the Tracer resemble the following:
-
- OrderedDict([("filename:function_name", {
- "filename": "filename",
- "module": "module.name",
- "function": "function_name',
- "num_calls": 0,
- "time_spent": 0,
- "calls": OrderedDict([
- # recursive input same as root
- ])
- })])
+ This code is complicated as hell because it has to determine where
+ the origin is so it records the tree correctly.
"""
def __init__(self, logger=None):
- self.cur_file_data = None
- self.last_time = 0
+ self.data = None
+ self.depth = 0
+ self.pause_until = None
self.data_stack = []
self.last_exc_back = None
self.last_exc_firstlineno = 0
self.logger = logger
- def _get_struct(self, frame):
+ def _get_struct(self, frame, event):
filename = inspect.getfile(frame)
function_name = frame.f_code.co_name
f_globals = getattr(frame, 'f_globals', {})
module_name = f_globals.get('__name__')
- return {
- "f": filename,
- "m": module_name,
- "fn": function_name,
- "n": 0,
- "t": 0,
- "l": frame.f_lineno,
- "c": OrderedDict([
- ("%s:%d" % (filename, n), {
- "s": l[:-1],
- "n": 0,
- "t": 0,
- "l": n,
- "c": OrderedDict(),
- }) for n, l in enumerate(*inspect.getsourcelines(frame))
- ]),
+
+ source, lineno = inspect.getsourcelines(frame)
+
+ pre_frame = frame.f_back
+
+ result = {
+ "event": event,
+
+ "filename": filename,
+ "module": module_name,
+ "function": function_name,
+
+ "num_calls": 0,
+ "time_spent": 0,
+
+ "lineno": frame.f_lineno,
+
+ "lines": dict(
+ (num, {
+ "num_calls": 0,
+ "time_spent": 0,
+ "source": code[:-1],
+ })
+ for num, code in enumerate(source, lineno)
+ ),
+
+ "children": defaultdict(dict),
+ # lineno: {
+ # function_name: struct
+ # }
}
+ if pre_frame:
+ result["caller"] = {
+ "lineno": pre_frame.f_lineno,
+ }
+
+ return result
+
def _trace(self, frame, event, arg_unused):
"""
The trace function passed to sys.settrace.
"""
cur_time = time.time()
+ lineno = frame.f_lineno
+ depth = self.depth
+
if self.logger:
+ # self.logger.debug'%s:%s, line %d, %d calls' % (call.get('filename'), call.get('function'), call['lineno'], call['num_calls'])
+
self.logger.debug("trace event: %s %r @%d" % (
event, frame.f_code.co_filename, frame.f_lineno))
if self.last_exc_back:
if frame == self.last_exc_back:
- # Someone forgot a return event.
- self.cur_file_data, self.last_time = self.data_stack.pop()
- # TODO: is this correct?
- self.cur_file_data['t'] += (cur_time - self.last_time)
+ self.data['time_spent'] += (cur_time - self.start_time)
+ self.depth -= 1
+
+ self.data = self.data_stack.pop()
self.last_exc_back = None
if event == 'call':
- # Entering a new function context. Decide if we should trace
- # in this file.
+ # Update our state
+ self.depth += 1
+
+ # origin line number (where it was called from)
+ o_lineno = frame.f_back.f_lineno
+
+ if self.pause_until is not None:
+ if depth == self.pause_until:
+ self.pause_until = None
+ else:
+ return self._trace
+
+ if o_lineno not in self.data['lines']:
+ self.pause_until = depth
+
+ return self._trace
# Append it to the stack
- self.data_stack.append((self.cur_file_data, cur_time))
+ self.data_stack.append(self.data)
- filename = inspect.getfile(frame)
- key = '%s:%d' % (filename, frame.f_lineno)
+ call_sig = '%s:%s' % (inspect.getfile(frame), frame.f_code.co_name)
- if key not in self.cur_file_data['c']:
- self.cur_file_data['c'][key] = self._get_struct(frame)
+ if call_sig not in self.data['children']:
+ self.data['children'][o_lineno][call_sig] = self._get_struct(frame, event)
- self.cur_file_data = self.cur_file_data['c'][key]
- self.cur_file_data['n'] += 1
+ self.data = self.data['children'][o_lineno][call_sig]
- # TODO: do we need this
- # Set the last_line to -1 because the next arc will be entering a
- # code block, indicated by (-1, n).
- # self.last_line = -1
+ self.data['num_calls'] += 1
elif event == 'line':
# Record an executed line.
- pass
- # filename = frame.f_code.co_filename
- # function_name = frame.f_code.co_name
- # key = '%s:%s:%d' % (filename, function_name, frame.f_lineno)
- # self.cur_file_data['c'][key] = self._get_line_struct(frame)
+ if self.pause_until is None and lineno in self.data['lines']:
+ self.data['lines'][lineno]['num_calls'] += 1
+ self.data['lines'][lineno]['time_spent'] += (cur_time - self.start_time)
elif event == 'return':
# Leaving this function, pop the filename stack.
- if self.cur_file_data:
- self.cur_file_data['t'] += (cur_time - self.last_time)
- self.cur_file_data, self.last_time = self.data_stack.pop()
+ if self.pause_until is None:
+ self.data['time_spent'] += (cur_time - self.start_time)
+ self.data = self.data_stack.pop()
+ self.data['time_spent'] += (cur_time - self.start_time)
+ # self.data['lines'][lineno]['num_calls'] += 1
+ # self.data['lines'][lineno]['time_spent'] += (cur_time - self.start_time)
+
+ self.depth -= 1
elif event == 'exception':
self.last_exc_back = frame.f_back
self.last_exc_firstlineno = frame.f_code.co_firstlineno
return self._trace
- def start(self):
+ def start(self, origin):
"""
Start this Tracer.
Return a Python function suitable for use with sys.settrace().
"""
+ self.start_time = time.time()
+ self.pause_until = None
+ self.data.update(self._get_struct(origin, 'origin'))
+ self.data_stack.append(self.data)
sys.settrace(self._trace)
return self._trace
@@ -143,9 +246,3 @@ def stop(self):
msg = "Trace function changed, measurement is likely wrong: %r"
self.logger.warn(msg % sys.gettrace())
sys.settrace(None)
-
- def get_stats(self):
- """
- Return a dictionary of statistics, or None.
- """
- return None
View
22 script.py
@@ -0,0 +1,22 @@
+import time
+
+
+def function_one(arg):
+ # a comment
+ foo = arg
+ bar = 'hello'
+ function_two(foo, bar)
+
+
+def function_two(foo, bar):
+ result = bar + ' ' + foo
+ return result
+
+
+def main():
+ for n in xrange(10):
+ function_one(str(n))
+ time.sleep(0.1)
+
+if __name__ == '__main__':
+ main()
View
88 tests/peek/collector/tests.py
@@ -2,6 +2,17 @@
from peek.collector import Collector
+def collect():
+ collector = Collector()
+ collector.start()
+
+ function_one(' world')
+
+ collector.stop()
+
+ return collector
+
+
def function_one(arg):
# a comment
foo = arg
@@ -16,53 +27,46 @@ def function_two(foo, bar):
class CollectorIntegrationTestCase(TestCase):
def test_does_trace(self):
- collector = Collector()
- collector.start()
-
- function_one(' world')
+ collector = collect()
- collector.stop()
+ data = collector.get_results()
+ self.assertGreater(len(data), 0)
- data = collector.get_calls()
- self.assertEquals(len(data), 1)
-
- key = '%s:function_one:5' % __file__
+ key = 9
self.assertIn(key, data)
result = data[key]
- self.assertIn('f', result)
- self.assertEquals(result['f'], __file__)
- self.assertIn('m', result)
- self.assertEquals(result['m'], __name__)
- self.assertIn('fn', result)
- self.assertEquals(result['fn'], 'function_one')
- self.assertIn('n', result)
- self.assertEquals(result['n'], 1)
- self.assertIn('t', result)
- self.assertGreater(result['t'], 0)
-
- self.assertIn('c', result)
- calls = result['c']
- self.assertEquals(len(calls), 4)
-
- import pprint
- pprint.pprint(calls)
-
- child_key = '%s:function_two:12' % __file__
+ self.assertIn('filename', result)
+ self.assertEquals(result['filename'], __file__)
+ self.assertIn('module', result)
+ self.assertEquals(result['module'], __name__)
+ self.assertIn('function', result)
+ self.assertEquals(result['function'], 'function_one')
+ self.assertIn('num_calls', result)
+ self.assertEquals(result['num_calls'], 1)
+ self.assertIn('time_spent', result)
+ self.assertGreater(result['time_spent'], 0)
+
+ self.assertIn('children', result)
+ calls = result['children']
+ self.assertEquals(len(calls), 5, calls.keys())
+
+ child_key = 20
self.assertIn(child_key, calls)
result = calls[child_key]
- self.assertIn('f', result)
- self.assertEquals(result['f'], __file__)
- self.assertIn('m', result)
- self.assertEquals(result['m'], __name__)
- self.assertIn('fn', result)
- self.assertEquals(result['fn'], 'function_two')
- self.assertIn('n', result)
- self.assertEquals(result['n'], 1)
- self.assertIn('t', result)
- self.assertGreater(result['t'], 0)
-
- self.assertIn('c', result)
- calls = result['c']
- self.assertEquals(len(calls), 2)
+ self.assertIn('filename', result)
+ self.assertEquals(result['filename'], __file__)
+ self.assertIn('module', result)
+ self.assertEquals(result['module'], __name__)
+ self.assertIn('function', result)
+ self.assertEquals(result['function'], 'function_two')
+ self.assertIn('num_calls', result)
+ self.assertEquals(result['num_calls'], 1)
+ self.assertIn('time_spent', result)
+ self.assertGreater(result['time_spent'], 0)
+
+ self.assertIn('children', result)
+ calls = result['children']
+ self.assertEquals(len(calls), 3, calls.keys())
+

0 comments on commit af50c48

Please sign in to comment.