Permalink
Fetching contributors…
Cannot retrieve contributors at this time
481 lines (393 sloc) 13.4 KB
#-------------------------------------------------------------------------------
# htmlize-ast-dump.py: Turn a Clang AST dump (-ast-dump) into cross-linked HTML.
#
# Run with --help for usage information.
#
# Note: this script requires Python 3.4; earlier versions of Python 3 should
# work if you install the enum34 module.
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import argparse
import enum
import html
import io
import json
import pprint
import re
import sys
# Skeleton of the generated HTML page. It is filled in with str.format() by
# htmlize(), which supplies three fields:
#   {lines}    - the HTML-ized dump lines, placed inside the <pre> element
#   {nav_data} - a JSON object assigned to the JavaScript variable nav_data
#   {js_code}  - JavaScript source inserted into the <script> element
# Because the template goes through str.format(), every literal CSS brace is
# doubled ({{ and }}).
# NOTE(review): the stylesheet has a #nav_title rule, but the body below has no
# element with that id.
HTML_OUTPUT_TEMPLATE = r'''
<html>
<head>
<style>
.main_area, .nav_area {{
position: absolute;
left: 0;
right: 0;
}}
.main_area {{
top: 0;
height: 75%;
overflow: scroll;
background-color: black;
white-space: nowrap;
padding-left: 10px;
}}
.nav_area {{
bottom: 0;
height: 25%;
overflow: scroll;
background-color: #131313;
}}
#nav_title {{
margin-left: auto;
margin-right: auto;
width: 200px;
font-weight: bold;
color: white;
font-size: 140%;
}}
#nav_contents {{
font-family: Consolas,monospace;
font-size: 80%;
color: #AAAAAA;
padding: 10px;
}}
.my-pre {{
line-height: 0.8;
padding: 0px 0px;
font-family: Consolas,monospace;
font-size: 80%;
}}
a:link {{
text-decoration: underline;
color: inherit;
}}
a:visited {{
text-decoration: underline;
color: inherit;
}}
a:hover {{
text-decoration: underline;
color: #FFFFFF;
}}
a:active {{
text-decoration: underline;
color: #FFFFFF;
}}
.ansi-bold {{
font-weight: bold;
white-space: pre;
}}
.ansi-black {{
color: #000000;
white-space: pre;
}}
.ansi-red {{
color: #d23737;
white-space: pre;
}}
.ansi-green {{
color: #17b217;
white-space: pre;
}}
.ansi-yellow {{
color: #b26717;
white-space: pre;
}}
.ansi-blue {{
color: #2727c2;
white-space: pre;
}}
.ansi-magenta {{
color: #b217b2;
white-space: pre;
}}
.ansi-cyan {{
color: #17b2b2;
white-space: pre;
}}
.ansi-white {{
color: #f2f2f2;
white-space: pre;
}}
</style>
</head>
<body>
<div class="main_area">
<pre class="my-pre">{lines}
</pre>
</div>
<div class="nav_area">
<div id="nav_contents">[Click on node address for cross-reference]</div>
</div>
<!-- Javascript -->
<script type="text/javascript">
var nav_data = {nav_data};
{js_code}
</script>
</body>
</html>
'''
# JavaScript inserted into the page through the {js_code} field of
# HTML_OUTPUT_TEMPLATE. It is passed to str.format() as an argument rather
# than being formatted itself, so its braces are written singly.
#
# MakeAnchorLink(addr) builds a link to the in-page anchor 'anchor_<addr>'.
# OnAnchorClick(elem_id) looks elem_id up in the global nav_data object and
# replaces the contents of the element with id 'nav_contents' with the node's
# name and id plus lists of its parent, children and users, each rendered as
# an anchor link.
JS_CODE = r'''
MakeAnchorLink = function(addr) {
anchorname = 'anchor_' + addr
return '<a href="#' + anchorname + '">' + addr + '</a>'
}
OnAnchorClick = function(elem_id) {
var nav_entry = nav_data[elem_id];
var contents = '';
contents += nav_entry['name'] + ' ' + nav_entry['id'];
contents += '<ul>\n';
parent_id = nav_entry['parent'];
if (parent_id === null) {
contents += '<li>Parent: none</li>\n';
} else {
parent_name = nav_data[parent_id]['name']
contents += '<li>Parent: ' + parent_name + ' ' +
MakeAnchorLink(parent_id) + '</li>\n';
}
contents += '<li>Children:'
if (nav_entry['children'].length == 0) {
contents += 'none</li>'
} else {
contents += '\n<ul>\n'
for (var i = 0; i < nav_entry['children'].length; i++) {
child_id = nav_entry['children'][i];
child_name = nav_data[child_id]['name'];
contents += '<li>' + child_name + ' ' +
MakeAnchorLink(child_id) + '</li>\n';
}
contents += '</ul>\n'
}
contents += '<li>Users:'
if (nav_entry['users'].length == 0) {
contents += 'none</li>'
} else {
contents += '\n<ul>\n'
for (var i = 0; i < nav_entry['users'].length; i++) {
user_id = nav_entry['users'][i];
user_name = nav_data[user_id]['name'];
contents += '<li>' + user_name + ' ' +
MakeAnchorLink(user_id) + '</li>\n';
}
contents += '</ul>\n'
}
document.getElementById('nav_contents').innerHTML = contents;
}
'''
# HTML wrapper for one token: {klass} receives the space-separated CSS class
# names and {text} the already HTML-escaped token text (see htmlize()).
SPAN_TEMPLATE = r'<span class="{klass}">{text}</span>'
@enum.unique
class Color(enum.Enum):
    """Foreground colors understood by the tokenizer.

    The value of each member is the numeric ANSI code that selects it, so a
    code read from the input can be turned into a member with Color(code).
    The lower-cased member name is also used to build the 'ansi-<name>' CSS
    class in the generated HTML.
    """
    BLACK = 30    # ESC[30m
    RED = 31      # ESC[31m
    GREEN = 32    # ESC[32m
    YELLOW = 33   # ESC[33m
    BLUE = 34     # ESC[34m
    MAGENTA = 35  # ESC[35m
    CYAN = 36     # ESC[36m
    WHITE = 37    # ESC[37m
# The input is broken into tokens. A token is a piece of text together with
# the style that applies to it. The text is decoded from bytes to a string.
class Token:
    """A piece of input text paired with the style that applies to it.

    Attributes:
        text: the token text as a str, decoded from the bytes passed in.
        style: the style object passed in; stored as-is, not copied.
    """

    def __init__(self, text, style):
        """Create a token.

        text: bytes holding the raw token text.
        style: the style in effect for this text.
        """
        # A strict 'ascii' decode raises UnicodeDecodeError on any non-ASCII
        # byte in the input. Decode as UTF-8 (a superset of ASCII, so pure
        # ASCII input is unaffected) and substitute U+FFFD for bytes that are
        # not valid UTF-8 instead of aborting the whole conversion.
        self.text = text.decode('utf-8', errors='replace')
        self.style = style

    def __repr__(self):
        return 'Token<text={}, style={}>'.format(self.text, self.style)
class Style:
    """Rendering style of a token: a foreground color and a bold flag.

    Attributes:
        color: a Color member; defaults to Color.WHITE.
        bold: True when the text is bold; defaults to False.
    """

    def __init__(self, color=Color.WHITE, bold=False):
        self.bold = bold
        self.color = color

    def __repr__(self):
        template = 'Style<color={}, bold={}>'
        return template.format(self.color, self.bold)
# Matches, in bytes input, an escape sequence of the form ESC '[' <params> 'm'.
# Group 1 captures <params>: the one or more non-'m' bytes between '[' and
# 'm', which tokenize_line() splits on ';' into individual numeric codes.
ANSI_PATTERN = re.compile(rb'\x1b\[([^m]+)m')
def tokenize_line(line):
    """Produce (yield) a stream of tokens from an input line.

    line: bytes holding one line of the dump, possibly containing escape
        sequences matched by ANSI_PATTERN.

    The line is split at every escape sequence. The text preceding each
    sequence is yielded as a Token carrying the style in effect at that point;
    the text after the last sequence is yielded as a final Token. Tokens with
    empty text are yielded too, so a line with N escape sequences always
    produces N + 1 tokens (analyze_line() relies on token positions).

    Codes inside one sequence are separated by ';' and applied in order:
    0 resets to the default style, 1 turns bold on, anything else must be a
    valid Color value.

    Raises ValueError if a code is not an integer or is not 0, 1 or a Color
    value.
    """
    # The end pos of the last pattern match.
    last_end = 0
    # Style in effect for the text currently being scanned.
    cur_style = Style()
    for match in ANSI_PATTERN.finditer(line):
        yield Token(line[last_end:match.start()], cur_style)
        last_end = match.end()
        # Update the current style according to the codes in the match.
        #
        # A fresh Style is created for every change instead of mutating
        # cur_style: the old object is already referenced by tokens that have
        # been yielded, and changing it in place would retroactively restyle
        # them whenever a sequence does not start with a reset.
        for ansi_code in (int(c) for c in match.group(1).split(b';')):
            if ansi_code == 0:
                # Reset
                cur_style = Style()
            elif ansi_code == 1:
                cur_style = Style(color=cur_style.color, bold=True)
            else:
                # Color code. Color() raises ValueError if the code is not a
                # known color.
                cur_style = Style(color=Color(ansi_code), bold=cur_style.bold)
    yield Token(line[last_end:], cur_style)
# Link injection happens at the HTML level - everything is a string by now.
# ADDR_PATTERN recognizes a hexadecimal address: '0x' followed by one or more
# hex digits. Such addresses serve as node IDs.
ADDR_PATTERN = re.compile(r'0x[0-9a-fA-F]+')
def make_anchor_link(addr, link_text):
    """Return an <a> element that jumps to the in-page anchor for addr.

    addr: address string; the link targets the fragment 'anchor_<addr>'.
    link_text: string placed between the opening and closing tags, unescaped.
    """
    pieces = ('<a href="#', 'anchor_', addr, '">', link_text, '</a>')
    return ''.join(pieces)
def make_anchor_target(addr):
    """Return an empty <a> element whose id is 'anchor_<addr>'.

    This is the target that links produced by make_anchor_link() jump to.
    """
    pieces = ('<a id="', 'anchor_', addr, '"></a>')
    return ''.join(pieces)
def inject_links(html_line_chunks):
    """Turn the addresses in one line's HTML chunks into anchors and links.

    html_line_chunks: list of HTML strings making up a single output line.
        The list is modified in place; nothing is returned.

    Only the first address found in each chunk is considered. Across the line:

    * The first address is the address of the node the line describes. It gets
      an anchor target, followed by a link that calls the JavaScript function
      OnAnchorClick with that address.
    * Every later address refers to another node and becomes a link to that
      node's anchor.
    """
    node_addr_pending = True
    for i, chunk in enumerate(html_line_chunks):
        match = ADDR_PATTERN.search(chunk)
        if not match:
            continue
        addr = match.group()
        before = chunk[:match.start()]
        after = chunk[match.end():]
        if node_addr_pending:
            # Wrap only the address itself. Wrapping everything up to the end
            # of the chunk would pull the chunk's closing tag inside the <a>
            # element and yield mis-nested HTML.
            linked = (
                make_anchor_target(addr) +
                '<a onclick="OnAnchorClick(\'' + addr +
                '\');" href="#javascript:void(0)">' +
                addr + '</a>')
            node_addr_pending = False
        else:
            linked = make_anchor_link(addr, addr)
        html_line_chunks[i] = before + linked + after
def analyze_line(tokens):
    """Extract the node information from one tokenized line.

    tokens: list of tokens (objects with a .text string) for the line; it
        must contain more than two tokens.

    Returns the tuple: <id>, <name>, <nesting level>, [<used id>...]

    * <id> is the stripped text of the first token that starts with an
      address, or '' if there is none.
    * <name> is the stripped, non-empty token texts preceding the id, joined
      with single spaces.
    * <nesting level> is the length of the text of the second token, or 0 for
      the top-level line, whose second token starts with 'Translation'.
    * The used ids are the stripped texts of all tokens after the id that
      start with an address.
    """
    assert(len(tokens) > 2)

    # The top-level TranslationUnitDecl node has no nesting prefix, so its
    # name starts one token earlier than on every other line.
    second_text = tokens[1].text
    if second_text.startswith('Translation'):
        nesting, pos = '', 1
    else:
        nesting, pos = second_text, 2

    # Collect name parts until a token that looks like an ID, or the end of
    # the line, is reached.
    total = len(tokens)
    name_parts = []
    while pos < total:
        piece = tokens[pos].text.strip()
        if piece:
            if ADDR_PATTERN.match(piece):
                break
            name_parts.append(piece)
        pos += 1

    # pos now either points at the ID token or is past the end of the list.
    node_id = tokens[pos].text.strip() if pos < total else ''

    # Every address-like token after the ID is a use of another node.
    trailing_texts = (tok.text.strip() for tok in tokens[pos + 1:])
    uses = [text for text in trailing_texts if ADDR_PATTERN.match(text)]

    return node_id, ' '.join(name_parts), len(nesting), uses
def prepare_nav_data(line_info):
    """Given a list of tuples from analyze_line, prepares navigation data.

    line_info: non-empty list of (id, name, nesting level, [used id...])
        tuples. The first tuple must have nesting level 0.

    Returns a dictionary mapping each id to a dictionary with the keys 'id',
    'name', 'uses', 'users', 'nesting_level', 'parent' (the parent id, or None
    for the first entry) and 'children' (list of child ids).

    It's important for line_info to be in the order gathered from the input.
    The order is essential for determining parent/child relationships.
    """
    assert len(line_info) > 0
    assert line_info[0][2] == 0, "Expect top-level entry at nesting level 0"

    nav_data = {}

    def register(line_entry):
        """Store a record with empty parent/child info and return it."""
        node_id, name, nesting_level, uselist = line_entry
        record = {'id': node_id, 'name': name,
                  'uses': uselist, 'users': [],
                  'nesting_level': nesting_level,
                  'parent': None, 'children': []}
        nav_data[node_id] = record
        return record

    # Stack of open ancestors. Its top is the record currently collecting
    # children. Every record is pushed once processed, since it may turn out
    # to have children of its own.
    ancestors = [register(line_info[0])]
    for line_entry in line_info[1:]:
        record = register(line_entry)
        depth = record['nesting_level']
        # Discard ancestors that are not strictly shallower than this record.
        # Only the first entry is expected to have nesting level 0, so the
        # stack never runs empty here.
        while ancestors[-1]['nesting_level'] >= depth:
            ancestors.pop()
        parent = ancestors[-1]
        record['parent'] = parent['id']
        parent['children'].append(record['id'])
        ancestors.append(record)

    # 'users' is the inversion of 'uses'; ids that have no record are skipped.
    for node_id, record in nav_data.items():
        for used_id in record['uses']:
            target = nav_data.get(used_id)
            if target is not None:
                target['users'].append(node_id)
    return nav_data
def htmlize(input):
    """HTML-ize the input text, producing output.

    input: stream / file-like object yielding the lines of the AST dump as
        bytes.

    Each line is tokenized and analyzed, every token is wrapped in a styled
    <span>, and address links are injected. The collected per-line analysis
    is converted into navigation data and embedded in the page as JSON.

    Returns a string with HTML-ized dump.
    """
    rendered_lines = []
    # Per-line analysis results, in input order.
    analyses = []
    for raw_line in input:
        tokens = list(tokenize_line(raw_line))
        analyses.append(analyze_line(tokens))

        chunks = []
        for tok in tokens:
            css_classes = ['ansi-{}'.format(tok.style.color.name.lower())]
            if tok.style.bold:
                css_classes.append('ansi-bold')
            chunks.append(SPAN_TEMPLATE.format(
                klass=' '.join(css_classes),
                text=html.escape(tok.text)))
        chunks.append('<br/>')
        inject_links(chunks)
        rendered_lines.append(''.join(chunks))

    nav_json = json.dumps(prepare_nav_data(analyses))
    page_body = '\n'.join(rendered_lines)
    return HTML_OUTPUT_TEMPLATE.format(lines=page_body,
                                       nav_data=nav_json,
                                       js_code=JS_CODE)
def main():
    """Command-line entry point.

    Parses the single positional argument 'dump_file', reads the AST dump in
    binary mode from that file (or from standard input when the argument is
    '-') and prints the resulting HTML to standard output.
    """
    argparser = argparse.ArgumentParser(
        description='HTML output is emitted to stdout')
    argparser.add_argument('dump_file',
                           help='AST dump file, "-" for reading from stdin')
    args = argparser.parse_args()

    # Argh: it would be nice to use argparse's FileType to do this
    # automatically, but unfortunately it's broken for binary mode
    # (http://bugs.python.org/issue14156)
    if args.dump_file == '-':
        # sys.stdin.buffer is the binary stream underlying sys.stdin. It is
        # used directly and left open, since this function does not own it.
        print(htmlize(sys.stdin.buffer))
    else:
        # Use the parsed argument rather than sys.argv[1], and let the with
        # statement close the file. If open() fails, its own error propagates
        # instead of being masked by cleanup of a never-assigned variable.
        with open(args.dump_file, 'rb') as input_stream:
            print(htmlize(input_stream))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()