In [1]:
import re
from pycparser import parse_file, c_ast
from pycparser.plyparser import Coord

In [2]:
# Splits an indexed child name such as 'block_items[1]' into two groups:
# the array name ('block_items') and the index text ('1').
RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
# Matches names beginning with a double-underscore token (e.g. '__weakref__');
# used to drop such entries when scanning a node class's __slots__.
RE_INTERNAL_ATTR = re.compile('__.*__')

In [3]:
class CJsonError(Exception):
    """Error raised when an AST cannot be converted to its dict form."""
def memodict(fn):
    """
    Memoize a one-argument function using a dict subclass as the cache.

    The returned callable is the cache's bound ``__getitem__``: a hit is a
    plain dict lookup, and a miss falls through to ``__missing__``, which
    calls ``fn``, stores the result under the argument and returns it.
    The argument must therefore be hashable.
    """
    class _ResultCache(dict):
        def __missing__(self, key):
            # Only reached on a cache miss: compute once, then remember.
            computed = fn(key)
            self[key] = computed
            return computed

    cache = _ResultCache()
    return cache.__getitem__
@memodict
def child_attrs_of(klass):
    """
    Return the set of slot names on a Node class that are not plain attributes.

    Every entry of ``klass.__slots__`` is kept unless it matches
    RE_INTERNAL_ATTR or is listed in ``klass.attr_names``.
    The result is memoized per class so the filtering runs only once.
    """
    plain_attrs = set(klass.attr_names)
    return {
        slot
        for slot in klass.__slots__
        if not RE_INTERNAL_ATTR.match(slot) and slot not in plain_attrs
    }
def to_dict(node):
    """
    Recursively convert an AST node into a nested dict.

    Keys are inserted in this order:
      * '_nodetype': the node's class name
      * one key per entry in the class's ``attr_names``
      * 'coord': ``str(node.coord)``, or None when the coord is falsy
      * one key per child; indexed children ('name[i]') are collected
        into a list stored under 'name'
      * None for every name from ``child_attrs_of`` not already present

    Raises CJsonError if indexed children do not arrive in order 0, 1, 2...
    """
    klass = node.__class__

    # Metadata first, so it leads the dict.
    result = {'_nodetype': klass.__name__}

    # Plain (non-child) attributes are copied through unchanged.
    for attr_name in klass.attr_names:
        result[attr_name] = getattr(node, attr_name)

    # The coord is stored as its string form.
    result['coord'] = str(node.coord) if node.coord else None

    for child_name, child in node.children():
        # Child names are either simple ('value') or indexed ('block_items[1]').
        indexed = RE_CHILD_ARRAY.match(child_name)
        if indexed is None:
            result[child_name] = to_dict(child)
            continue

        list_name, index_text = indexed.groups()
        position = int(index_text)
        siblings = result.setdefault(list_name, [])
        # Entries must arrive in order, so the index has to equal the
        # current length of the list being built.
        if position != len(siblings):
            raise CJsonError('Internal ast error. Array {} out of order. '
                'Expected index {}, got {}'.format(
                list_name, len(siblings), position))
        siblings.append(to_dict(child))

    # Child slots that yielded nothing still get an explicit None entry.
    for absent in child_attrs_of(klass):
        result.setdefault(absent, None)

    return result
def file_to_dict(filename):
    """
    Parse the C source file at ``filename`` and return its AST as a dict.

    The file is parsed with ``use_cpp=False`` and the resulting AST is
    converted with ``to_dict``.
    """
    return to_dict(parse_file(filename, use_cpp=False))

In [4]:
# Parse "test.c" (a relative path) into the nested-dict form of its AST.
ast_dict = file_to_dict("test.c")

In [5]:
# Dump the whole AST dict in one go.
print(ast_dict)

{'_nodetype': 'FileAST', 'coord': None, 'ext': [{'_nodetype': 'Decl', 'name': 'algo', 'quals': ['const'], 'align': [], 'storage': [], 'funcspec': [], 'coord': 'test.c:1:11', 'type': {'_nodetype': 'TypeDecl', 'declname': 'algo', 'quals': ['const'], 'align': None, 'coord': 'test.c:1:11', 'type': {'_nodetype': 'IdentifierType', 'names': ['int'], 'coord': 'test.c:1:7'}}, 'init': {'_nodetype': 'Constant', 'type': 'int', 'value': '0', 'coord': 'test.c:1:18'}, 'bitsize': None}, {'_nodetype': 'FuncDef', 'coord': 'test.c:2:5', 'decl': {'_nodetype': 'Decl', 'name': 'main', 'quals': [], 'align': [], 'storage': [], 'funcspec': [], 'coord': 'test.c:2:5', 'type': {'_nodetype': 'FuncDecl', 'coord': 'test.c:2:5', 'args': {'_nodetype': 'ParamList', 'coord': 'test.c:2:16', 'params': [{'_nodetype': 'Decl', 'name': 'a', 'quals': [], 'align': [], 'storage': [], 'funcspec': [], 'coord': 'test.c:2:16', 'type': {'_nodetype': 'TypeDecl', 'declname': 'a', 'quals': [], 'align': None, 'coord': 'test.c:2:16', 'typ

In [6]:
# Print each entry of the top-level 'ext' list separately; the extra '\n'
# argument leaves a blank line between entries.
for e in ast_dict['ext']:
    print(e,'\n')

{'_nodetype': 'Decl', 'name': 'algo', 'quals': ['const'], 'align': [], 'storage': [], 'funcspec': [], 'coord': 'test.c:1:11', 'type': {'_nodetype': 'TypeDecl', 'declname': 'algo', 'quals': ['const'], 'align': None, 'coord': 'test.c:1:11', 'type': {'_nodetype': 'IdentifierType', 'names': ['int'], 'coord': 'test.c:1:7'}}, 'init': {'_nodetype': 'Constant', 'type': 'int', 'value': '0', 'coord': 'test.c:1:18'}, 'bitsize': None} 

{'_nodetype': 'FuncDef', 'coord': 'test.c:2:5', 'decl': {'_nodetype': 'Decl', 'name': 'main', 'quals': [], 'align': [], 'storage': [], 'funcspec': [], 'coord': 'test.c:2:5', 'type': {'_nodetype': 'FuncDecl', 'coord': 'test.c:2:5', 'args': {'_nodetype': 'ParamList', 'coord': 'test.c:2:16', 'params': [{'_nodetype': 'Decl', 'name': 'a', 'quals': [], 'align': [], 'storage': [], 'funcspec': [], 'coord': 'test.c:2:16', 'type': {'_nodetype': 'TypeDecl', 'declname': 'a', 'quals': [], 'align': None, 'coord': 'test.c:2:16', 'type': {'_nodetype': 'IdentifierType', 'names': ['