In [70]:

from functools import lru_cache
from pathlib import Path

import clang
import clang.cindex
from clang.cindex import CursorKind

try:
    # Set library file for clang.cindex
    clang.cindex.Config.set_library_file("/home/junda/.local/lib/python3.8/site-packages/clang/native/libclang-16.so")
except Exception:
    # Re-running this cell in a live kernel makes the call above raise; that is
    # expected and safe to ignore. Catch `Exception` rather than using a bare
    # `except:` so KeyboardInterrupt / SystemExit are not swallowed as well.
    # NOTE(review): the library path is machine-specific — adjust it per environment.
    print("Library path already set once.")



Library path already set once.


In [71]:
def parse_file(path, args=None):
    """Parse a source file with libclang and return the translation unit.

    Every diagnostic reported for the translation unit is re-emitted as a
    Python warning; when there is at least one, an extra summary warning is
    emitted that points the reader back to this function's compiler flags.

    :param path: Path of the source file handed to ``Index.parse``.
    :param args: Optional iterable of compiler flags. When omitted, the
        machine-specific defaults below are used (Python 3.8 headers, the
        LLVM 10 builtin headers, numpy headers and ``-std=c++17``).
    :return: The translation unit returned by ``Index.parse``.
    """
    # Imported up front (not inside the diagnostics loop) so the summary
    # warning below never depends on the loop body having executed.
    import warnings

    if args is None:
        args = [
            # On macos
            # '-I/usr/lib/clang/11/include',
            # '-std=c++17',
            # '-I/mnt/ssd2/junda/.pyenv/versions/3.10.6/include/python3.10',
            # '-I/home/junda/.pyenv/versions/3.10.6/lib/python3.10/site-packages/numpy/core/include',
            '-I/usr/include/python3.8',  # TODO: I'm using python3.8.10
            '-I/usr/lib/llvm-10/lib/clang/10.0.0/include',
            '-I/home/junda/.local/lib/python3.8/site-packages/numpy/core/include',
            '-std=c++17',
        ]

    idx = clang.cindex.Index.create()
    tu = idx.parse(path, args=list(args))

    # Snapshot once so the loop and the emptiness check see the same data.
    diagnostics = list(tu.diagnostics)
    for diag in diagnostics:
        warnings.warn(str(diag))
    if diagnostics:
        warnings.warn(
            "Compiler error usually cause error to program analysis. If encountered downstream exception, consider look at function `parse_file` and alter the compiler flags.")
    return tu


In [72]:

def describe(node):
    """Summarise an AST node as ``(kind name, kind displayname, spelling, extent)``.

    The second element falls back to an empty string when the node's kind has
    no ``displayname`` attribute.
    """
    kind = node.kind
    display = getattr(kind, "displayname", "")
    return kind.name, display, node.spelling, node.extent


def describe_concise(node):
    """Summarise an AST node with a compact ``line:col~line:col`` position.

    Returns ``(kind name, kind displayname, spelling, position)``, where the
    displayname falls back to ``""`` when the kind has no such attribute.

    Only line and column numbers are read from the extent. The file name is
    deliberately not touched, so nodes whose start location has no file
    (``extent.start.file`` is ``None``) can be described too.
    """
    start, end = node.extent.start, node.extent.end
    position = f"{start.line}:{start.column}~{end.line}:{end.column}"
    return (node.kind.name, getattr(node.kind, "displayname", ""), node.spelling, position)


# @lru_cache(maxsize=None)
def get_source_file(file_path):
    """Read `file_path` as text and return its lines, line terminators kept."""
    with open(file_path) as handle:
        text = handle.read()
    return text.splitlines(keepends=True)


def get_source_line(file_path, start, end):
    """Return the source text of the lines spanned by `start`..`end`.

    :param file_path: File to read the lines from.
    :param start: Location object whose 1-based ``line`` is the first line to include.
    :param end: Location object whose 1-based ``line`` is the last line to include.
    :return: The selected lines (whole lines, columns are ignored) as one string.
    """
    lines = get_source_file(file_path)
    # The lines already carry their own terminators (read with keepends), so
    # they must be concatenated as-is; joining with '\n' doubled every line break.
    return ''.join(lines[start.line - 1:end.line])


def describe_with_source(node):
    """Describe a node as ``(position, source text)``.

    The position is formatted ``line:col~line:col``; the source text is whatever
    `get_source_line` returns for the node's file and its start/end locations.
    """
    extent = node.extent
    begin, finish = extent.start, extent.end
    file_name = begin.file.name
    position = f"{begin.line}:{begin.column}~{finish.line}:{finish.column}"
    snippet = get_source_line(file_name, begin, finish)
    return position, snippet

def traverse_children(node: 'clang.cindex.Cursor', depth=0, describe_func=describe, max_depth=None):
    """Print `node` and all of its descendants, one line per node, indented by depth.

    :param node: Node to start from; its children come from ``node.get_children()``.
    :param depth: Current nesting level; each level adds two spaces of indent.
    :param describe_func: Callable returning the values to print for a node.
    :param max_depth: When not None, nodes deeper than this level are skipped.
    """
    too_deep = max_depth is not None and depth > max_depth
    if too_deep:
        return
    indent = '  ' * depth
    print(indent, *describe_func(node))
    for child in node.get_children():
        # Forward the caller's options unchanged; only the depth advances.
        traverse_children(child, depth=depth + 1, describe_func=describe_func, max_depth=max_depth)


def locate_function_decl(root, name: str):
    """
    Search the AST below `root` (inclusive) for a function declaration named `name`.

    Nodes are visited in depth-first pre-order, so the first match in that
    order is the one returned.
    :param root: Node to start the search from.
    :param name: Spelling the function declaration must match exactly.
    :return: The first matching node, or None when there is no match.
    """
    pending = [root]
    while pending:
        cursor = pending.pop()
        if cursor.kind == clang.cindex.CursorKind.FUNCTION_DECL and cursor.spelling == name:
            return cursor
        # Push children reversed so the left-most child is popped (visited) first.
        pending.extend(reversed(list(cursor.get_children())))
    return None


def get_used_node_types(target):
    """Return the set of node kinds found in `target` and all of its descendants."""
    seen_kinds = set()
    stack = [target]
    while stack:
        current = stack.pop()
        seen_kinds.add(current.kind)
        stack.extend(current.get_children())
    return seen_kinds



In [73]:

def parse_args():
    """Parse the command line.

    Accepts a positional ``source_path`` (converted to ``Path``) and an optional
    ``--target_function`` string that defaults to ``"main"``.

    :return: The ``argparse.Namespace`` holding both values.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('source_path', type=Path)
    cli.add_argument('--target_function', default="main", type=str)
    namespace = cli.parse_args()
    return namespace

In [74]:
# Inputs for this run: the C++ source to analyse and the function to instrument.
# NOTE(review): the path is an absolute, machine-specific location — adjust per environment.
path = Path("/home/junda/Scad/runtime/test/src/compress/simplemt.cpp")
target_function = "main"

# Parse the file and look up the declaration of the target function in its AST.
tu = parse_file(path)
_main_func = locate_function_decl(tu.cursor, target_function)
# locate_function_decl returns None when no declaration matches; fail loudly here
# rather than letting later cells operate on a missing node.
if not _main_func:
    raise ValueError(f"Cannot find function {target_function}")

In [75]:
def traverse_top_ish(node, depth=0, describe_func=describe_concise):
    """Print the "top-ish" structure below `node`.

    Only function declarations and compound statements are descended into;
    every other node is printed but its children are not visited. Return
    statements and parameter declarations are never printed.

    :param node: Node to start from.
    :param depth: Current nesting level; each level adds two spaces of indent.
    :param describe_func: Callable returning the values to print for a node.
    """
    hidden_kinds = (CursorKind.RETURN_STMT, CursorKind.PARM_DECL)
    container_kinds = (CursorKind.COMPOUND_STMT, CursorKind.FUNCTION_DECL)

    if node.kind not in hidden_kinds:
        print('  ' * depth, *describe_func(node))

    # Only containers are opened up; anything else is treated as a leaf.
    if node.kind not in container_kinds:
        return
    for child in node.get_children():
        traverse_top_ish(child, depth=depth + 1, describe_func=describe_func)
        

In [76]:
def collect_top_ish(root):
    """Collect the "top-ish" nodes below `root`, in depth-first pre-order.

    Function declarations and compound statements are opened up (their
    children are visited) but are not collected themselves. Return statements
    and parameter declarations are neither collected nor opened. Every other
    node is collected as a leaf, without visiting its children.

    :param root: Node to start from.
    :return: List of the collected nodes.
    """
    containers = (CursorKind.COMPOUND_STMT, CursorKind.FUNCTION_DECL)
    excluded = containers + (CursorKind.RETURN_STMT, CursorKind.PARM_DECL)

    collected = []
    pending = [root]
    while pending:
        current = pending.pop()
        if current.kind not in excluded:
            collected.append(current)
        if current.kind in containers:
            # Reversed push keeps the original left-to-right visiting order.
            pending.extend(reversed(list(current.get_children())))
    return collected

In [77]:
def describe_with_offset(node):
    """Summarise an AST node with its line/column span and its offset span.

    Returns ``(kind name, kind displayname, spelling, "line:col~line:col",
    "offset=start~end")``, where the displayname falls back to ``""`` when the
    kind has no such attribute.

    The file name is deliberately not read, so nodes whose start location has
    no file (``extent.start.file`` is ``None``) can be described too.
    """
    start, end = node.extent.start, node.extent.end
    return (node.kind.name, getattr(node.kind, "displayname", ""), node.spelling,
            f"{start.line}:{start.column}~{end.line}:{end.column}",
            f"offset={start.offset}~{end.offset}")


In [78]:
# traverse_top_ish(_main_func, describe_func=describe_with_offset)
# Gather the nodes to instrument: everything reachable from the target function
# through function declarations / compound statements (see collect_top_ish).
result = collect_top_ish(_main_func)
# Print one compact summary line per collected node.
for i in result:
    print(*describe_concise(i))
# Bare trailing expression: the notebook shows the raw node list as the cell output.
result

CALL_EXPR  operator<< 36:5~36:45
DECL_STMT   37:5~37:28
DECL_STMT   38:5~38:19
FOR_STMT   39:5~41:6
FOR_STMT   44:5~46:6
CALL_EXPR  operator<< 48:5~48:43


[<clang.cindex.Cursor at 0x7f3fe007ddc0>,
 <clang.cindex.Cursor at 0x7f3fe007d7c0>,
 <clang.cindex.Cursor at 0x7f3fe007dac0>,
 <clang.cindex.Cursor at 0x7f3fe007d3c0>,
 <clang.cindex.Cursor at 0x7f3fd01ba9c0>,
 <clang.cindex.Cursor at 0x7f3fd01bacc0>]

In [79]:
# Now copy the original file, and insert the annotation.
# The annotated copy lives next to the original as "<stem>_annotated.cpp".
import shutil
# NOTE(review): the bare `path.name` expression below has no effect (its value is discarded).
path.name
new_path = path.parent / (path.stem + "_annotated.cpp")
# shutil.copy is the last expression, so its return value is displayed as the cell output.
shutil.copy(path, new_path)

PosixPath('/home/junda/Scad/runtime/test/src/compress/simplemt_annotated.cpp')

In [80]:
def get_time_macro_start(name, line):
    """Build the C++ snippet that invokes the ``__stgst`` probe inside its own braces.

    :param name: Identifier for the probe; emitted inside double quotes.
    :param line: Value emitted verbatim as the probe's second argument.
    :return: e.g. ``{__stgst("1", 36);}``
    """
    return f'{{__stgst("{name}", {line});}}'

def get_time_macro_end(name, line):
    """Build the C++ snippet that invokes the ``__stged`` probe inside its own braces.

    :param name: Identifier for the probe; emitted inside double quotes.
    :param line: Value emitted verbatim as the probe's second argument.
    :return: e.g. ``{__stged("1", 36);}``
    """
    return f'{{__stged("{name}", {line});}}'

In [81]:
# Load the original source as a list of lines with the '\n' characters stripped,
# ready for in-line probe insertion.
codes = [source_line.strip('\n') for source_line in get_source_file(path)]


In [82]:
# Annotate every collected statement with timing probes and write the result to `new_path`.
# Insertion is per LINE, not per column/offset: the start probe is prepended to the
# statement's first line and the end probe is appended to its last line.

# Start from the pristine source every time this cell runs. The cell rebinds
# `codes` below, so without this reload a re-run would stack a second layer of
# probes and header lines on top of the already-annotated text.
codes = [i.strip('\n') for i in get_source_file(path)]

# Walk the statements last-to-first; probe names are the 1-based statement index.
for i, node in enumerate(result[::-1]):
    name_idx = len(result) - i
    print(node.extent.start.line, node.extent.start.column, node.extent.end.line, node.extent.end.column, get_source_line(node.extent.start.file.name, node.extent.start, node.extent.end))
    st, ed = node.extent.start.line, node.extent.end.line
    st_macro = get_time_macro_start(name_idx, st)
    codes[st - 1] = st_macro + codes[st - 1]
    print(st, codes[st - 1])

    ed_macro = get_time_macro_end(name_idx, ed)
    codes[ed - 1] = codes[ed - 1] + ed_macro
    print(ed, codes[ed - 1])

# Prepend the probe definitions. The macros use std::cerr / std::endl, so
# <iostream> is included explicitly instead of relying on the annotated source
# to have pulled it in already.
codes = [
    '#include <chrono>',
    '#include <iostream>',
    '#define __stgcommon(name, line, status) {auto t = std::chrono::high_resolution_clock::now(); std::cerr << name << "," << status << "," << line << "," << std::chrono::duration_cast<std::chrono::nanoseconds>(t.time_since_epoch()).count() << std::endl;}',
    '#define __stgst(name, line) {__stgcommon(name, line, "start")}',
    '#define __stged(name, line) {__stgcommon(name, line, "end")}',
] + codes

# Restore the line terminators that were stripped when the source was loaded.
codes = [i + '\n' for i in codes]

with open(new_path, 'w') as f:
    f.writelines(codes)

48 5 48 43     cout << "Main thread finished" << endl;

48 {__stgst("6", 48);}    cout << "Main thread finished" << endl;
48 {__stgst("6", 48);}    cout << "Main thread finished" << endl;{__stged("6", 48);}
44 5 46 6     for (int i = 0; i < size; i++){

        threads[i].join();

    }

44 {__stgst("5", 44);}    for (int i = 0; i < size; i++){
46     }{__stged("5", 46);}
39 5 41 6     for (int i = 0; i < size; i++){

        threads.push_back(thread(work, i, size));

    }

39 {__stgst("4", 39);}    for (int i = 0; i < size; i++){
41     }{__stged("4", 41);}
38 5 38 19     int size = 32;

38 {__stgst("3", 38);}    int size = 32;
38 {__stgst("3", 38);}    int size = 32;{__stged("3", 38);}
37 5 37 28     vector<thread> threads;

37 {__stgst("2", 37);}    vector<thread> threads;
37 {__stgst("2", 37);}    vector<thread> threads;{__stged("2", 37);}
36 5 36 45     cout << "Hello from main thread" << endl;

36 {__stgst("1", 36);}    cout << "Hello from main thread" << endl;
36 {__stgst("1", 

In [83]:
# Build the annotated program, then run it with stderr redirected to a CSV file;
# the injected probes write their timing records to std::cerr.
# NOTE(review): both commands run in the notebook's working directory — presumably the
# directory holding the Makefile and simplemt_annotated.cpp; confirm before re-running.
! make simplemt_annotated.out
! ./simplemt_annotated.out 2>simplemt_annotated.cpulog.csv

make: 'simplemt_annotated.out' is up to date.
Hello from main thread
Hello from thread 0 of 32
Hello from thread 1 of 32
Hello from thread 2 of 32
Hello from thread 3 of 32
Hello from thread 4 of 32
Hello from thread 5 of 32
Hello from thread 6 of 32
Hello from thread 7 of 32
Hello from thread 8 of 32
Hello from thread 9 of 32
Hello from thread Hello from thread 1011 of  of 3232

Hello from thread 12 of 32
Hello from thread 13 of 32
Hello from thread 14 of 32
Hello from thread 15 of 32
Hello from thread 16 of 32
Hello from thread 17 of 32
Hello from thread 18 of 32
Hello from thread 19 of 32
Hello from thread 20 of 32
Hello from thread 21 of 32
Hello from thread 22 of 32
Hello from thread 23 of 32
Hello from thread 24 of 32
Hello from thread 25 of 32
Hello from thread 26 of 32
Hello from thread 27 of 32
Hello from thread 28 of 32
Hello from thread 29 of 32
Hello from thread 30 of 32
Hello from thread 31 of 32
Thread 3 finished
Thread 15 finished
Thread 13 finished
Thread 27 finished
Th

In [None]:
# Show the records captured from the annotated program's stderr.
! cat simplemt_annotated.cpulog.csv