In [25]:
import re
import json
from collections import defaultdict

# Remove duplicates from a list while preserving the original order
def remove_duplicates_preserve_order(items):
    """Return a new list holding the first occurrence of each element of `items`.

    Elements keep the relative order in which they first appear. Every
    element must be hashable; an unhashable element raises TypeError.
    """
    # dict keys are unique and remember insertion order, so building a dict
    # from the items and listing its keys drops later repeats.
    return list(dict.fromkeys(items))

# Parse a GCC-style control flow graph (CFG) text into structured data
def parse_cfg(raw_text):
    """Parse CFG dump text into one record per function.

    The text is scanned line by line for four kinds of lines:

    * a function header (";; Function <name>") starts a new record;
    * a successor line (";; <n> succs { ... }") adds outgoing edges for
      block <n>;
    * a basic-block header ("<bb <n>>", optionally indented) selects the
      block that following lines belong to;
    * any other line, once a block is selected, contributes the first
      "[<file>.c:<line>:<col>]" marker it contains, stored as "<file>.c:<line>".

    Returns a list of (function_name, successors, basic_blocks) tuples where
    `successors` maps a block number to a list of successor block numbers and
    `basic_blocks` maps a block number to the list of "<file>.c:<line>"
    strings seen in it (repeats are kept). Both mappings are plain dicts.
    """
    function_header = re.compile(r';; Function (\w+)')
    successor_line = re.compile(r';;\s*(\d+)\s+succs\s+\{([^}]*)\}')
    block_header = re.compile(r'\s*<bb\s+(\d+)>')
    source_marker = re.compile(r'\[(\w+\.c:\d+):\d+\]')

    parsed = []
    name = None
    edges = defaultdict(list)
    blocks = defaultdict(list)
    active_bb = None

    for text in raw_text.splitlines():
        header = function_header.match(text)
        if header is not None:
            if name:
                # Close the record of the function we were in and start
                # collecting into fresh tables for the next one.
                parsed.append((name, dict(edges), dict(blocks)))
                edges = defaultdict(list)
                blocks = defaultdict(list)
            name = header.group(1)
            active_bb = None

        succ = successor_line.match(text)
        if succ is not None:
            source_bb = int(succ.group(1))
            targets = [int(token) for token in succ.group(2).split()]
            # An empty "{ }" still registers the block, with no targets.
            edges[source_bb].extend(targets)

        block = block_header.match(text)
        if block is not None:
            active_bb = int(block.group(1))
            continue

        # Lines seen before the first block header of a function are ignored.
        if active_bb is None:
            continue

        marker = source_marker.search(text)
        if marker is not None:
            blocks[active_bb].append(marker.group(1))

    # The last function has no following header to trigger its flush.
    if name:
        parsed.append((name, dict(edges), dict(blocks)))

    return parsed

# Export the list of parsed CFGs to a JSON file in a structured format
def export_all_cfgs_to_json(functions, output_file="cfg_all_functions.json"):
    """Write every parsed CFG to `output_file` as a JSON array.

    `functions` is an iterable of (function_name, edges, blocks) triples,
    where `edges` maps a block id to its successor ids and `blocks` maps a
    block id to a list of line references.

    Each array element has the keys "function", "edges" (a list of
    ["bb<src>", "bb<dst>"] pairs) and "nodes" (a mapping from "bb<id>" to
    {"lines": [...]}, with repeated references removed and first-seen order
    kept). The file is overwritten and a confirmation line is printed.
    """
    def build_graph(func_name, edges, blocks):
        # One [source, target] pair per successor, in table order.
        edge_list = []
        for src, targets in edges.items():
            for dst in targets:
                edge_list.append([f"bb{src}", f"bb{dst}"])

        # One node entry per basic block, holding its de-duplicated lines.
        nodes = {}
        for bb, lines in blocks.items():
            nodes[f"bb{bb}"] = {"lines": remove_duplicates_preserve_order(lines)}

        return {"function": func_name, "edges": edge_list, "nodes": nodes}

    all_graphs = [
        build_graph(func_name, edges, blocks)
        for func_name, edges, blocks in functions
    ]

    with open(output_file, "w") as f:
        json.dump(all_graphs, f, indent=2)

    print(f"All CFGs exported to '{output_file}'")

# Read the CFG dump text from a hard-coded path (resolved against the
# current working directory), parse it into per-function records, and export
# them with export_all_cfgs_to_json using its default output file.
with open("tcas1.c.015t.cfg", "r") as f:
    cfg_text = f.read()

functions = parse_cfg(cfg_text)
export_all_cfgs_to_json(functions)


All CFGs exported to 'cfg_all_functions.json'


In [24]:
import json

def replace_line_references_with_content(config):
    """Replace every "<file>:<line>" reference in `config` with that source line.

    `config` is a list of function entries; each entry may have a "nodes"
    mapping whose values may have a "lines" list of "<file>:<line>" strings.
    Each reference is replaced by the stripped text of that 1-based line of
    the file. A line number outside the file yields the placeholder
    "<invalid line: <ref>>" instead.

    The reference is split on its LAST colon, so file paths that themselves
    contain a colon (for example a drive-letter path) are handled.

    `config` is modified in place and the same object is returned. Because
    the "lines" lists then hold source text rather than references, calling
    this function a second time on the same object is not supported.

    Raises:
        ValueError: a reference has no colon or a non-integer line part.
        OSError: a referenced file cannot be opened (e.g. FileNotFoundError).
    """
    # filename -> list of its lines, so each file is read at most once
    file_cache = {}

    for func in config:
        for node_data in func.get("nodes", {}).values():
            new_lines = []
            for ref in node_data.get("lines", []):
                # Only the final colon separates path from line number.
                filename, lineno_str = ref.rsplit(":", 1)
                lineno = int(lineno_str)

                if filename not in file_cache:
                    with open(filename, "r") as f:
                        file_cache[filename] = f.readlines()

                file_lines = file_cache[filename]
                if 1 <= lineno <= len(file_lines):
                    new_lines.append(file_lines[lineno - 1].strip())
                else:
                    new_lines.append(f"<invalid line: {ref}>")

            # Rebind only after the whole list was built, so a failure
            # part-way through leaves this node's references untouched.
            node_data["lines"] = new_lines

    return config


# Load the exported CFG JSON from disk. The file must already exist, i.e. the
# cell that writes "cfg_all_functions.json" has to be run before this one.
with open("cfg_all_functions.json", "r") as f:
    config = json.load(f)

# The call modifies `config` in place and returns it, so `updated_config`
# and `config` refer to the same object afterwards.
updated_config = replace_line_references_with_content(config)

# Write the expanded config to a separate file; the input JSON file is not
# modified.
with open("config_updated.json", "w") as f:
    json.dump(updated_config, f, indent=2)
