Ce script lit les deux premiers fichiers `.json` (triés par nom) contenant des graphes au format DOT, identifie le type de chaque nœud (entry, call, jmp, jcc, ret...), trace un sous-graphe interactif (limité aux 50 premiers nœuds) avec flèches directionnelles et couleurs, et génère une visualisation HTML par graphe.  
Chaque graphe est accompagné d'une légende et d'un fond clair ; les flèches indiquent le flot de contrôle.


In [61]:
import os
import re
import webbrowser
from pathlib import Path

import networkx as nx
import plotly.graph_objects as go

# Directory scanned for the input graph files.
input_dir = "folder_test_set"
# Directory receiving the generated HTML pages; created up front if missing.
output_dir = "graph_viz"
os.makedirs(output_dir, exist_ok=True)

# Marker color per instruction class. The insertion order is also the order
# in which the classes are turned into plot traces (and legend entries).
COLOR_MAP = dict(
    ENTRY="black",
    CALL="orange",
    JMP="skyblue",
    JCC="violet",
    RET="deepskyblue",
    OTHER="gray",
)

# Edge line:  "src" -> "dst"
_EDGE_PATTERN = re.compile(r'"([^"]+)"\s*->\s*"([^"]+)"')
# Node line:  "id" [label = "..."]
_NODE_PATTERN = re.compile(r'"([^"]+)"\s*\[label = "(.*?)"\]')


def parse_dot_to_graph(dot_str):
    """Build a directed graph from DOT-like text, one statement per line.

    A line containing ``->`` is treated as an edge between two quoted node
    ids. Otherwise, a line containing ``[`` and ``label =`` is treated as a
    node declaration; the node's ``text`` attribute is the second
    ``" : "``-separated field of its label, upper-cased (empty string when
    the label has no such separator). Lines matching neither form are
    ignored.

    Args:
        dot_str: Full text of the graph description.

    Returns:
        A ``networkx.DiGraph`` holding the parsed nodes and edges.
    """
    graph = nx.DiGraph()
    for raw_line in dot_str.strip().split("\n"):
        statement = raw_line.strip()

        if "->" in statement:
            edge = _EDGE_PATTERN.match(statement)
            if edge is not None:
                graph.add_edge(edge.group(1), edge.group(2))
            continue

        if "[" not in statement or "label =" not in statement:
            continue

        declaration = _NODE_PATTERN.match(statement)
        if declaration is None:
            continue

        node_id, label = declaration.groups()
        # Only the second field is needed, so splitting can stop after it.
        fields = label.split(" : ", 2)
        instr_text = fields[1] if len(fields) > 1 else ""
        graph.add_node(node_id, text=instr_text.upper())
    return graph

def get_instr_class(instr, node, entry_node):
    """Return the instruction class name used to color a node.

    The entry node is always ``"ENTRY"``. For any other node the class is
    the first of ``CALL``, ``JCC``, ``JMP``, ``RET`` that occurs as a
    substring of ``instr``, or ``"OTHER"`` when none does.

    Args:
        instr: Instruction text of the node.
        node: Identifier of the node being classified.
        entry_node: Identifier of the graph's entry node.

    Returns:
        One of ``"ENTRY"``, ``"CALL"``, ``"JMP"``, ``"JCC"``, ``"RET"``,
        ``"OTHER"``.
    """
    if node == entry_node:
        return "ENTRY"
    # JCC is tested before JMP so that text containing both is reported as
    # JCC, i.e. JMP only wins when JCC is absent.
    priority = ("CALL", "JCC", "JMP", "RET")
    return next((keyword for keyword in priority if keyword in instr), "OTHER")

def find_entry_node(G):
    """Choose the node to treat as the entry point of ``G``.

    Candidates are tried in this order:

    1. the first node whose ``text`` attribute contains ``"ENTRY"``;
    2. the first node with no incoming edge;
    3. the first node of the graph.

    Args:
        G: Graph whose nodes may carry a ``text`` attribute.

    Returns:
        The identifier of the selected node.

    Raises:
        IndexError: If ``G`` has no nodes.
    """
    # A sentinel (rather than None) keeps any node identifier valid as a hit.
    missing = object()
    tagged = next(
        (name for name, attrs in G.nodes(data=True)
         if "ENTRY" in attrs.get("text", "")),
        missing,
    )
    if tagged is not missing:
        return tagged

    for candidate in G.nodes:
        if G.in_degree(candidate) == 0:
            return candidate

    return list(G.nodes)[0]

def graph_to_html(G, graph_id, max_nodes=50):
    """Write an interactive Plotly rendering of ``G`` to HTML and open it.

    Only the first ``max_nodes`` nodes of ``G`` (in the graph's iteration
    order) and the edges between them are drawn. Nodes are colored by
    instruction class using ``COLOR_MAP``, with one legend entry per class
    that actually occurs; every edge is drawn as an arrow from its source
    to its target. The page is saved as ``<graph_id>_with_arrows.html``
    inside the module-level ``output_dir`` and then opened through
    ``webbrowser``.

    Args:
        G: Directed graph whose nodes may carry a ``text`` attribute.
        graph_id: Identifier used in the figure title and output file name.
        max_nodes: Maximum number of nodes to draw.

    Returns:
        None. Nothing is written or opened when no node is selected.
    """
    nodes = list(G.nodes())[:max_nodes]
    if not nodes:
        # An empty selection has no entry node to look up and nothing to draw.
        return

    H = G.subgraph(nodes).copy()
    # A fixed seed keeps the layout identical from one run to the next.
    pos = nx.spring_layout(H, seed=42)

    entry_node = find_entry_node(H)

    # Collect coordinates and hover texts per instruction class so that each
    # class becomes its own trace (and therefore its own legend entry).
    buckets = {cls: ([], [], []) for cls in COLOR_MAP}
    for node in H.nodes():
        x, y = pos[node]
        instr = H.nodes[node].get("text", "")
        xs, ys, hovers = buckets[get_instr_class(instr, node, entry_node)]
        xs.append(x)
        ys.append(y)
        hovers.append("ENTRY" if node == entry_node else instr)

    # Classes without any node are skipped so they do not show in the legend.
    traces = [
        go.Scatter(
            x=xs,
            y=ys,
            mode='markers',
            hovertext=hovers,
            hoverinfo='text',
            marker=dict(color=COLOR_MAP[cls], size=12, line=dict(width=1, color='black')),
            name=cls,
        )
        for cls, (xs, ys, hovers) in buckets.items()
        if xs
    ]

    # Edges are drawn as annotations: the arrow tail sits at (ax, ay) and the
    # head at (x, y), both expressed in data coordinates. Every endpoint of an
    # edge of H is a node of H, so it always has a layout position.
    annotations = []
    for src, dst in H.edges():
        x0, y0 = pos[src]
        x1, y1 = pos[dst]
        annotations.append(dict(
            ax=x0, ay=y0,
            x=x1, y=y1,
            xref='x', yref='y',
            axref='x', ayref='y',
            showarrow=True,
            arrowhead=3,
            arrowsize=2,
            arrowwidth=1.5,
            arrowcolor='gray',
            opacity=0.7
        ))

    fig = go.Figure(data=traces,
                   layout=go.Layout(
                       title=f"Graphe interactif - {graph_id}",
                       showlegend=True,
                       annotations=annotations,
                       hovermode='closest',
                       margin=dict(b=20, l=5, r=5, t=40),
                       xaxis=dict(showgrid=False, zeroline=False, visible=False),
                       yaxis=dict(showgrid=False, zeroline=False, visible=False),
                       plot_bgcolor='white',
                       legend=dict(
                           x=1.01,
                           y=1,
                           bgcolor='rgba(255,255,255,0.6)',
                           bordercolor='gray',
                           borderwidth=1
                       )
                   ))

    output_path = os.path.abspath(os.path.join(output_dir, f"{graph_id}_with_arrows.html"))
    fig.write_html(output_path)
    # as_uri() builds a well-formed, percent-encoded file:// URL, which plain
    # string concatenation does not (spaces, Windows drive letters, ...).
    webbrowser.open(Path(output_path).as_uri())

# Render the first two ".json" files of input_dir, taken in sorted name order.
json_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".json"))
for file in json_files[:2]:
    # Drop only the trailing extension: str.replace would also remove any
    # ".json" occurring earlier in the file name.
    graph_id = file[:-len(".json")]
    file_path = os.path.join(input_dir, file)
    # The file content is handed to the DOT parser as plain text; bytes that
    # are not valid UTF-8 are dropped rather than aborting the run.
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        dot_str = f.read()
    G = parse_dot_to_graph(dot_str)
    # Files from which no node could be parsed produce no output page.
    if G.number_of_nodes() > 0:
        graph_to_html(G, graph_id)
