In [1]:
import os
import angr
from glob import glob
import re
import json
import pickle
import logging
import networkx as nx
from time import time

In [41]:
from time import time
# Silence every log record up to and including CRITICAL while the
# extraction below runs.
logging.disable(logging.CRITICAL)

# --- User-supplied settings -------------------------------------------------
# Path of the executable to analyse.
file_add = ''
# Label stored alongside the extracted graph.
graph_label = ''
# Directory in which the graph JSON file is written.
save_dir_addr = ''

# Everything after the last '/' in the path (the whole path if it has none).
file_name = file_add.rpartition('/')[2]

# Extract the transition graph of the binary's `main` function with angr and
# dump it as JSON to <save_dir_addr>/<file_name>.json.
#
# The saved dictionary has four keys:
#   node_dict  - node address -> {'node_name', 'node_label', 'insns'}
#   edge_list  - (source address, destination address) pairs
#   label      - the user-supplied graph_label
#   startpoint - address of main's start node (or the falsy value angr gave)
#   endpoints  - addresses of main's end nodes (or the falsy value angr gave)
#
# Any failure is reported by printing the exception; nothing is re-raised.
try:
    s_time = time()

    # Load the executable with angr (auto_load_libs disabled) and take the
    # transition graph of the function named 'main' from the fast CFG.
    p = angr.Project(file_add, load_options={'auto_load_libs': False})
    cfg = p.analyses.CFGFast(force_complete_scan=False, normalize=True)
    main_func = cfg.kb.functions['main']
    G = main_func.transition_graph

    # Reject graphs that do not form exactly one weakly connected component.
    num_components = nx.number_weakly_connected_components(G)
    if num_components != 1:
        # The `f` prefix must sit outside the quotes, otherwise the
        # placeholders are emitted literally instead of being filled in.
        raise Exception(f'{file_add} Number of Components is larger than one - {num_components}')

    edge_list = []
    node_dict = {}

    # Walk every edge; for both of its nodes record name, label and
    # instructions the first time the node's address is encountered.
    for edge_nodes in G.edges:
        for node in edge_nodes:
            node_info = cfg.get_any_node(node.addr)
            # Nodes the CFG cannot resolve are left out of node_dict.
            if node_info and node.addr not in node_dict:
                node_dict[node.addr] = {
                    'node_name': None,
                    # Text between the first pair of square brackets in the
                    # CFG node's string form.
                    'node_label': re.search(r"\[(.*?)\]", str(node_info)).group(1),
                    'insns': None,
                }

                if node_info.name:
                    node_dict[node.addr]['node_name'] = node_info.name

                if node_info.block:
                    disassembly = node_info.block.disassembly
                    # Pair each instruction object with its line of the
                    # printed disassembly; keep (address, mnemonic, last
                    # tab-separated field of that line).
                    insn_list = [
                        (insn_obj.insn.address, insn_obj.insn.mnemonic, insn_str.split('\t')[-1])
                        for insn_obj, insn_str in zip(disassembly.insns, str(disassembly).split('\n'))
                    ]
                    node_dict[node.addr]['insns'] = insn_list

        edge_list.append((edge_nodes[0].addr, edge_nodes[1].addr))

    # Collect everything known about G in the graph_data dictionary.
    graph_data = {'node_dict': node_dict, 'edge_list': edge_list, 'label': graph_label}

    # Find and save the start node; it must be main's own address and must
    # have been recorded in node_dict.
    if main_func.startpoint:
        startpoint = main_func.startpoint.addr
        if startpoint != main_func.addr or main_func.addr not in graph_data['node_dict']:
            raise Exception(f'For {file_add}, starting point ({startpoint}) is not equal to the address of the main function ({main_func.addr}).')
    else:
        # No start point available: store the falsy value as-is.
        startpoint = main_func.startpoint
    graph_data['startpoint'] = startpoint

    # Find and save the end nodes; each one must have been recorded in
    # node_dict.
    if main_func.endpoints:
        endpoints = [i.addr for i in main_func.endpoints]
        for endpoint in endpoints:
            if endpoint not in graph_data['node_dict']:
                raise Exception(f'{file_add} Ending node does not exist in the graph')
    else:
        # No end points available: store the falsy value as-is.
        endpoints = main_func.endpoints
    graph_data['endpoints'] = endpoints

    # Save graph_data as JSON next to the other extracted graphs.
    with open(os.path.join(save_dir_addr, file_name + '.json'), 'w') as fp:
        json.dump(graph_data, fp)

except Exception as e:
    # Best-effort script: report the failure and carry on.
    print(e)

finally:
    # Lift the logging override again, even if the run was interrupted by
    # something other than an Exception (e.g. KeyboardInterrupt).
    logging.disable(logging.NOTSET)

39912