In [1]:

from elftools.elf.elffile import ELFFile
from elftools.dwarf.descriptions import (
    describe_DWARF_expr, set_global_machine_arch)
from elftools.dwarf.locationlists import (
    LocationEntry, LocationExpr, LocationParser)
import os
import posixpath
# Name of the ELF binary to analyse; it is appended to the directory below.
binFileName = 'stacktest'
# Alternative target, kept for reference:
# filePath = './../../binaries/gnuit/src/' + binFileName

# Path of the binary, relative to the notebook's working directory.
filePath = './../../binaries/c_many/'+binFileName


In [2]:
from collections import defaultdict
def line_entry_mapping(line_program,CU):
    """Count the line-program entries that belong to each source file.

    Args:
        line_program: object exposing ``get_entries()``; every entry carries a
            ``state`` attribute (falsy for entries without a line state).
        CU: compile unit handed through to ``lpe_filename``.

    Returns:
        ``None`` when the line program decodes to no entries at all, otherwise
        a ``defaultdict(int)`` mapping the file name resolved by
        ``lpe_filename`` to the number of entries referring to it.
    """
    entries = line_program.get_entries()
    if not len(entries):
        return None

    entries_per_file = defaultdict(int)
    for entry in entries:
        # Entries whose state is falsy cannot be attributed to a source file,
        # so they are left out of the tally.
        if entry.state:
            resolved = lpe_filename(line_program, entry.state.file, CU)
            entries_per_file[resolved[0]] += 1

    return entries_per_file

def lpe_filename(line_program, file_index, CU):
    """Resolve a line-program file index to ``(file name, dir_index)``.

    The file and include-directory tables are indexed relative to a base that
    depends on how the binary was produced.  The base is picked from the
    ``DW_AT_producer`` string of the CU: 1 for clang, 0 for GNU compilers.
    For any other (or missing) producer the base is derived from the version
    of the line program instead of failing on an unbound variable.

    Args:
        line_program: line program whose ``header`` holds the ``file_entry``
            and ``include_directory`` tables.
        file_index: file number taken from a line-program state.
        CU: compile unit that owns ``line_program``.

    Returns:
        A ``(name, dir_index)`` tuple.  ``name`` is only the base name when
        ``dir_index`` is 0, otherwise the include directory joined with the
        base name.

    Raises:
        IndexError: if ``file_index`` or the directory index lies outside the
            corresponding table.
    """
    producer_attr = CU.get_top_DIE().attributes.get('DW_AT_producer')
    producer = ''
    if producer_attr is not None:
        raw_producer = producer_attr.value
        if isinstance(raw_producer, bytes):
            raw_producer = raw_producer.decode("utf-8")
        producer = str(raw_producer).lower()

    lp_header = line_program.header

    if 'clang' in producer:
        COMPILER_SUBSTRACT = 1
    elif 'gnu' in producer:
        COMPILER_SUBSTRACT = 0
    else:
        # Unknown producer: line tables before DWARF 5 number their entries
        # from 1, DWARF 5 tables from 0 (see the pyelftools line-program
        # example) -- NOTE(review): confirm against the binaries in use.
        COMPILER_SUBSTRACT = 1 if lp_header["version"] < 5 else 0

    file_entry = lp_header["file_entry"][file_index - COMPILER_SUBSTRACT]
    dir_index = file_entry["dir_index"]

    # A dir_index of 0 indicates that no absolute directory was recorded during
    # compilation; return just the basename.
    if dir_index == 0:
        return file_entry.name.decode(), dir_index

    directory = lp_header["include_directory"][dir_index - COMPILER_SUBSTRACT]
    return posixpath.join(directory, file_entry.name).decode(), dir_index


def show_loclist(loclist, dwarfinfo, indent, cu_offset):
    """ Render a location list as text, one indented line per entry.

        ``LocationEntry`` items are printed together with the decoded form of
        their DWARF expression (``<entry> <<decoded expression>>``); any other
        item is rendered with ``str``.  The lines are joined with newlines.
    """
    def render(entity):
        if not isinstance(entity, LocationEntry):
            return str(entity)
        decoded = describe_DWARF_expr(entity.loc_expr, dwarfinfo.structs, cu_offset)
        return '%s <<%s>>' % (entity, decoded)

    rendered_lines = [render(entity) for entity in loclist]
    return '\n'.join([indent + text for text in rendered_lines])


########################################################
######################   DWARF PARSER #######################
###########################################################


def get_DIE_at_offset(CU, offset):
    """Return the DIE of `CU` whose offset equals ``CU.cu_offset + offset``.

    The DIEs are scanned in iteration order and the first match wins; ``None``
    is returned when no DIE sits at that position.
    """
    matching = (
        candidate
        for candidate in CU.iter_DIEs()
        if candidate.offset == CU.cu_offset + offset
    )
    return next(matching, None)


def get_type_name(CU, offset):
    """Build a printable name for the type DIE at CU-relative `offset`.

    Rendering rules:
      * pointer types prefix the pointed-to type with ``*`` (``**stack``);
      * const types prefix the qualified type with ``"const "``;
      * a pointer or const DIE without ``DW_AT_type`` refers to ``void``
        (DWARF leaves the attribute out for ``void``);
      * subroutine types return the name found through ``DW_AT_sibling`` or
        ``*`` + the type named by ``DW_AT_type``, whichever attribute comes
        first in the DIE;
      * every other DIE yields its ``DW_AT_name``.

    Args:
        CU: compile unit containing the type DIE.
        offset: offset of the type DIE relative to the start of `CU`.

    Returns:
        The type name as ``str``, or ``None`` when the DIE cannot be found or
        carries no usable name.
    """
    die = get_DIE_at_offset(CU, offset)
    if die is None:
        # Dangling reference: nothing to describe.
        return None

    attributes = die.attributes

    def referenced_type_name():
        # Name of the type this DIE modifies; never None so that it can be
        # concatenated safely by the callers below.
        type_attr = attributes.get("DW_AT_type")
        if type_attr is None:
            return "void"
        return get_type_name(CU, type_attr.value) or "<unnamed>"

    if die.tag == 'DW_TAG_const_type':
        return "const " + referenced_type_name()

    if die.tag == 'DW_TAG_pointer_type':
        return "*" + referenced_type_name()

    if die.tag == 'DW_TAG_subroutine_type':
        for attr in attributes.values():
            if attr.name == "DW_AT_sibling":
                return get_type_name(CU, attr.value)
            if attr.name == "DW_AT_type":
                return "*" + (get_type_name(CU, attr.value) or "<unnamed>")

    name_attr = attributes.get("DW_AT_name")
    if name_attr is None:
        return None
    return name_attr.value.decode("utf-8")


    
    

    
############################################################
############################# CLANG #######################
###########################################################

# FUNCTION_DECL
# https://stackoverflow.com/questions/43460605/function-boundary-identification-using-libclang
# https://eli.thegreenplace.net/2011/07/03/parsing-c-in-python-with-clang


import clang.cindex



def get_all_var_types(source_path):
    """Print every variable declaration found when parsing `source_path`.

    For each ``VAR_DECL`` cursor of the translation unit the originating file,
    its base name, the variable's display name, its type spelling and its start
    line/column are printed.  Nothing is returned.
    """
    idx = clang.cindex.Index.create()
    tu = idx.parse(source_path)

    for cursor in tu.cursor.walk_preorder():
        if cursor.kind != clang.cindex.CursorKind.VAR_DECL:
            continue

        start = cursor.extent.start
        # A source location may carry no file (libclang then reports None);
        # such declarations cannot be attributed to a file and are skipped.
        if start.file is None:
            continue

        origin_path = start.file.name
        print('file.name: ',origin_path)
        originFileName = origin_path.split('/')[-1]
        print('originFileName :',originFileName)
        print('displayname:  ',cursor.displayname)
        print('type.spelling:  ',cursor.type.spelling)
        print('f.extent.start.line: ', start.line, "col: ",start.column)
        print('\n')
            
            

   

def get_all_function_types(source_path):
    """Print the signature details of every function declared in `source_path`.

    For each ``FUNCTION_DECL`` cursor the display name, spelling and result
    type are printed, followed by the spelling of every argument type and of
    every argument when the function type lists at least one argument type.
    Nothing is returned.
    """
    translation_unit = clang.cindex.Index.create().parse(source_path)

    for cursor in translation_unit.cursor.walk_preorder():
        if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            continue

        print(cursor.displayname)
        print('function name: ',( cursor.spelling))
        print('Returns: ',(cursor.result_type.spelling))

        parameter_types = list(cursor.type.argument_types())
        if len(parameter_types) > 0:
            for parameter_type in parameter_types:
                print('arg_type:',parameter_type.spelling)
            for parameter in list(cursor.get_arguments()):
                print('arg:',parameter.spelling)

        print("\n\n\n")

            
def get_function_boundaries(source_path):
    """Map each function declared in the translation unit to its source extent.

    Returns:
        dict keyed by function name (the display name up to the first ``(``).
        Each value holds ``src_path``, ``src_file`` (base name of the path),
        ``start_line``, ``start_col``, ``end_line`` and ``end_col`` of the
        declaration.  A later declaration with the same name replaces an
        earlier one.
    """
    translation_unit = clang.cindex.Index.create().parse(source_path)

    boundaries = {}
    for cursor in translation_unit.cursor.walk_preorder():
        if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            continue

        name = cursor.displayname.split('(')[0]
        first = cursor.extent.start
        last = cursor.extent.end
        origin_path = first.file.name
        boundaries[name] = {
            'src_path': origin_path,
            'src_file': origin_path.split('/')[-1],
            'start_line': first.line,
            'start_col': first.column,
            'end_line': last.line,
            'end_col': last.column,
        }
    return boundaries

def get_containing_function(source_file_path, line, col=0):
    """Return the name of the function whose extent covers `line`.

    Only functions whose recorded source path equals `source_file_path` are
    considered; `col` is accepted but not used.  ``None`` is returned when no
    function spans the line.
    """
    boundaries = get_function_boundaries(source_file_path)

    for name, span in boundaries.items():
        if span['src_path'] != source_file_path:
            continue
        if span['start_line'] <= line <= span['end_line']:
            return name
    return None
        
# Function boundaries of stack.c, computed once at cell level and bound to `_`.
# NOTE(review): `_` doubles as IPython's "last output" name and the path is
# machine-specific; a descriptive name and a configurable path would be safer.
_=get_function_boundaries('/home/nahid/reverse/binaries/c_many/stack.c' )

def form_function_bound_metrix(src_bounds, src_file_name):
    """Expand per-function line ranges into a ``line -> function name`` table.

    Args:
        src_bounds: mapping of function name to a dict with at least
            ``src_file``, ``start_line`` and ``end_line``.
        src_file_name: only functions recorded for this file are expanded.

    Returns:
        dict with one key per source line from ``start_line`` to ``end_line``
        (both inclusive) of every selected function.  When ranges overlap the
        function processed last owns the shared lines.  Each selected function
        is also printed together with its first and last line.
    """
    line_owner = {}
    for func_name, info in src_bounds.items():
        if info['src_file'] != src_file_name:
            continue
        first_line, last_line = info['start_line'], info['end_line']
        print(func_name, first_line, last_line)
        line_owner.update(dict.fromkeys(range(first_line, last_line + 1), func_name))
    return line_owner

# Line -> function-name lookup for stack.c, built from the boundaries held in `_`.
src_line_to_function_matrix = form_function_bound_metrix(_ , 'stack.c')
print('form_function_bound_metrix',src_line_to_function_matrix)




def find_variables_per_line(source_path , line_to_function_matrix , dwarf_FUNC_PARAMS):
    """Attach DWARF records to every variable declaration/use in a source file.

    The file is parsed with libclang; each ``VAR_DECL`` and ``DECL_REF_EXPR``
    cursor that originates from `source_path` itself (matched by base name) is
    looked up in the DWARF table of the function enclosing its line.

    Args:
        source_path: path of the C source file; also the first-level key of
            `dwarf_FUNC_PARAMS`.
        line_to_function_matrix: mapping of source line to function name.
        dwarf_FUNC_PARAMS: ``source path -> function -> variable -> info``.

    Returns:
        ``{line: {column: {'name', 'dwarf_info', 'type'}}}``.  A line gets an
        (possibly empty) entry as soon as one cursor is seen on it; a column
        is only filled when a DWARF record exists for the variable.  Lines
        outside every known function are looked up under the ``"global"``
        scope instead of raising ``KeyError``; an unknown `source_path`
        simply yields no matches.
    """
    srcFileName = source_path.split('/')[-1]
    idx = clang.cindex.Index.create()
    tu = idx.parse(source_path)

    wanted_kinds = (clang.cindex.CursorKind.DECL_REF_EXPR,
                    clang.cindex.CursorKind.VAR_DECL)
    params_by_function = dwarf_FUNC_PARAMS.get(source_path, {})
    var_usage_matrix = {}

    for cursor in tu.cursor.walk_preorder():
        if cursor.kind not in wanted_kinds:
            continue

        start = cursor.extent.start
        # Cursors without a file cannot belong to the analysed source.
        if start.file is None:
            continue
        originFileName = start.file.name.split('/')[-1]
        if srcFileName != originFileName:
            continue

        print('\n\noriginFileName :',originFileName)
        print('displayname:  ',cursor.displayname)
        print('type.spelling:  ',cursor.type.spelling)
        print('f.extent.start.line: ', start.line, "col: ",start.column)

        line = start.line
        col = start.column
        var_name = cursor.displayname

        columns = var_usage_matrix.setdefault(line, {})

        # process_file() stores file-scope variables under the "global" key,
        # so that scope is used for lines that lie outside every function.
        function_name = line_to_function_matrix.get(line, 'global')
        dwarf_info = params_by_function.get(function_name, {}).get(var_name)
        if dwarf_info is not None:
            columns[col] = {
                'name'       : var_name,
                'dwarf_info' : dwarf_info,
                'type'       : cursor.type.spelling }
    return var_usage_matrix

            




push 6 18
dummy 19 37
pop 39 51
fake 53 65
form_function_bound_metrix {6: 'push', 7: 'push', 8: 'push', 9: 'push', 10: 'push', 11: 'push', 12: 'push', 13: 'push', 14: 'push', 15: 'push', 16: 'push', 17: 'push', 18: 'push', 19: 'dummy', 20: 'dummy', 21: 'dummy', 22: 'dummy', 23: 'dummy', 24: 'dummy', 25: 'dummy', 26: 'dummy', 27: 'dummy', 28: 'dummy', 29: 'dummy', 30: 'dummy', 31: 'dummy', 32: 'dummy', 33: 'dummy', 34: 'dummy', 35: 'dummy', 36: 'dummy', 37: 'dummy', 39: 'pop', 40: 'pop', 41: 'pop', 42: 'pop', 43: 'pop', 44: 'pop', 45: 'pop', 46: 'pop', 47: 'pop', 48: 'pop', 49: 'pop', 50: 'pop', 51: 'pop', 53: 'fake', 54: 'fake', 55: 'fake', 56: 'fake', 57: 'fake', 58: 'fake', 59: 'fake', 60: 'fake', 61: 'fake', 62: 'fake', 63: 'fake', 64: 'fake', 65: 'fake'}


In [15]:

##############################################################################
#################   RELATE FUNCTIONS TO ADDRESSES #############################
###############################################################################

import collections
            

# address -> {'func': enclosing function name or None, 'lineinfo': line state}
lineinfo_address_subprogram = {}
with open(filePath, 'rb') as f:
    elffile = ELFFile(f)

    if not elffile.has_dwarf_info():
        # Calling `exit(0)` here is avoided on purpose: inside a notebook it
        # targets the kernel instead of just ending this cell.
        print('  file has no DWARF info')
    else:
        dwarfinfo = elffile.get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            top_attributes = CU.get_top_DIE().attributes
            CU_DIR_PATH = None
            CU_FILENAME = None
            if 'DW_AT_comp_dir' in top_attributes:
                CU_DIR_PATH = top_attributes['DW_AT_comp_dir'].value.decode("utf-8")
            if 'DW_AT_name' in top_attributes:
                CU_FILENAME = top_attributes['DW_AT_name'].value.decode("utf-8")

            print('  Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            # Every compilation unit in the DWARF information may or may not
            # have a corresponding line program in .debug_line.
            line_program = dwarfinfo.line_program_for_CU(CU)
            if line_program is None:
                print('  DWARF info is missing a line program for this CU')
                continue

            if CU_FILENAME is None:
                # Without a file name there is no source to parse with clang.
                print('  compile unit has no DW_AT_name, skipping')
                continue

            cu_file_path = os.path.join(CU_DIR_PATH or '', CU_FILENAME)
            print(cu_file_path)

            bounds_matrix = form_function_bound_metrix(
                get_function_boundaries(cu_file_path), CU_FILENAME)
            for line_entry in line_program.get_entries():
                state = line_entry.state
                if state is None:
                    continue
                lineinfo_address_subprogram[state.address] = {
                    # Lines outside every known function (line 0, gaps between
                    # functions) map to None instead of raising KeyError.
                    'func': bounds_matrix.get(state.line),
                    'lineinfo': state,
                }

# TODO: make this efficient by visiting valid instruction addresses only.

lineinfo_address_subprogram = collections.OrderedDict(sorted(lineinfo_address_subprogram.items()))
lineinfo_address_subprogram_complete = {}

# Fill every address between the lowest and highest known one: an address
# without its own line entry inherits the closest preceding entry.
if lineinfo_address_subprogram:
    min_address = min(lineinfo_address_subprogram.keys())
    max_address = max(lineinfo_address_subprogram.keys())

    temp_subprogram = lineinfo_address_subprogram[min_address]
    for i in range(min_address, max_address + 1):
        if i in lineinfo_address_subprogram:
            temp_subprogram = lineinfo_address_subprogram[i]
        lineinfo_address_subprogram_complete[i] = temp_subprogram
lineinfo_address_subprogram_complete




  Found a compile unit at offset 0, length 986
/home/nahid/reverse/binaries/c_many/stack.c
push 6 18
dummy 19 37
pop 39 51
fake 53 65
  Found a compile unit at offset 990, length 568
/home/nahid/reverse/binaries/c_many/main.c
main 7 30
  Found a compile unit at offset 1562, length 484
/home/nahid/reverse/binaries/c_many/calculate.c
add 5 17
addf 18 20
substract 23 25
substractf 26 28


{4521: {'func': 'push',
  'lineinfo': <LineState 7f28f81a8160:
    address = 0x11a9
    file = 1
    line = 6
    column = 40
    is_stmt = 1
    basic_block = False
    end_sequence = False
    prologue_end = False
    epilogue_begin = False
    isa = 0
    discriminator = 0>},
 4522: {'func': 'push',
  'lineinfo': <LineState 7f28f81a8160:
    address = 0x11a9
    file = 1
    line = 6
    column = 40
    is_stmt = 1
    basic_block = False
    end_sequence = False
    prologue_end = False
    epilogue_begin = False
    isa = 0
    discriminator = 0>},
 4523: {'func': 'push',
  'lineinfo': <LineState 7f28f81a8160:
    address = 0x11a9
    file = 1
    line = 6
    column = 40
    is_stmt = 1
    basic_block = False
    end_sequence = False
    prologue_end = False
    epilogue_begin = False
    isa = 0
    discriminator = 0>},
 4524: {'func': 'push',
  'lineinfo': <LineState 7f28f81a8160:
    address = 0x11a9
    file = 1
    line = 6
    column = 40
    is_stmt = 1
    basic_block = 

In [4]:
# https://github.com/eliben/pyelftools/blob/master/examples/dwarf_location_info.py

# Presumably an adjustment for decoded frame offsets; 0 means "no adjustment".
# NOTE(review): no active code in the visible cells reads this constant.
LOCATION_SUBSTRACT_FACTOR = 0

from elftools.dwarf.locationlists import LocationParser, LocationExpr
# Filled by process_file(): source path -> function name -> variable name -> info dict.
FUNC_PARAMS = {}
def process_file(filename):
    """Collect DWARF parameter/variable records of an ELF file into FUNC_PARAMS.

    For every compile unit that has a non-empty line program, the DIE tree is
    walked and each named ``DW_TAG_formal_parameter`` / ``DW_TAG_variable`` is
    stored as::

        FUNC_PARAMS[<comp_dir>/<cu name>][<function>][<variable>] = {
            'type': <name from get_type_name, None without DW_AT_type>,
            'kind': 'parameter' | 'variable',
            'location': <decoded DW_AT_location>,   # only when present
        }

    Variables that are not inside a named subprogram are stored under the
    function name ``"global"``.  The function a variable belongs to is
    tracked with the DIE nesting depth, so locals that follow a nested block
    stay attached to their function and a subprogram without children does
    not leak its name onto the DIEs that follow it.

    Args:
        filename: path of the ELF file to read.

    Returns:
        None.  Results are written to the module-level ``FUNC_PARAMS`` dict;
        progress information is printed.
    """
    print('Processing file:', filename)
    with open(filename, 'rb') as f:
        elffile = ELFFile(f)

        if not elffile.has_dwarf_info():
            print('  file has no DWARF info')
            return

        # get_dwarf_info returns a DWARFInfo context object, which is the
        # starting point for all DWARF-based processing in pyelftools.
        dwarfinfo = elffile.get_dwarf_info()
        # The location lists are extracted by DWARFInfo from the .debug_loc
        # section, and returned here as a LocationLists object.
        location_lists = dwarfinfo.location_lists()

        # This is required for the descriptions module to correctly decode
        # register names contained in DWARF expressions.
        set_global_machine_arch(elffile.get_machine_arch())

        # Create a LocationParser object that parses the DIE attributes and
        # creates objects representing the actual location information.
        loc_parser = LocationParser(location_lists)

        for CU in dwarfinfo.iter_CUs():
            top_attributes = CU.get_top_DIE().attributes
            CU_DIR_PATH = None
            CU_FILENAME = None
            if 'DW_AT_comp_dir' in top_attributes:
                CU_DIR_PATH = top_attributes['DW_AT_comp_dir'].value.decode("utf-8")
            if 'DW_AT_name' in top_attributes:
                CU_FILENAME = top_attributes['DW_AT_name'].value.decode("utf-8")

            # A CU may have no line program at all; line_entry_mapping cannot
            # handle None, so such CUs are skipped like empty ones.
            line_program = dwarfinfo.line_program_for_CU(CU)
            if line_program is None:
                print('  DWARF info is missing a line program for this CU')
                continue
            if line_entry_mapping(line_program, CU) is None:
                continue

            CU_dictionary_key = os.path.join(CU_DIR_PATH or '', CU_FILENAME or '')
            cu_params = FUNC_PARAMS.setdefault(CU_dictionary_key, {})

            print('  Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            # Depth of the DIE currently visited (the CU DIE itself is at 0).
            die_depth = 0
            # Open subprograms as (depth of the subprogram DIE, name or None).
            open_functions = []

            for DIE in CU.iter_DIEs():
                # A null DIE closes the child list of the innermost open DIE.
                if DIE.is_null():
                    die_depth -= 1
                    while open_functions and open_functions[-1][0] >= die_depth:
                        open_functions.pop()
                    continue

                attributes = DIE.attributes

                ############################################################
                #############   Parsing function DIEs starts ###############
                if DIE.tag == 'DW_TAG_subprogram':
                    if 'DW_AT_low_pc' in attributes and 'DW_AT_high_pc' in attributes:
                        low_pc = attributes['DW_AT_low_pc'].value
                        high_pc_attr = attributes['DW_AT_high_pc']
                        high_pc = high_pc_attr.value
                        # DW_AT_high_pc is an absolute address only in its
                        # address form; otherwise it is an offset from low_pc.
                        if high_pc_attr.form != 'DW_FORM_addr':
                            high_pc = low_pc + high_pc

                        print("Low PC: ",hex(low_pc) , " High PC" , hex(high_pc))
                    else:
                        print("NO PC given")

                    function_name = None
                    if 'DW_AT_name' in attributes:
                        function_name = attributes['DW_AT_name'].value.decode("utf-8")
                        cu_params.setdefault(function_name, {})
                        print("SUBPROGRAM: ",function_name)

                    # Only a subprogram with children opens a scope.
                    # NOTE(review): an unnamed subprogram (e.g. one described
                    # through another DIE) falls back to "global" below.
                    if DIE.has_children:
                        open_functions.append((die_depth, function_name))

                if DIE.tag == 'DW_TAG_formal_parameter' or DIE.tag == 'DW_TAG_variable':
                    FUNC_name = open_functions[-1][1] if open_functions else None
                    if FUNC_name is None:
                        FUNC_name = "global"
                    scope_params = cu_params.setdefault(FUNC_name, {})

                    if 'DW_AT_name' in attributes:
                        PARAM_name = attributes['DW_AT_name'].value.decode("utf-8")

                        type_name = None
                        if 'DW_AT_type' in attributes:
                            type_name = get_type_name(CU, attributes['DW_AT_type'].value)

                        var_info = {'type': type_name,
                                    'kind': DIE.tag.split('_')[-1]}
                        scope_params[PARAM_name] = var_info

                        if 'DW_AT_location' in attributes:
                            try:
                                loc = loc_parser.parse_from_attribute(
                                    attributes['DW_AT_location'], CU['version'])

                                if isinstance(loc, LocationExpr):
                                    var_info["location"] = describe_DWARF_expr(
                                        loc.loc_expr, dwarfinfo.structs, CU.cu_offset)
                                elif isinstance(loc, list):
                                    rendered = show_loclist(loc, dwarfinfo, '      ', CU.cu_offset)
                                    print(PARAM_name, rendered)
                                    var_info["location"] = rendered
                            except Exception as exc:
                                # Best effort: a location that cannot be decoded
                                # is reported and the variable kept without it.
                                print("ERROR", DIE, exc)
                #############   Parsing function DIEs ends  ################
                ############################################################

                if DIE.has_children:
                    die_depth += 1

# Populate FUNC_PARAMS for the binary selected by `filePath`.
process_file(filePath)

Processing file: ./../../binaries/c_many/stacktest
  Found a compile unit at offset 0, length 986
NO PC given
SUBPROGRAM:  free
NO PC given
SUBPROGRAM:  printf
NO PC given
SUBPROGRAM:  malloc
Low PC:  0x134c  High PC 0x1391
SUBPROGRAM:  fake
Low PC:  0x12ea  High PC 0x134c
SUBPROGRAM:  pop
Low PC:  0x120e  High PC 0x12ea
SUBPROGRAM:  dummy
Low PC:  0x11a9  High PC 0x120e
SUBPROGRAM:  push
  Found a compile unit at offset 990, length 568
NO PC given
SUBPROGRAM:  printf
NO PC given
SUBPROGRAM:  pop
NO PC given
SUBPROGRAM:  dummy
NO PC given
SUBPROGRAM:  fake
NO PC given
SUBPROGRAM:  add
NO PC given
SUBPROGRAM:  push
Low PC:  0x1391  High PC 0x1530
SUBPROGRAM:  main
  Found a compile unit at offset 1562, length 484
Low PC:  0x15da  High PC 0x15f8
SUBPROGRAM:  substractf
Low PC:  0x15c4  High PC 0x15da
SUBPROGRAM:  substract
Low PC:  0x15a6  High PC 0x15c4
SUBPROGRAM:  addf
Low PC:  0x1530  High PC 0x15a6
SUBPROGRAM:  add


In [5]:
# Relate every variable occurrence in stack.c (line -> column) to its DWARF record.
# NOTE(review): absolute, machine-specific path; find_variables_per_line also uses
# it as the first-level key into FUNC_PARAMS.
source_file = '/home/nahid/reverse/binaries/c_many/stack.c'
variable_matrix = find_variables_per_line(source_file, src_line_to_function_matrix , FUNC_PARAMS)
variable_matrix



originFileName : stack.c
displayname:   pop
type.spelling:   int
f.extent.start.line:  7 col:  5


originFileName : stack.c
displayname:   ming
type.spelling:   unsigned int
f.extent.start.line:  8 col:  5


originFileName : stack.c
displayname:   stk
type.spelling:   stack *
f.extent.start.line:  11 col:  5


originFileName : stack.c
displayname:   tmp
type.spelling:   stack *
f.extent.start.line:  11 col:  18


originFileName : stack.c
displayname:   stk
type.spelling:   stack *
f.extent.start.line:  12 col:  5


originFileName : stack.c
displayname:   stk_ptr
type.spelling:   stack **
f.extent.start.line:  12 col:  12


originFileName : stack.c
displayname:   tmp
type.spelling:   stack *
f.extent.start.line:  13 col:  5


originFileName : stack.c
displayname:   malloc
type.spelling:   void *(unsigned long)
f.extent.start.line:  13 col:  11


originFileName : stack.c
displayname:   tmp
type.spelling:   stack *
f.extent.start.line:  14 col:  5


originFileName : stack.c
displayname:

{7: {5: {'name': 'pop',
   'dwarf_info': {'type': 'int',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -40)'},
   'type': 'int'}},
 8: {5: {'name': 'ming',
   'dwarf_info': {'type': 'unsigned int',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -36)'},
   'type': 'unsigned int'}},
 11: {5: {'name': 'stk',
   'dwarf_info': {'type': '*stack',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -32)'},
   'type': 'stack *'},
  18: {'name': 'tmp',
   'dwarf_info': {'type': '*stack',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -24)'},
   'type': 'stack *'}},
 12: {5: {'name': 'stk',
   'dwarf_info': {'type': '*stack',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -32)'},
   'type': 'stack *'},
  12: {'name': 'stk_ptr',
   'dwarf_info': {'type': '**stack',
    'kind': 'parameter',
    'location': '(DW_OP_fbreg: -64)'},
   'type': 'stack **'}},
 13: {5: {'name': 'tmp',
   'dwarf_info': {'type': '*stack',
    'kind': 'variable',
    'location': '(DW_OP_f

In [6]:
# (FUNC_PARAMS)#['/home/nahid/reverse/binaries/c_many/stack.c']['fake']


import re

# A location consisting of one frame-base-relative operation,
# e.g. "(DW_OP_fbreg: -36)".  Location lists, absolute addresses and compound
# expressions do not match and therefore receive no 'offset'.
_FBREG_LOCATION = re.compile(r'^\(DW_OP_fbreg: (-?\d+)\)$')

# Value reported as base when a function has no frame-relative variable.
_NO_BASE_LOCATION = -1000000000


def _frame_offset(var_info):
    """Return the DW_OP_fbreg offset stored in `var_info`, or None."""
    location = var_info.get('location')
    if not isinstance(location, str):
        return None
    match = _FBREG_LOCATION.match(location)
    return int(match.group(1)) if match else None


# Add an 'offset' to every frame-relative variable: its distance from the
# highest frame offset used inside the same function (which gets offset 0).
for cu in FUNC_PARAMS:

    for func in FUNC_PARAMS[cu]:
        frame_offsets = {}
        for var_, var_info in FUNC_PARAMS[cu][func].items():
            var_location = _frame_offset(var_info)
            if var_location is not None:
                frame_offsets[var_] = var_location

        base_location = max([_NO_BASE_LOCATION] + list(frame_offsets.values()))

        for var_, var_location in frame_offsets.items():
            FUNC_PARAMS[cu][func][var_]['offset'] = var_location - base_location

        print( "base_location: ",base_location)
(FUNC_PARAMS)

base_location:  -1000000000
base_location:  -1000000000
base_location:  -1000000000
base_location:  -20
base_location:  -24
base_location:  -112
base_location:  -24
base_location:  -1000000000
base_location:  -1000000000
base_location:  -1000000000
base_location:  -1000000000
base_location:  -1000000000
base_location:  -1000000000
base_location:  -34
base_location:  -20
base_location:  -20
base_location:  -20
base_location:  -20


{'/home/nahid/reverse/binaries/c_many/stack.c': {'free': {},
  'printf': {},
  'malloc': {},
  'fake': {'a': {'type': 'int',
    'kind': 'parameter',
    'location': '(DW_OP_fbreg: -36)',
    'offset': -16},
   'b': {'type': 'int',
    'kind': 'parameter',
    'location': '(DW_OP_fbreg: -40)',
    'offset': -20},
   'x': {'type': 'int',
    'kind': 'parameter',
    'location': '(DW_OP_fbreg: -44)',
    'offset': -24},
   'd': {'type': 'int',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -32)',
    'offset': -12},
   'e': {'type': 'int',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -28)',
    'offset': -8},
   'f': {'type': 'int',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -24)',
    'offset': -4},
   'g': {'type': 'int',
    'kind': 'variable',
    'location': '(DW_OP_fbreg: -20)',
    'offset': 0}},
  'pop': {'stk_ptr': {'type': '**stack',
    'kind': 'parameter',
    'location': '(DW_OP_fbreg: -56)',
    'offset': -32},
   'int1': {'type': 'int',
    'k

In [7]:
#!/usr/bin/env python3

import sys,os
from elftools.elf.elffile import ELFFile
from elftools.elf.segments import Segment






In [8]:

# Load the raw bytes of the binary; the context manager closes the handle
# again, which the bare open() call never did.
with open(filePath, 'rb') as fh:
    bin_bytearray = bytearray(fh.read())

In [9]:
# https://www.capstone-engine.org/lang_python.html



from capstone import *

from capstone.x86 import *


# hex(address) -> decoded capstone instruction
address_inst = {}


with open(filePath, 'rb') as f:
    elf = ELFFile(f)
    dwarfinfo = elf.get_dwarf_info()
    aranges = dwarfinfo.get_aranges()
    # get_aranges() yields None when the binary has no address-range table.
    arange_entries = aranges.entries if aranges is not None else []
    print(len(arange_entries))
    for arange in arange_entries:
        print(arange)

    # Loadable segments, used to translate virtual addresses to file offsets.
    load_segments = [segment for segment in elf.iter_segments()
                     if segment['p_type'] == 'PT_LOAD']

    # One disassembler instance is enough for all ranges.
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    md.detail = True

    for arange in arange_entries:

        entry = arange.begin_addr
        # Not named `exit`: rebinding that name would break the `exit(0)`
        # calls made by other cells of this notebook.
        end_addr = entry + arange.length

        # The range is expressed in virtual addresses while `bin_bytearray`
        # is indexed by file offset.  The two only coincide when the segment
        # is mapped at its file offset, so translate through the containing
        # PT_LOAD segment and fall back to the raw address when none matches.
        file_start = entry
        for segment in load_segments:
            segment_start = segment['p_vaddr']
            if segment_start <= entry < segment_start + segment['p_filesz']:
                file_start = entry - segment_start + segment['p_offset']
                break
        ops = bin_bytearray[file_start: file_start + (end_addr - entry)]

        for inst in md.disasm(ops, entry):

            address_inst[hex(inst.address)] = inst


            print('\n'*3)
            print(inst.address, inst.mnemonic+"  "+inst.op_str)
            (regs_read, regs_write) = inst.regs_access()



3
ARangeEntry(begin_addr=4521, length=488, info_offset=0, unit_length=44, version=2, address_size=8, segment_size=0)
ARangeEntry(begin_addr=5009, length=415, info_offset=990, unit_length=44, version=2, address_size=8, segment_size=0)
ARangeEntry(begin_addr=5424, length=200, info_offset=1562, unit_length=44, version=2, address_size=8, segment_size=0)




4521 endbr64  




4525 push  rbp




4526 mov  rbp, rsp




4529 sub  rsp, 0x30




4533 mov  dword ptr [rbp - 0x24], edi




4536 mov  qword ptr [rbp - 0x30], rsi




4540 mov  dword ptr [rbp - 0x18], 0xa




4547 mov  dword ptr [rbp - 0x14], 0xc




4554 mov  rax, qword ptr [rbp - 0x30]




4558 mov  rax, qword ptr [rax]




4561 mov  qword ptr [rbp - 0x10], rax




4565 mov  edi, 0x10




4570 call  0x10b0




4575 mov  qword ptr [rbp - 8], rax




4579 mov  rax, qword ptr [rbp - 8]




4583 mov  edx, dword ptr [rbp - 0x24]




4586 mov  dword ptr [rax], edx




4588 mov  rax, qword ptr [rbp - 8]




4592 mov  rdx, qword ptr [rbp - 

In [10]:
from collections import defaultdict
import posixpath


In [11]:
# hex(address) -> line-program entry that starts at this address
addr_lineProgram ={}
# hex(address) -> path of the compile unit's primary source file
addr_sourceFile = {}
with open(filePath, 'rb') as f:
    elffile = ELFFile(f)

    if not elffile.has_dwarf_info():
        # `exit(0)` is avoided on purpose: inside a notebook it targets the
        # kernel, and the name `exit` is rebound to an integer by the
        # disassembly cell of this notebook.
        print('  file has no DWARF info')
    else:
        dwarfinfo = elffile.get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            top_attributes = CU.get_top_DIE().attributes
            CU_DIR_PATH = None
            CU_FILENAME = None
            if 'DW_AT_comp_dir' in top_attributes:
                CU_DIR_PATH = top_attributes['DW_AT_comp_dir'].value.decode("utf-8")
            if 'DW_AT_name' in top_attributes:
                CU_FILENAME = top_attributes['DW_AT_name'].value.decode("utf-8")

            print('  Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            # Every compilation unit in the DWARF information may or may not
            # have a corresponding line program in .debug_line.
            line_program = dwarfinfo.line_program_for_CU(CU)
            if line_program is None:
                print('  DWARF info is missing a line program for this CU')
                continue

            # Every address is attributed to the CU's primary source file.
            # Resolving the file of each entry through lpe_filename was tried
            # earlier and is intentionally not active.
            cu_source_path = os.path.join(CU_DIR_PATH or '', CU_FILENAME or '')

            for line_entry in line_program.get_entries():
                if line_entry.state is None:
                    continue
                address_key = hex(line_entry.state.address)
                addr_lineProgram[address_key] = line_entry
                addr_sourceFile[address_key] = cu_source_path


        


  Found a compile unit at offset 0, length 986
  Found a compile unit at offset 990, length 568
  Found a compile unit at offset 1562, length 484


In [12]:
# print(addr_lineProgram)
# address_inst

def getSource(sourceFilePath, row , col):
    """Return one line of a source file with the character at ``col`` marked.

    The character at 1-based column ``col`` of 1-based line ``row`` is wrapped
    in ``|`` markers, e.g. ``"    int |p|op = 10;\\n"``. The line's own trailing
    newline (if it has one) is preserved.

    Args:
        sourceFilePath: Path of the source file to read.
        row: 1-based line number. Values below 1 yield an empty string, since
            there is no such line to show (DWARF uses line 0 for instructions
            that cannot be attributed to a source line).
        col: 1-based column number. If it is below 1 or past the end of the
            line, the line is returned without markers.

    Returns:
        The (possibly marked) line as a string, or ``""`` when ``row < 1``.

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if ``row`` is beyond the last line of the file.
    """
    # Trace output retained from the original implementation.
    print(sourceFilePath, row , col)

    if row < 1:
        # A negative index would silently pick a line counted from the end.
        return ""

    # Context manager so the handle is closed even if reading fails.
    with open(sourceFilePath, "r") as sourceFile:
        fileContent = sourceFile.readlines()

    row_content = fileContent[row-1]

    if col < 1 or col > len(row_content):
        # No character at that column to highlight; return the line unmarked.
        return row_content

    return row_content[:col-1] + '|' + row_content[col-1] + '|' + row_content[col:]


In [13]:
# NOTE(review): the meaning of -4 is not evident from this cell. It is only
# used below to print OFFSET - REGISTER_SUBSTRACT_FACTOR (i.e. OFFSET + 4)
# next to each memory displacement. TODO: document what it corrects for.
REGISTER_SUBSTRACT_FACTOR = -4
dir_path = './../../binaries/c_many/'

# Write "<binFileName>.s": one line per disassembled instruction. When the
# instruction's address has a line-program entry, the line is annotated with
# the matching source line (character at the recorded column marked by
# getSource). A banner is written whenever the source file changes, and a
# "MEMORY OFFSET" line follows each instruction that has a memory operand
# with a non-zero displacement.
with open(binFileName+'.s', 'w') as outFile:
    lastSource = ""
    for address in address_inst:

        print(address)
        inst = address_inst[address]
        instrctionCode = (address+":\t"+ inst.mnemonic+" "+inst.op_str).ljust(45)

        # OFFSET ends up as the displacement of the last memory operand whose
        # displacement is non-zero, or stays None if there is none.
        OFFSET = None
        for c, o in enumerate(inst.operands):
            if o.type != CS_OP_MEM:
                continue
            mem = o.value.mem
            print("\t\toperands[%u].type: MEM" %c)
            if mem.base != 0:
                print("\t\t\toperands[%u].mem.base: REG = %s" \
                    %(c, inst.reg_name(mem.base)))
            if mem.index != 0:
                print("\t\t\toperands[%u].mem.index: REG = %s" \
                    %(c, inst.reg_name(mem.index)))
            if mem.disp != 0:
                print("\t\t\toperands[%u].mem.disp: 0x%x" \
                    %(c, mem.disp))
                OFFSET = mem.disp
            print(hex(mem.disp),mem.disp)

        if address in addr_lineProgram:
            srcFilePath = addr_sourceFile[address]
            if srcFilePath!=lastSource:
                # Banner announcing that the following lines come from a new file.
                outFile.write("\n"+ '#'*100+"\n"+ srcFilePath.rjust(45) +'\n'+'#'*100+ "\n\n")
                lastSource = srcFilePath
            print("add",address)

            line_state = addr_lineProgram[address].state
            sourceCode = getSource(srcFilePath, line_state.line, line_state.column)
            # Result is currently unused. The call is kept because
            # get_containing_function is defined elsewhere and may have
            # side effects -- TODO confirm and drop if it does not.
            function_name = get_containing_function(srcFilePath, line_state.line, line_state.column)

            # The last line of a file may lack a trailing newline; add exactly
            # one so the listing stays one instruction per line. (The previous
            # `sourceCode+=sourceCode+"\n"` duplicated the source text.)
            if '\n' not in sourceCode:
                sourceCode += "\n"
            outFile.write(instrctionCode+"#"+ sourceCode  )
            print(instrctionCode+"#"+ sourceCode)
        else:
            outFile.write(instrctionCode+ '\n'  )
            print(instrctionCode)

        if OFFSET is not None:
            outFile.write("MEMORY OFFSET:     "+str(hex(OFFSET))+"     "+str(OFFSET)+ "  >>"+str(OFFSET-REGISTER_SUBSTRACT_FACTOR)+'\n\n')

0x11a9
add 0x11a9
/home/nahid/reverse/binaries/c_many/stack.c 6 40
0x11a9:	endbr64                              #void push(int number, stack **stk_ptr) |{|

0x11ad
0x11ad:	push rbp                             
0x11ae
0x11ae:	mov rbp, rsp                         
0x11b1
0x11b1:	sub rsp, 0x30                        
0x11b5
		operands[0].type: MEM
			operands[0].mem.base: REG = rbp
			operands[0].mem.disp: 0x-24
-0x24 -36
0x11b5:	mov dword ptr [rbp - 0x24], edi      
0x11b8
		operands[0].type: MEM
			operands[0].mem.base: REG = rbp
			operands[0].mem.disp: 0x-30
-0x30 -48
0x11b8:	mov qword ptr [rbp - 0x30], rsi      
0x11bc
		operands[0].type: MEM
			operands[0].mem.base: REG = rbp
			operands[0].mem.disp: 0x-18
-0x18 -24
add 0x11bc
/home/nahid/reverse/binaries/c_many/stack.c 7 9
0x11bc:	mov dword ptr [rbp - 0x18], 0xa      #    int |p|op = 10;

0x11c3
		operands[0].type: MEM
			operands[0].mem.base: REG = rbp
			operands[0].mem.disp: 0x-14
-0x14 -20
add 0x11c3
/home/nahid/reverse/binarie

0x12a0:	mov dword ptr [rbp - 0x6c], 0        #        int x1,x2,x3,x4,x5,|x|6 = 0;

0x12a7
		operands[0].type: MEM
			operands[0].mem.base: REG = rbp
			operands[0].mem.disp: 0x-78
-0x78 -120
add 0x12a7
/home/nahid/reverse/binaries/c_many/stack.c 31 18
0x12a7:	mov dword ptr [rbp - 0x78], 0        #        for (int |k|=0; k<20;k++){

0x12ae
add 0x12ae
/home/nahid/reverse/binaries/c_many/stack.c 31 9
0x12ae:	jmp 0x12c4                           #        |f|or (int k=0; k<20;k++){

0x12b0
		operands[1].type: MEM
			operands[1].mem.base: REG = rbp
			operands[1].mem.disp: 0x-74
-0x74 -116
add 0x12b0
/home/nahid/reverse/binaries/c_many/stack.c 32 31
0x12b0:	mov eax, dword ptr [rbp - 0x74]      #            int_arr[k] = e_int|%|k;

0x12b3
0x12b3:	cdq                                  
0x12b4
		operands[0].type: MEM
			operands[0].mem.base: REG = rbp
			operands[0].mem.disp: 0x-78
-0x78 -120
0x12b4:	idiv dword ptr [rbp - 0x78]          
0x12b7
		operands[1].type: MEM
			operands[1].mem.base: R

0x1372:	mov edx, dword ptr [rbp - 0x14]      #    int g = a|+|b+d+e+f;

0x1375
		operands[1].type: MEM
			operands[1].mem.base: REG = rbp
			operands[1].mem.disp: 0x-18
-0x18 -24
0x1375:	mov eax, dword ptr [rbp - 0x18]      
0x1378
0x1378:	add edx, eax                         
0x137a
		operands[1].type: MEM
			operands[1].mem.base: REG = rbp
			operands[1].mem.disp: 0x-10
-0x10 -16
add 0x137a
/home/nahid/reverse/binaries/c_many/stack.c 59 16
0x137a:	mov eax, dword ptr [rbp - 0x10]      #    int g = a+b|+|d+e+f;

0x137d
0x137d:	add edx, eax                         
0x137f
		operands[1].type: MEM
			operands[1].mem.base: REG = rbp
			operands[1].mem.disp: 0x-c
-0xc -12
add 0x137f
/home/nahid/reverse/binaries/c_many/stack.c 59 18
0x137f:	mov eax, dword ptr [rbp - 0xc]       #    int g = a+b+d|+|e+f;

0x1382
0x1382:	add edx, eax                         
0x1384
		operands[1].type: MEM
			operands[1].mem.base: REG = rbp
			operands[1].mem.disp: 0x-8
-0x8 -8
add 0x1384
/home/nahid/reverse/bin

0x148f:	lea rax, [rbp - 0x20]                #    |p|rintf("%d\n",pop(&stk));

0x1493
0x1493:	mov rdi, rax                         
0x1496
0x1496:	call 0x12ea                          
0x149b
0x149b:	mov esi, eax                         
0x149d
		operands[1].type: MEM
			operands[1].mem.base: REG = rip
			operands[1].mem.disp: 0xb74
0xb74 2932
0x149d:	lea rax, [rip + 0xb74]               
0x14a4
0x14a4:	mov rdi, rax                         
0x14a7
0x14a7:	mov eax, 0                           
0x14ac
0x14ac:	call 0x10a0                          
0x14b1
		operands[1].type: MEM
			operands[1].mem.base: REG = rbp
			operands[1].mem.disp: 0x-20
-0x20 -32
add 0x14b1
/home/nahid/reverse/binaries/c_many/main.c 26 5
0x14b1:	lea rax, [rbp - 0x20]                #    |p|rintf("%d\n",pop(&stk));

0x14b5
0x14b5:	mov rdi, rax                         
0x14b8
0x14b8:	call 0x12ea                          
0x14bd
0x14bd:	mov esi, eax                         
0x14bf
		operands[1].type: MEM
			operands[1]

0x15a4:	pop rbp                              #|}|

0x15a5
0x15a5:	ret                                  
0x15a6
add 0x15a6
/home/nahid/reverse/binaries/c_many/calculate.c 18 29
0x15a6:	endbr64                              #float addf(float a, float b)|{|

0x15aa
0x15aa:	push rbp                             
0x15ab
0x15ab:	mov rbp, rsp                         
0x15ae
		operands[0].type: MEM
			operands[0].mem.base: REG = rbp
			operands[0].mem.disp: 0x-4
-0x4 -4
0x15ae:	movss dword ptr [rbp - 4], xmm0      
0x15b3
		operands[0].type: MEM
			operands[0].mem.base: REG = rbp
			operands[0].mem.disp: 0x-8
-0x8 -8
0x15b3:	movss dword ptr [rbp - 8], xmm1      
0x15b8
		operands[1].type: MEM
			operands[1].mem.base: REG = rbp
			operands[1].mem.disp: 0x-4
-0x4 -4
add 0x15b8
/home/nahid/reverse/binaries/c_many/calculate.c 19 13
0x15b8:	movss xmm0, dword ptr [rbp - 4]      #    return a|+|b;

0x15bd
		operands[1].type: MEM
			operands[1].mem.base: REG = rbp
			operands[1].mem.disp: 0x-8
-0x8 -8
0