In [1]:
#!/usr/bin/env python3

import sys,os
from elftools.elf.elffile import ELFFile
from elftools.elf.segments import Segment



# Path of the ELF binary that the cells below open with ELFFile
# (relative path, so it is resolved against the current working directory).
filePath = './../../binaries/c_many/stacktest'


In [2]:
from capstone import *

# Disassemble the binary's '.text' section and index every decoded
# instruction by the hex string of its address (e.g. '0x11a9').
address_inst = {}
with open(filePath, 'rb') as f:
    elf = ELFFile(f)
    code = elf.get_section_by_name('.text')
    ops = code.data()
    print('code data size: ',code.data_size)
    # The section header's sh_addr is handed to Capstone as the address of
    # the first byte, so the reported instruction addresses start from it.
    addr = code['sh_addr']
    # Capstone disassembler configured with CS_ARCH_X86 / CS_MODE_64.
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    for i in md.disasm(ops, addr):        
        address_inst[hex(i.address)] = i

code data size:  943


In [3]:
from collections import defaultdict
import posixpath


In [4]:
def line_entry_mapping(line_program):
    """Count the line-program entries attributed to each source filename.

    Every entry returned by ``line_program.get_entries()`` that carries a
    state with a non-zero file index is resolved to a filename through
    ``lpe_filename`` and tallied. One summary line per filename is printed.

    Returns:
        A ``defaultdict(int)`` mapping filename -> number of entries.
    """
    entries_per_file = defaultdict(int)

    for entry in line_program.get_entries():
        state = entry.state
        # Only entries that have a state and reference a file (index != 0)
        # are counted; everything else has no source file to attribute to.
        if state and state.file != 0:
            entries_per_file[lpe_filename(line_program, state.file)] += 1

    for name in entries_per_file:
        print("    filename=%s -> %d entries" % (name, entries_per_file[name]))

    return entries_per_file

def lpe_filename(line_program, file_index):
    """Resolve a line-program file index to a decoded filename string.

    The index selects an entry from the header's ``file_entry`` table; that
    entry's ``dir_index`` selects a directory from ``include_directory``.
    Both tables are addressed with 1-based indices here.

    Returns:
        The entry's name alone when ``dir_index`` is 0, otherwise the
        directory joined with the name (POSIX separators), decoded to str.
    """
    header = line_program.header

    # Indices are 1-based, hence the "- 1" on both table lookups.
    entry = header["file_entry"][file_index - 1]
    directory_slot = entry["dir_index"]

    if directory_slot != 0:
        parent = header["include_directory"][directory_slot - 1]
        return posixpath.join(parent, entry.name).decode()

    # dir_index == 0: no directory is looked up, only the bare name is returned.
    return entry.name.decode()



In [5]:
# Build two lookup tables keyed by the hex string of an instruction address:
#   addr_lineProgram: address -> line-program entry (its .state holds
#                     address, file, line and column)
#   addr_sourceFile:  address -> filename map of the compile unit that the
#                     entry came from (as returned by line_entry_mapping)
addr_lineProgram ={}
addr_sourceFile = {}
with open(filePath, 'rb') as f:
    elffile = ELFFile(f)

    if not elffile.has_dwarf_info():
        print('  file has no DWARF info')
        # `exit` is the interactive-shell helper and is not meant for program
        # code; raising SystemExit directly stops this cell with status 0.
        raise SystemExit(0)

    dwarfinfo = elffile.get_dwarf_info()
    for CU in dwarfinfo.iter_CUs():
        print('  Found a compile unit at offset %s, length %s' % (
            CU.cu_offset, CU['unit_length']))

        # Every compilation unit in the DWARF information may or may not
        # have a corresponding line program in .debug_line.
        line_program = dwarfinfo.line_program_for_CU(CU)
        if line_program is None:
            print('  DWARF info is missing a line program for this CU')
            continue

        # Print a reverse mapping of filename -> #entries
        filename_map = line_entry_mapping(line_program)
        for line_entry in line_program.get_entries():
            # Entries without a state carry no address and are skipped.
            if line_entry.state is not None:
                addr_lineProgram[hex(line_entry.state.address)] = line_entry
                # NOTE: this stores the whole per-CU filename map, not the
                # single file the entry belongs to; a later entry with the
                # same address overwrites an earlier one.
                addr_sourceFile[hex(line_entry.state.address)] = filename_map

        
        


  Found a compile unit at offset 0, length 457
    filename=stack.c -> 21 entries
  Found a compile unit at offset 461, length 350
    filename=main.c -> 16 entries
  Found a compile unit at offset 815, length 446
    filename=calculate.c -> 26 entries


In [46]:
# print(addr_lineProgram)
# address_inst

def getSource(sourceFileName="stack.c", row=5 , col=5, basePath="/home/nahid/reverse/binaries/c_many/"):
    """Return one line of a source file with an "@" marker inserted at a column.

    Parameters:
        sourceFileName: file name joined onto ``basePath`` with
            ``os.path.join`` (an absolute name therefore replaces it).
        row: 1-based line number of the line to return.
        col: 1-based column; the "@" is inserted in front of that character.
        basePath: directory containing the sources. Defaults to the
            directory that was previously hard-coded in this function.

    Returns:
        The selected line (including its trailing newline, if it has one)
        with "@" inserted at position ``col - 1``.

    Raises:
        OSError: if the file cannot be opened.
        IndexError: if ``row`` is beyond the last line of the file.

    Note:
        ``row`` and ``col`` are used as ``row - 1`` / ``col - 1`` list and
        string indices, so a value of 0 wraps around through negative
        indexing (``row=0`` selects the last line).
    """
    sourceFilePath = os.path.join(basePath, sourceFileName)

    # The context manager guarantees the handle is closed; the previous
    # implementation opened the file and never closed it.
    with open(sourceFilePath, "r") as sourceFile:
        fileContent = sourceFile.readlines()

    row_content = fileContent[row-1]
    return row_content[:(col-1)] + "@" + row_content[col-1:]


In [47]:

# FUNCTION_DECL
# https://stackoverflow.com/questions/43460605/function-boundary-identification-using-libclang
#https://eli.thegreenplace.net/2011/07/03/parsing-c-in-python-with-clang


import clang.cindex

def get_function_boundaries(source_path):
    """Collect the source extent of every function declaration clang reports.

    ``source_path`` is parsed with libclang and the whole cursor tree is
    walked in pre-order; each ``FUNCTION_DECL`` cursor contributes one record.
    The function name is the cursor's display name up to the first "(".
    When the same name is seen more than once, the last declaration wins.

    Returns:
        dict mapping function name -> dict with the keys ``src_path``,
        ``src_file``, ``start_line``, ``start_col``, ``end_line``, ``end_col``.
    """
    translation_unit = clang.cindex.Index.create().parse(source_path)
    boundaries = {}

    for cursor in translation_unit.cursor.walk_preorder():
        if cursor.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            continue

        name = cursor.displayname.split('(')[0]
        begin = cursor.extent.start
        finish = cursor.extent.end
        declared_in = begin.file.name

        boundaries[name] = {
            'src_path': declared_in,
            # Last path component, split on "/" only.
            'src_file': declared_in.split('/')[-1],
            'start_line': begin.line,
            'start_col': begin.column,
            'end_line': finish.line,
            'end_col': finish.column,
        }

    return boundaries

def get_containing_function(source_file_path, line, col=0):
    """Return the name of the function whose line range contains ``line``.

    The file is parsed with ``get_function_boundaries`` on every call. Only
    declarations whose ``src_path`` equals ``source_file_path`` are
    considered, and the first one (in dictionary order) whose
    ``start_line``..``end_line`` range includes ``line`` is returned.

    ``col`` is accepted but not used. Returns ``None`` when nothing matches.
    """
    boundaries = get_function_boundaries(source_file_path)

    candidates = (
        name
        for name, span in boundaries.items()
        if span['src_path'] == source_file_path
        and span['start_line'] <= line <= span['end_line']
    )
    return next(candidates, None)
        

# get_containing_function('/home/nahid/reverse/binaries/c_many/main.c' , 10)


In [48]:

# Write an annotated listing to 'stacktest.s': one line per disassembled
# instruction; instructions whose address has a line-program entry are
# followed by "#" and the matching source line (with an "@" column marker).
# A banner is written whenever the source file name changes.
with open('stacktest.s', 'w') as outFile:
    lastSource = ""
    for address, inst in address_inst.items():
        instrctionCode = (address+":\t"+ inst.mnemonic+" "+inst.op_str).ljust(45)

        if address in addr_lineProgram:
            line = addr_lineProgram[address]

            srcFileName = list(addr_sourceFile[address].keys())[0] #TODO not single file always

            if srcFileName != lastSource:
                outFile.write("\n"+ '#'*100+"\n"+ srcFileName.rjust(45) +'\n'+'#'*100+ "\n\n")
                lastSource = srcFileName

            sourceCode = getSource(srcFileName, line.state.line, line.state.column)

            # A source line without a trailing newline (e.g. the last line of
            # a file) only needs the newline appended. The previous
            # `sourceCode += sourceCode + "\n"` also duplicated the text.
            if '\n' not in sourceCode:
                sourceCode += "\n"
            outFile.write(instrctionCode+"#"+ sourceCode)
            print(instrctionCode+"#"+ sourceCode)

        else:
            # No line information for this address: plain disassembly only.
            outFile.write(instrctionCode+ '\n')
    

0x11a9:	endbr64                              #void push(int number, stack **stk_ptr) @{

0x11bc:	mov dword ptr [rbp - 0x18], 0xa      #    int @pop = 10;

0x11c3:	mov dword ptr [rbp - 0x14], 0xc      #    unsigned int @ming = 12;

0x11ca:	mov rax, qword ptr [rbp - 0x30]      #    stk @= *stk_ptr;

0x11d5:	mov edi, 0x10                        #    tmp = @malloc(sizeof(stack));

0x11e3:	mov rax, qword ptr [rbp - 8]         #    tmp->number @= number;

0x11ec:	mov rax, qword ptr [rbp - 8]         #    tmp->next @= stk;

0x11f8:	mov rax, qword ptr [rbp - 8]         #    stk @= tmp;

0x1200:	mov rax, qword ptr [rbp - 0x30]      #    *stk_ptr @= stk;

0x120b:	nop                                  #@}

0x120e:	endbr64                              #int pop(stack **stk_ptr) @{

0x121e:	mov dword ptr [rbp - 0x18], 0xb      #    int @pop =11;

0x1225:	mov rax, qword ptr [rbp - 0x28]      #    stk @= *stk_ptr;

0x1230:	mov rax, qword ptr [rbp - 0x10]      #    tmp @= stk;

0x1238:	mov rax, qword pt

{'remove': {'src_path': '/usr/include/stdio.h',
  'src_file': 'stdio.h',
  'start_line': 152,
  'start_col': 1,
  'end_line': 152,
  'end_col': 51},
 'rename': {'src_path': '/usr/include/stdio.h',
  'src_file': 'stdio.h',
  'start_line': 154,
  'start_col': 1,
  'end_line': 154,
  'end_col': 65},
 'renameat': {'src_path': '/usr/include/stdio.h',
  'src_file': 'stdio.h',
  'start_line': 158,
  'start_col': 1,
  'end_line': 159,
  'end_col': 34},
 'fclose': {'src_path': '/usr/include/stdio.h',
  'src_file': 'stdio.h',
  'start_line': 178,
  'start_col': 1,
  'end_line': 178,
  'end_col': 35},
 'tmpfile': {'src_path': '/usr/include/stdio.h',
  'src_file': 'stdio.h',
  'start_line': 188,
  'start_col': 1,
  'end_line': 189,
  'end_col': 23},
 'tmpnam': {'src_path': '/usr/include/stdio.h',
  'src_file': 'stdio.h',
  'start_line': 205,
  'start_col': 1,
  'end_line': 205,
  'end_col': 45},
 'tmpnam_r': {'src_path': '/usr/include/stdio.h',
  'src_file': 'stdio.h',
  'start_line': 210,
  'star