In [1]:
#!/usr/bin/env python3

import sys,os
from elftools.elf.elffile import ELFFile
from elftools.elf.segments import Segment



filePath = '/home/nahid/reverse/binaries/c_many/stacktest'



# Open the binary once and report two things:
#   1. every program-header segment whose size on disk differs from its
#      size in memory, and
#   2. the offset and length of every DWARF compile unit.
with open(filePath, 'rb') as f:
    elffile = ELFFile(f)

    for segment in elffile.iter_segments():
        # Segments with equal file and memory sizes are not reported.
        if segment.header.p_filesz == segment.header.p_memsz:
            continue
        seg_head = segment.header
        print(f"Type: {seg_head.p_type}\nOffset: {hex(seg_head.p_offset)}\nSize in file:{hex(seg_head.p_filesz)}\nSize in memory:{hex(seg_head.p_memsz)}")

    dwarfinfo = elffile.get_dwarf_info()
    for CU in dwarfinfo.iter_CUs():
        cu_summary = (CU.cu_offset, CU['unit_length'])
        print('  Found a compile unit at offset %s, length %s' % cu_summary)
        

        # Every compilation unit in the DWARF information may or may not
        # have a corresponding line program in .debug_line.
        # line_program = dwarfinfo.line_program_for_CU(CU)
        # if line_program is None:
        #     print('  DWARF info is missing a line program for this CU')
        #     continue

Type: PT_LOAD
Offset: 0x2da0
Size in file:0x270
Size in memory:0x278
  Found a compile unit at offset 0, length 416
  Found a compile unit at offset 420, length 350
  Found a compile unit at offset 774, length 356


In [2]:
def _format_hex( addr, fieldsize=None, fullhex=False, lead0x=True):
    """ Format an address into a hexadecimal string.
        fieldsize:
            Size of the hexadecimal field (with leading zeros to fit the
            address into. For example with fieldsize=8, the format will
            be %08x
            If None, the minimal required field size will be used.
        fullhex:
            If True, override fieldsize to set it to the maximal size
            needed for the elfclass
        lead0x:
            If True, leading 0x is added
    """
    s = '0x' if lead0x else ''
    if fullhex:
        fieldsize = 8 if elffile.elfclass == 32 else 16
    if fieldsize is None:
        field = '%x'
    else:
        field = '%' + '0%sx' % fieldsize
    return s + field % addr

from elftools.elf.descriptions import (
    describe_ei_class, describe_ei_data, describe_ei_version,
    describe_ei_osabi, describe_e_type, describe_e_machine,
    describe_e_version_numeric, describe_p_type, describe_p_flags,
    describe_sh_type, describe_sh_flags,
    describe_symbol_type, describe_symbol_bind, describe_symbol_visibility,
    describe_symbol_shndx, describe_reloc_type, describe_dyn_tag,
    )

In [3]:
# Print every section header as three labelled lines:
#   line 1: index, name (cut to 17 chars) and type (cut to 15 chars)
#   line 2: sh_addr and sh_offset
#   line 3: sh_size, sh_entsize, flags, link, info and alignment
# The same labelled layout is used for both ELF classes; no separate compact
# 32-bit layout is printed.
with open(filePath, 'rb') as f:
    elffile =  ELFFile(f)
    for nsec, section in enumerate(elffile.iter_sections()):
        type_text = describe_sh_type(section['sh_type'])
        print('  [%2u] %-17.17s %-15.15s ' % (nsec, section.name, type_text))

        addr_text = _format_hex(section['sh_addr'], fullhex=True, lead0x=False)
        # The offset is shown with 8 digits unless it does not fit in 32 bits.
        offset_width = 16 if section['sh_offset'] > 0xffffffff else 8
        offset_text = _format_hex(section['sh_offset'],
                                  fieldsize=offset_width,
                                  lead0x=False)
        print(' sh_addr: %s  sh_offset: %s' % (addr_text, offset_text))

        size_text = _format_hex(section['sh_size'], fullhex=True, lead0x=False)
        entsize_text = _format_hex(section['sh_entsize'], fullhex=True, lead0x=False)
        flags_text = describe_sh_flags(section['sh_flags'])
        print('       sh_size: %s  sh_entsize: %s sh_flags:%3s      sh_link:%2s   sh_info:%3s     sh_addralign:%s' % (
            size_text, entsize_text, flags_text,
            section['sh_link'], section['sh_info'],
            section['sh_addralign']))

  [ 0]                   NULL            
 sh_addr: 0000000000000000  sh_offset: 00000000
       sh_size: 0000000000000000  sh_entsize: 0000000000000000 sh_flags:         sh_link: 0   sh_info:  0     sh_addralign:0
  [ 1] .interp           PROGBITS        
 sh_addr: 0000000000000318  sh_offset: 00000318
       sh_size: 000000000000001c  sh_entsize: 0000000000000000 sh_flags:  A      sh_link: 0   sh_info:  0     sh_addralign:1
  [ 2] .note.gnu.propert NOTE            
 sh_addr: 0000000000000338  sh_offset: 00000338
       sh_size: 0000000000000030  sh_entsize: 0000000000000000 sh_flags:  A      sh_link: 0   sh_info:  0     sh_addralign:8
  [ 3] .note.gnu.build-i NOTE            
 sh_addr: 0000000000000368  sh_offset: 00000368
       sh_size: 0000000000000024  sh_entsize: 0000000000000000 sh_flags:  A      sh_link: 0   sh_info:  0     sh_addralign:4
  [ 4] .note.ABI-tag     NOTE            
 sh_addr: 000000000000038c  sh_offset: 0000038c
       sh_size: 0000000000000020  sh_entsize: 0000

In [4]:
from capstone import *

# Disassemble the .text section as 64-bit x86 code, print each instruction,
# and remember it in address_inst keyed by hex(address) (a string such as
# '0x10c0') so later cells can look instructions up by address.
address_inst = {}
with open(filePath, 'rb') as f:
    elf = ELFFile(f)
    code = elf.get_section_by_name('.text')
    ops = code.data()
    print('code data size: ',code.data_size)

    # Disassemble starting at the section's sh_addr so that the printed
    # addresses are the section's addresses rather than offsets from zero.
    addr = code['sh_addr']
    md = Cs(CS_ARCH_X86, CS_MODE_64)
    for i in md.disasm(ops, addr):
        print(f'0x{i.address:x}:\t{i.mnemonic}\t{i.op_str}')
        address_key = hex(i.address)
        address_inst[address_key] = i

code data size:  828
0x10c0:	endbr64	
0x10c4:	xor	ebp, ebp
0x10c6:	mov	r9, rdx
0x10c9:	pop	rsi
0x10ca:	mov	rdx, rsp
0x10cd:	and	rsp, 0xfffffffffffffff0
0x10d1:	push	rax
0x10d2:	push	rsp
0x10d3:	xor	r8d, r8d
0x10d6:	xor	ecx, ecx
0x10d8:	lea	rdi, [rip + 0x175]
0x10df:	call	qword ptr [rip + 0x2ef3]
0x10e5:	hlt	
0x10e6:	nop	word ptr cs:[rax + rax]
0x10f0:	lea	rdi, [rip + 0x2f19]
0x10f7:	lea	rax, [rip + 0x2f12]
0x10fe:	cmp	rax, rdi
0x1101:	je	0x1118
0x1103:	mov	rax, qword ptr [rip + 0x2ed6]
0x110a:	test	rax, rax
0x110d:	je	0x1118
0x110f:	jmp	rax
0x1111:	nop	dword ptr [rax]
0x1118:	ret	
0x1119:	nop	dword ptr [rax]
0x1120:	lea	rdi, [rip + 0x2ee9]
0x1127:	lea	rsi, [rip + 0x2ee2]
0x112e:	sub	rsi, rdi
0x1131:	mov	rax, rsi
0x1134:	shr	rsi, 0x3f
0x1138:	sar	rax, 3
0x113c:	add	rsi, rax
0x113f:	sar	rsi, 1
0x1142:	je	0x1158
0x1144:	mov	rax, qword ptr [rip + 0x2ea5]
0x114b:	test	rax, rax
0x114e:	je	0x1158
0x1150:	jmp	rax
0x1152:	nop	word ptr [rax + rax]
0x1158:	ret	
0x1159:	nop	dword ptr [rax]
0x1160:

In [5]:
# address_inst

In [7]:
#-------------------------------------------------------------------------------
# elftools example: dwarf_lineprogram_filenames.py
#
# In the .debug_line section, the Dwarf line program generates a matrix
# of address-source references. This example demonstrates accessing the state
# of each line program entry to retrieve the underlying filenames.
#
# William Woodruff (william@yossarian.net)
# This code is in the public domain
#-------------------------------------------------------------------------------
from __future__ import print_function
from collections import defaultdict
import os
import sys
import posixpath

# If pyelftools is not installed, the example can also run from the root or
# examples/ dir of the source distribution.
# sys.path[0:0] = ['.', '..']

from elftools.elf.elffile import ELFFile






def line_entry_mapping(line_program):
    """Tally line-program entries per source file.

    Walks every entry of ``line_program``, resolves the file index of each
    entry that carries a state to a filename via ``lpe_filename``, prints one
    ``filename -> count`` line per file, and returns the tally as a
    ``defaultdict(int)`` mapping filename to number of entries.
    """
    counts = defaultdict(int)

    for entry in line_program.get_entries():
        state = entry.state
        # Only entries that have a state and a non-zero file index are
        # counted; the rest have no associated file to attribute them to.
        if state and state.file != 0:
            counts[lpe_filename(line_program, state.file)] += 1

    for name in counts:
        print("    filename=%s -> %d entries" % (name, counts[name]))
    return counts

def lpe_filename(line_program, file_index):
    """Resolve a line-program file index to a filename string.

    The index selects an entry from the header's ``file_entry`` table; that
    entry's ``dir_index`` in turn selects a directory from the header's
    ``include_directory`` table. Both indices are treated as 1-based.

    Returns the decoded basename when ``dir_index`` is 0, otherwise the
    decoded POSIX join of the directory and the basename.
    """
    header = line_program.header
    entry = header["file_entry"][file_index - 1]
    dir_idx = entry["dir_index"]

    if dir_idx != 0:
        parent_dir = header["include_directory"][dir_idx - 1]
        return posixpath.join(parent_dir, entry.name).decode()

    # dir_index 0: no directory is looked up; only the bare name is returned.
    return entry.name.decode()



# Build two lookup tables keyed by hex(address) (strings such as '0x11a9'):
#   addr_lineProgram: address -> the LineProgramEntry whose state has it
#   addr_sourceFile:  address -> the filename map of the compile unit that
#                     produced the entry (one shared map per compile unit)
# When several entries share an address, the last one seen wins.
addr_lineProgram ={}
addr_sourceFile = {}
with open(filePath, 'rb') as f:
    elffile = ELFFile(f)

    if not elffile.has_dwarf_info():
        # Report and leave both tables empty. exit() is deliberately not
        # called here: inside a notebook it asks the kernel to shut down
        # rather than merely ending this cell.
        print('  file has no DWARF info')
    else:
        dwarfinfo = elffile.get_dwarf_info()
        for CU in dwarfinfo.iter_CUs():
            print('  Found a compile unit at offset %s, length %s' % (
                CU.cu_offset, CU['unit_length']))

            # Every compilation unit in the DWARF information may or may not
            # have a corresponding line program in .debug_line.
            line_program = dwarfinfo.line_program_for_CU(CU)
            if line_program is None:
                print('  DWARF info is missing a line program for this CU')
                continue

            # Print a reverse mapping of filename -> #entries
            filename_map = line_entry_mapping(line_program)
            for line_entry in line_program.get_entries():
                print(line_entry)
                # Entries without a state carry no address to index by.
                if line_entry.state is not None:
                    address_key = hex(line_entry.state.address)
                    addr_lineProgram[address_key] = line_entry
                    addr_sourceFile[address_key] = filename_map
            print("_____________________________________________________")
        
        


  Found a compile unit at offset 0, length 416
    filename=stack.c -> 18 entries
LineProgramEntry(command=5, is_extended=False, args=[40], state=None)
LineProgramEntry(command=2, is_extended=True, args=[4521], state=None)
LineProgramEntry(command=23, is_extended=False, args=[5, 0, 0], state=<LineState 7f94892f1400:
  address = 0x11a9
  file = 1
  line = 6
  column = 40
  is_stmt = 1
  basic_block = False
  end_sequence = False
  prologue_end = False
  epilogue_begin = False
  isa = 0
  discriminator = 0>
)
LineProgramEntry(command=5, is_extended=False, args=[9], state=None)
LineProgramEntry(command=8, is_extended=False, args=[17], state=None)
LineProgramEntry(command=48, is_extended=False, args=[2, 2, 0], state=<LineState 7f94892f1430:
  address = 0x11bc
  file = 1
  line = 8
  column = 9
  is_stmt = 1
  basic_block = False
  end_sequence = False
  prologue_end = False
  epilogue_begin = False
  isa = 0
  discriminator = 0>
)
LineProgramEntry(command=5, is_extended=False, args=[11], s

In [None]:
# print(addr_lineProgram)
# address_inst

def getSource(sourceFileName="stack.c", row=5 , col=5,
              basePath="/home/nahid/reverse/binaries/c_many/"):
    """Return one line of a source file with a position marked by "@".

    sourceFileName:
        File name, joined onto basePath.
    row:
        1-based line number. Values below 1 yield an empty string instead
        of silently wrapping around to the end of the file.
    col:
        1-based column whose character is replaced by "@". Values below 1
        leave the line unmarked (the original slicing duplicated the line
        for col == 0).
    basePath:
        Directory that contains the source file.

    Returns the selected line, including its trailing newline if the file
    has one there. Raises IndexError when row is past the end of the file
    and OSError when the file cannot be opened.
    """
    # The caller passes DWARF line/column values, where 0 appears to be used
    # as a "no position" placeholder - confirm against the DWARF spec.
    if row < 1:
        return ""

    sourceFilePath = os.path.join(basePath, sourceFileName)
    # The context manager closes the handle; the original left it open.
    with open(sourceFilePath, "r") as sourceFile:
        fileContent = sourceFile.readlines()

    row_content = fileContent[row - 1]
    if col < 1:
        return row_content

    # Overwrite the character at the 1-based column with the marker.
    return row_content[:col - 1] + "@" + row_content[col:]


In [None]:
# Fetch the raw .debug_aranges and .debug_pubtypes section handles from the
# DWARF info. Both names stay None when the file has no DWARF info.
arangesInfo = None
pubTypes = None
with open(filePath, 'rb') as f:
    elffile = ELFFile(f)

    if elffile.has_dwarf_info():
        dwarfinfo = elffile.get_dwarf_info()
        arangesInfo = dwarfinfo.debug_aranges_sec
        pubTypes = dwarfinfo.debug_pubtypes_sec
        # The original cell ended in an unfinished `dwarfinfo.` expression,
        # a SyntaxError that kept the whole cell from running; it is dropped.
    else:
        # exit() is avoided: in a notebook it asks the kernel to shut down.
        print('  file has no DWARF info')


In [None]:

# Write an annotated disassembly listing to stacktest.s and echo it:
# each instruction is padded to 45 columns; when the line table has an entry
# for its address, the matching source line (position marked by getSource)
# is appended after a "#". A banner is written whenever the source file
# changes.
with open('stacktest.s', 'w') as outFile:
    lastSource = ""
    for address, inst in address_inst.items():
        instructionCode = (address+":\t"+ inst.mnemonic+" "+inst.op_str).ljust(45)

        if address not in addr_lineProgram:
            # No line-table entry for this address: plain instruction only.
            outFile.write(instructionCode+ '\n'  )
            print(instructionCode)
            continue

        state = addr_lineProgram[address].state
        srcFileName =list(addr_sourceFile[address].keys())[0] #TODO not single file always

        if srcFileName!=lastSource:
            outFile.write("\n"+ '#'*100+"\n"+ srcFileName.rjust(45) +'\n'+'#'*100+ "\n\n")
            lastSource = srcFileName

        sourceCode = getSource(srcFileName, state.line, state.column)
        # Terminate the line exactly once. The original appended the text to
        # itself (sourceCode += sourceCode + "\n"), doubling the source line,
        # and only checked for a newline anywhere rather than at the end.
        if not sourceCode.endswith('\n'):
            sourceCode += "\n"
        outFile.write(instructionCode+"#"+ sourceCode  )
        print(instructionCode+"#"+ sourceCode)
    