In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
#from torch.utils.tensorboard import SummaryWriter

#from transformer import Transformer, TOK_LEN, PAD_TOK, MAX_INST_LEN
from stacksyms import checkLabels, generateDebugLabel, getMaxFrameSize, parseELF
# Cache intended to map a file to its parsed functions; it is only initialised here.
file2funcs = dict() # = parseDirectory(debugFiledir)

# Binary under inspection. The commented lines are alternative targets kept for quick switching.
#debugFilepath = 'data/cross-compile-dataset/bin/static/gcc/og/parallel'
#debugFiledir = 'data/cross-compile-dataset/bin/static/gcc/og'
debugFilepath = 'data/cross-compile-dataset/bin/dynamic/gcc/o2/xrdb'
#debugFiledir = '.'
#debugFilepath = 'data/cross-compile-dataset/bin/dynamic/gcc/og/sum'
#debugFilepath = 'data/spec2006/bzip2'

# The directory-scan cell reads debugFiledir, but every explicit assignment above is
# commented out, which makes a fresh "Restart & Run All" fail with NameError.
# Default to the directory that holds the selected binary ('.' for a bare file name).
debugFiledir = os.path.dirname(debugFilepath) or '.'

In [None]:
# TODO get frame size from .eh_frame section and compare func.variables against GDB's 'info scope func.name' output
# Run checkLabels on every ELF binary located directly inside debugFiledir.
ELF_MAGIC = b'\x7fELF'
for fname in next(os.walk(debugFiledir))[2]:
    # Use a dedicated name: rebinding debugFilepath here would make the later
    # parseELF(debugFilepath) cell parse whichever file happened to come last.
    candidate_path = os.path.join(debugFiledir, fname)
    with open(candidate_path, 'rb') as fstream:
        # read() returns up to the requested number of bytes, whereas peek() may
        # legally return fewer bytes than the 4 needed for the magic number.
        is_elf = fstream.read(len(ELF_MAGIC)) == ELF_MAGIC
    # The probe handle is closed before parsing; parseELF receives the path.
    if is_elf:
        functions = parseELF(candidate_path)
        checkLabels(functions)

In [None]:
# Parse the binary selected by debugFilepath; the report loop in the next cell iterates over `functions`.
functions = parseELF(debugFilepath)

In [None]:
import logging

# Raise the root logger threshold so that only the report printed below shows up.
logging.getLogger().setLevel(logging.WARNING)

# One line per function: frame base, maximum frame size and debug label,
# followed by the same figures for each function inlined into it.
for func in functions:
    frame = getMaxFrameSize(func)
    label = generateDebugLabel(func)
    print(f"{func.name} (frame starts at {func.frame_base}, size is {frame} bytes by offset) => {label}")
    if len(func.inlined_functions) == 0:
        continue
    print("    inlines ", func.inlined_functions)
    for inlined in func.inlined_functions:
        print("        ", inlined.name, getMaxFrameSize(inlined), generateDebugLabel(inlined))

In [4]:
from stacksyms import parseDWARF, getAllFunctions, collectFrameInfo, assign_frames, processRegisterRuleExpressions, propagateTypeInfo
from elftools.elf.elffile import ELFFile
# Step-by-step version of the parsing pipeline for the binary at debugFilepath.
# NOTE(review): the file object handed to ELFFile is never closed explicitly;
# confirm whether later cells still read through `elf` before wrapping this in `with`.
elf = ELFFile(open(debugFilepath, 'rb'))
# Every stage below consumes results of the previous ones, so the call order matters.
module, importer = parseDWARF(elf)
functions = getAllFunctions(module, importer, elf)
frame_tables = collectFrameInfo(functions, elf)
# `functions` is rebound three times; each call receives the previous result.
functions = assign_frames(frame_tables, functions)
functions = processRegisterRuleExpressions(functions, importer)
functions = propagateTypeInfo(functions, importer)

03/23/2021 16:21:36 INFO:parseDWARF:ELF file says it has some frame info..
03/23/2021 16:21:36 INFO:getFunctions:Found 22 function symbols.
03/23/2021 16:21:36 INFO:getAllFunctions:Found 88 inlined functions.
03/23/2021 16:21:36 INFO:getAllFunctions:Returning 110 functions total.
03/23/2021 16:21:36 INFO:collectFrameInfo:has .eh_frames


In [5]:
# Show name and inline flag of every function that has no frame table attached.
for func in functions:
    if func.frame_table is not None:
        continue
    print(func.name, func.is_inline)

vfprintf True
snprintf True
GetEntriesString True
EditFile True
GetEntriesString True
ReadFile True
putchar True
DoCmdDefines True
printf True
printf True
printf True
strcpy True
fgets True
InitEntries True
ShuffleEntries True
printf True
printf True
printf True
printf True
GetEntriesString True
printf True
printf True
open True
DoDisplayDefines True
InitBuffer True
printf True
printf True
printf True
printf True
printf True
printf True
strcat True
fprintf True
FindFirst True
strncpy True
strncpy True
fprintf True
InitBuffer True
fgets True
FindEntry True
fprintf True
fprintf True
Resolution True
Resolution True
snprintf True
snprintf True
fprintf True
fprintf True
AddSimpleDef True
AddSimpleDef True
strncpy True
strcpy True
InitBuffer True
fgets True
FindEntry True
fprintf True
fprintf True
Resolution True
Resolution True
snprintf True
snprintf True
fprintf True
fprintf True
AddSimpleDef True
AddSimpleDef True
strncpy True
strcpy True


In [None]:
# Placeholders for features / labels / debug labels (only referenced by commented-out code).
X = []
Y = []
Z = []
# Dump every function with its variables and the location ranges of each variable.
for func in functions:
    print('////////////////////////')
    print(func.name, hex(func.start), func.frame_base)
    for lvar in func.variables:
        print(lvar.name, lvar.type, "(bytesize = %d)" % lvar.type.byte_size)
        for loc in lvar.locations:
            # [13:] drops the first 13 characters of the type's string form
            # (presumably an enum class prefix - TODO confirm).
            print("".join((
                hex(loc.begin), " to ", hex(loc.end), ": ",
                str(loc.type)[13:],
                str(loc.expr),
            )))
        print('')
#    X += [func] #generateFeature(func, functions)]
#    Y += [generateLabel(func, functions)]
#    Z += [generateDebugLabel(func, functions)]
#print([x+" => "+ str(y) for x,y in zip(X,Z)])

#func = 'quotearg_n_style_colon'

In [None]:
def handleLineprogram(dwarfInfo):
    """Print each compilation unit's file name and its flag-setting line opcodes.

    Doesn't appear to be super useful for stack symbolization, as compilers
    don't seem to emit relevant information.

    For every CU the name of the first file entry is printed (prefixed with its
    include directory when the line program lists any), followed by the repr of
    each entry whose command is set_basic_block, set_prologue_end or set_isa.

    Args:
        dwarfInfo: object exposing iter_CUs() and line_program_for_CU(cu)
            (presumably a pyelftools DWARFInfo - confirm against the caller).

    Returns:
        None. All results are written with print().
    """
    # Standard line-number opcodes, using the values fixed by the DWARF standard.
    # They are spelled out here because the notebook never imports the DW_LNS_*
    # names, which made the opcode check below fail with NameError.
    # elftools.dwarf.constants exports the same names.
    DW_LNS_set_basic_block = 0x07
    DW_LNS_set_prologue_end = 0x0a
    DW_LNS_set_isa = 0x0c
    flag_opcodes = (DW_LNS_set_basic_block, DW_LNS_set_prologue_end, DW_LNS_set_isa)

    print('looping through compilation units..')
    for cu in dwarfInfo.iter_CUs():
        lp = dwarfInfo.line_program_for_CU(cu)
        if lp is None:
            print('DWARF info is missing a line program for this CU')
            # NOTE(review): confirm that the CU object really provides keys().
            print(cu.keys())
            continue

        # The first file entry is taken as the CU's own source file.
        first_file = lp['file_entry'][0]
        cu_filename = first_file.name.decode('latin-1')
        if len(lp['include_directory']) > 0:
            dir_index = first_file.dir_index
            if dir_index > 0:
                # Directory indices are treated as 1-based; 0 maps to '.'.
                idir = lp['include_directory'][dir_index - 1]
            else:
                idir = b'.'
            cu_filename = '%s/%s' % (idir.decode('latin-1'), cu_filename)
        print(f'CU: {cu_filename}')

        # A disabled readelf-style dump of every entry used to sit inside this
        # loop as a string literal; it referenced undefined names and never ran.
        for entry in lp.get_entries():
            if entry.command in flag_opcodes:
                print('=============================')
                print(repr(entry))
                print('=============================')
        #print(dir(lp))
        #line_entry_mapping(lp)

def line_entry_mapping(line_program):
    """Print, per source file name, how many line-program entries refer to it.

    Entries without a state, or whose state has file index 0, are skipped.
    File names are resolved with lpe_filename() and reported in the order in
    which they were first encountered.
    """
    from collections import Counter

    entries_per_file = Counter()
    for entry in line_program.get_entries():
        state = entry.state
        # TODO: instruction doesn't correspond to src (entries skipped by this test)
        if state and state.file != 0:
            entries_per_file[lpe_filename(line_program, state.file)] += 1

    for filename, lpe_count in entries_per_file.items():
        print('%s -> %d entries' % (filename, lpe_count))

def lpe_filename(line_program, file_index):
    """Return the file name that `file_index` designates in `line_program`.

    Args:
        line_program: object whose `header` holds the "file_entry" and
            "include_directory" tables.
        file_index: 1-based index into the "file_entry" table. Callers must
            filter out 0 themselves (line_entry_mapping does), because
            0 - 1 would silently select the last entry.

    Returns:
        str: the bare file name when the entry's dir_index is 0, otherwise the
        include directory joined with the file name. Bytes are decoded with the
        default codec (UTF-8).

    Raises:
        IndexError: if an index points outside the header tables.
        UnicodeDecodeError: if the stored name is not valid UTF-8.
    """
    # The header is no longer printed here: this helper runs once per line
    # entry, so the debug dump flooded the cell output.
    lp_header = line_program.header
    file_entry = lp_header["file_entry"][file_index - 1]
    dir_index = file_entry["dir_index"]
    if dir_index == 0:
        return file_entry.name.decode()
    # NOTE(review): indices are treated as 1-based here; DWARF v5 line tables
    # are believed to be 0-based - confirm before using on such binaries.
    directory = lp_header["include_directory"][dir_index - 1]
    # Both parts are bytes, so join first and decode the result once.
    return os.path.join(directory, file_entry.name).decode()

    

#print(elf.get_section_by_name('.text').data())