In [18]:
class Instr(object):
    def __init__(self, function, addr, mnemonic, op_str):
        self.function = function
        self.addr = addr
        self.mnemonic = mnemonic
        self.op_str = op_str
    def __str__(self):
        return f'{self.function}@{hex(self.addr)}: {self.mnemonic} {self.op_str}'

class Line(object):
    def __init__(self, filename, lineno, addr):
        self.filename = filename
        self.lineno = lineno
        self.addr = addr
    def __str__(self):
        return f'{self.filename}:{self.lineno}({hex(self.addr)})'

In [19]:
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection

file = open("libftfp.so", "rb")
elf = ELFFile(file)
sections = list(elf.iter_sections())
text_section = [s for s in sections if s.name == ".text"][0]

In [20]:
symbol_tables = [s for s in sections if isinstance(s, SymbolTableSection)]
dynsym = [s for s in symbol_tables if s.name == ".dynsym"][0];

In [21]:
functions = [sym for _, sym in enumerate(dynsym.iter_symbols()) if sym['st_info']['type'] == "STT_FUNC" and sym.name[:4] == "fix_"]

In [22]:
def get_function_code(function):
    begin_idx = function['st_value'] - text_section['sh_offset']
    end_idx = begin_idx + function['st_size']
    return text_section.data()[begin_idx:end_idx]

In [23]:
general_purpose_regs = ["W" + str(i) for i in range(0, 31)] + ["X" + str(i) for i in range(0, 31)] + ["SP", "LR", "XZR"]
dit_instrs = ['ADC', 'ADCS', 'ADD', 'ADDS', 'AND', 'ANDS', 'ASR', 'ASRV', 'BFC', 'BFI', 'BFM', 'BFXIL', 'BIC', 'BICS', 'CCMN', 'CCMP', 'CFINV', 'CINC', 'CINV', 'CLS', 'CLZ', 'CMN', 'CMP', 'CNEG', 'CSEL', 'CSET', 'CSETM', 'CSINC', 'CSINV', 'CSNEG', 'EON', 'EOR', 'EXTR', 'LSL', 'LSLV', 'LSR', 'LSRV', 'MADD', 'MNEG', 'MOV', 'MOVK', 'MOVN', 'MOVZ', 'MSUB', 'MUL', 'MVN', 'NEG', 'NEGS', 'NGC', 'NGCS', 'NOP', 'ORN', 'ORR', 'RBIT', 'RET', 'REV', 'REV16', 'REV32', 'REV64', 'RMIF', 'ROR', 'RORV', 'SBC', 'SBCS', 'SBFIZ', 'SBFM', 'SBFX', 'SETF8', 'SETF16', 'SMADDL', 'SMNEGL', 'SMSUBL', 'SMULH', 'SMULL', 'SUB', 'SUBS', 'SXTB', 'SXTH', 'SXTW', 'TST', 'UBFIZ', 'UBFM', 'UBFX', 'UMADDL', 'UMNEGL', 'UMSUBL', 'UMULH', 'UMULL', 'UXTB', 'UXTH'] # these are the instructions for the general-purpose register file
dit_simd_instrs = ["ABS", "ADD", "ADDHN", "ADDHN2", "ADDP", "ADDV", "AND", "BIC", "BIF", "BIT", "BSL", "CLS", "CLZ", "CMEQ", "CMGE", "CMGT", "CMHI", "CMHS", "CMLE", "CMLT", "CMTST", "CNT", "CRC32B", "CRC32H", "CRC32W", "CRC32X", "CRC32CB", "CRC32CH", "CRC32CW", "CRC32CX", "DUP", "EOR", "EXT", "FCSEL", "INS", "MLA", "MLS", "MOV", "MOVI", "MUL", "MVN", "MVNI", "NEG", "NOT", "ORN", "ORR", "PMUL", "PMULL", "PMULL2", "RADDHN", "RADDHN2", "RBIT", "REV16", "REV32", "RSHRN", "RSHRN2", "RSUBHN", "RSUBHN2", "SABA", "SABD", "SABAL", "SABAL2", "SABDL", "SABDL2", "SADALP", "SADDL", "SADDL2", "SADDLP", "SADDLV", "SADDW", "SADDW2", "SHADD", "SHL", "SHLL", "SHLL2", "SHRN", "SHRN2", "SHSUB", "SLI", "SMAX", "SMAXP", "SMAXV", "SMIN", "SMINP", "SMINV", "SMLAL", "SMLAL2", "SMLSL", "SMLSL2", "SMOV", "SMULL", "SMULL2", "SRI", "SSHL", "SSHLL", "SSHLL2", "SSHR", "SSRA", "SSUBL", "SSUBL2", "SSUBW", "SSUBW2", "SUB", "SUBHN", "SUBHN2", "SXTL", "SXTL2", "TBL", "TBX", "TRN1", "TRN2", "UABA", "UABAL", "UABAL2", "UABD", "UABDL", "UABDL2", "UADALP", "UADDL", "UADDL2", "UADDLP", "UADDLV", "UADDW", "UADDW2", "UHADD", "UHSUB", "UMAX", "UMAXP", "UMAXV", "UMIN", "UMINP", "UMINV", "UMLAL", "UMLAL2", "UMLSL", "UMOV", "UMLSL2", "UMULL", "UMULL2", "USHL", "USHLL", "USHLL2", "USHR", "USRA", "USUBL", "USUBL2", "USUBW", "USUBW2", "UXTL", "UXTL2", "UZP1", "UZP2", "XTN", "XTN2", "ZIP1", "ZIP2"] # these are the instructions for the SIMD & FP register file

In [24]:
from capstone import *

bad_instrs = []
all_instrs = []
md = Cs(CS_ARCH_ARM64, CS_MODE_ARM)

for function in functions:
    code = get_function_code(function)
    print(function.name, len(code))
    
    instrs = [i for i in md.disasm_lite(code, function['st_value'])]
    
    if(len(instrs) != len(code) / 4): 
        print("invalid disassembly for " + function.name)
        continue
    
    for (addr, size, mnemonic, op_str) in instrs:
        if(op_str == '' or op_str.split(",")[0].upper() in general_purpose_regs):
            if(mnemonic.upper() not in dit_instrs):
                bad_instrs += [Instr(function.name, addr, mnemonic, op_str)]
        elif(mnemonic.upper() not in dit_simd_instrs):
            bad_instrs += [Instr(function.name, addr, mnemonic, op_str)]
        all_instrs += [Instr(function.name, addr, mnemonic, op_str)]

fix_cos 4492
fix_ceil64 68
fix_floor 60
fix_ne 64
fix_cmp 188
fix_sprint 13432
fix_div 4244
fix_convert_to_int64 88
fix_gt 208
fix_convert_from_int64 52
fix_sqrt 85088
fix_le 208
fix_is_neg 8
fix_eq 56
fix_convert_to_double_internal 336
fix_is_inf_neg 16
fix_floor64 60
fix_print 60
fix_sin 4508
fix_round_up_int64 68
fix_mul 420
fix_ln 6068
fix_neg 72
fix_ceil 92
fix_abs 88
fix_is_inf_pos 16
fix_println 36
fix_lt 192
fix_convert_from_double_internal 424
fix_tan 8632
fix_pow 18432
fix_eq_nan 52
fix_exp 11024
fix_ge 204
fix_sub 200
fix_log10 6100
fix_is_nan 16
fix_log2 6156
fix_add 132


In [25]:
set([i.mnemonic for i in bad_instrs])

{'adrp', 'b.ne', 'bl', 'ldp', 'ldr', 'ldrb', 'ldur', 'stp', 'str', 'strb'}

In [26]:
from bisect import bisect_left

dwarf = elf.get_dwarf_info()
def get_lines_from_cu(cu):
    res = []
    lineprogram = dwarf.line_program_for_CU(cu)
    for entry in lineprogram.get_entries():
        state = entry.state
        if state is not None:
            res += [Line(lineprogram['file_entry'][state.file-1].name, state.line, state.address)]
    return res

def get_lines_from_dwarf(dwarf):
    res = []
    for cu in dwarf.iter_CUs():
        res += get_lines_from_cu(cu)
    return sorted(res, key=lambda l: l.addr)

def find_line(lines, addr):
    exact_matches = [l for l in filter(lambda line: line.addr == addr, lines)]
    if(len(exact_matches) != 0):
        return exact_matches
    addrs = [l.addr for l in lines]
    return [lines[bisect_left(addrs, addr) - 1]]

lines = get_lines_from_dwarf(dwarf)

In [27]:
[(str(i), str(find_line(lines, i.addr)[0])) for i in all_instrs if i.mnemonic == 'ccmp']


'power.c':100(0x2a1bc)"),
 ('fix_pow@0x2a3e4: ccmp x27, x0, #4, ne', "b'power.c':100(0x2a1bc)"),
 ('fix_pow@0x2a3f8: ccmp x5, x0, #4, ne', "b'power.c':100(0x2a1bc)"),
 ('fix_pow@0x2a408: ccmp x25, x0, #4, ne', "b'power.c':100(0x2a1bc)"),
 ('fix_pow@0x2a41c: ccmp x26, x0, #4, ne', "b'power.c':100(0x2a1bc)"),
 ('fix_pow@0x2a42c: ccmp x2, x0, #4, ne', "b'power.c':100(0x2a1bc)"),
 ('fix_pow@0x2a440: ccmp x27, x0, #4, ne', "b'power.c':100(0x2a1bc)"),
 ('fix_pow@0x2a44c: ccmp x24, x0, #4, ne', "b'power.c':100(0x2a44c)"),
 ('fix_pow@0x2a45c: ccmp x23, x0, #4, ne', "b'power.c':100(0x2a45c)"),
 ('fix_pow@0x2a468: ccmp x22, x0, #4, ne', "b'power.c':100(0x2a468)"),
 ('fix_pow@0x2a478: ccmp x21, x0, #4, ne', "b'power.c':100(0x2a478)"),
 ('fix_pow@0x2a484: ccmp x20, x0, #4, ne', "b'power.c':100(0x2a484)"),
 ('fix_pow@0x2a494: ccmp x19, x0, #4, ne', "b'power.c':100(0x2a494)"),
 ('fix_pow@0x2a4a0: ccmp x30, x0, #4, ne', "b'power.c':100(0x2a4a0)"),
 ('fix_pow@0x2a4b0: ccmp x18, x0, #4, ne', "b'power.c

In [28]:
predicated = ['ccmn', 'ccmp', 'cinc', 'cinv', 'cneg', 'csel', 'cset', 'csetm', 'csinc', 'csinv', 'csneg']
tested_funcs = ["fix_neg", "fix_abs", "fix_is_neg", "fix_is_nan", "fix_is_inf_pos", "fix_is_inf_neg", "fix_eq", "fix_eq_nan", "fix_cmp", "fix_add", "fix_sub", "fix_mul", "fix_div", "fix_floor", "fix_ceil", "fix_exp", "fix_ln", "fix_log2", "fix_log10", "fix_pow", "fix_sqrt", "fix_sin", "fix_cos", "fix_tan", "fix_sprint"]

In [33]:
[(func, sum(i.mnemonic in predicated and i.function == func for i in all_instrs)) for func in tested_funcs]

[('fix_neg', 5),
 ('fix_abs', 6),
 ('fix_is_neg', 0),
 ('fix_is_nan', 1),
 ('fix_is_inf_pos', 1),
 ('fix_is_inf_neg', 1),
 ('fix_eq', 7),
 ('fix_eq_nan', 7),
 ('fix_cmp', 19),
 ('fix_add', 12),
 ('fix_sub', 17),
 ('fix_mul', 28),
 ('fix_div', 169),
 ('fix_floor', 3),
 ('fix_ceil', 7),
 ('fix_exp', 427),
 ('fix_ln', 259),
 ('fix_log2', 262),
 ('fix_log10', 259),
 ('fix_pow', 777),
 ('fix_sqrt', 3114),
 ('fix_sin', 158),
 ('fix_cos', 158),
 ('fix_tan', 316),
 ('fix_sprint', 7)]

In [30]:
fix_sin_instrs = set([i.mnemonic for i in all_instrs if i.function == "fix_sin"])
fix_log2_instrs = set([i.mnemonic for i in all_instrs if i.function == "fix_exp"])

[m for m in fix_sin_instrs if m not in fix_log2_instrs]

['bfi', 'bl', 'b.ne', 'madd', 'adrp', 'ldur']

In [32]:
[(str(i), str(find_line(lines, i.addr)[0])) for i in all_instrs if i.mnemonic == 'ldur' and i.function == "fix_sin"]

[('fix_sin@0x6f38: ldur x13, [x5, #-8]', "b'cordic.h':41(0x6f34)")]

In [34]:
file.close()