In [None]:
class Instr(object):
    def __init__(self, function, addr, mnemonic, op_str):
        self.function = function
        self.addr = addr
        self.mnemonic = mnemonic
        self.op_str = op_str
    def __str__(self):
        return f'{self.function}@{hex(self.addr)}: {self.mnemonic} {self.op_str}'

class Line(object):
    def __init__(self, filename, lineno, addr):
        self.filename = filename
        self.lineno = lineno
        self.addr = addr
    def __str__(self):
        return f'{self.filename}:{self.lineno}({hex(self.addr)})'

In [None]:
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection

file = open("libftfp.so", "rb")
elf = ELFFile(file)
sections = list(elf.iter_sections())
text_section = [s for s in sections if s.name == ".text"][0]

In [None]:
symbol_tables = [s for s in sections if isinstance(s, SymbolTableSection)]
dynsym = [s for s in symbol_tables if s.name == ".dynsym"][0];

In [None]:
functions = [sym for _, sym in enumerate(dynsym.iter_symbols()) if sym['st_info']['type'] == "STT_FUNC" and sym.name[:4] == "fix_"]

In [None]:
def get_function_code(function):
    begin_idx = function['st_value'] - text_section['sh_offset']
    end_idx = begin_idx + function['st_size']
    return text_section.data()[begin_idx:end_idx]

In [None]:
general_purpose_regs = ["W" + str(i) for i in range(0, 31)] + ["X" + str(i) for i in range(0, 31)] + ["SP", "LR", "XZR"]
dit_instrs = ['ADC', 'ADCS', 'ADD', 'ADDS', 'AND', 'ANDS', 'ASR', 'ASRV', 'BFC', 'BFI', 'BFM', 'BFXIL', 'BIC', 'BICS', 'CCMN', 'CCMP', 'CFINV', 'CINC', 'CINV', 'CLS', 'CLZ', 'CMN', 'CMP', 'CNEG', 'CSEL', 'CSET', 'CSETM', 'CSINC', 'CSINV', 'CSNEG', 'EON', 'EOR', 'EXTR', 'LSL', 'LSLV', 'LSR', 'LSRV', 'MADD', 'MNEG', 'MOV', 'MOVK', 'MOVN', 'MOVZ', 'MSUB', 'MUL', 'MVN', 'NEG', 'NEGS', 'NGC', 'NGCS', 'NOP', 'ORN', 'ORR', 'RBIT', 'RET', 'REV', 'REV16', 'REV32', 'REV64', 'RMIF', 'ROR', 'RORV', 'SBC', 'SBCS', 'SBFIZ', 'SBFM', 'SBFX', 'SETF8', 'SETF16', 'SMADDL', 'SMNEGL', 'SMSUBL', 'SMULH', 'SMULL', 'SUB', 'SUBS', 'SXTB', 'SXTH', 'SXTW', 'TST', 'UBFIZ', 'UBFM', 'UBFX', 'UMADDL', 'UMNEGL', 'UMSUBL', 'UMULH', 'UMULL', 'UXTB', 'UXTH'] # these are the instructions for the general-purpose register file
dit_simd_instrs = ["ABS", "ADD", "ADDHN", "ADDHN2", "ADDP", "ADDV", "AND", "BIC", "BIF", "BIT", "BSL", "CLS", "CLZ", "CMEQ", "CMGE", "CMGT", "CMHI", "CMHS", "CMLE", "CMLT", "CMTST", "CNT", "CRC32B", "CRC32H", "CRC32W", "CRC32X", "CRC32CB", "CRC32CH", "CRC32CW", "CRC32CX", "DUP", "EOR", "EXT", "FCSEL", "INS", "MLA", "MLS", "MOV", "MOVI", "MUL", "MVN", "MVNI", "NEG", "NOT", "ORN", "ORR", "PMUL", "PMULL", "PMULL2", "RADDHN", "RADDHN2", "RBIT", "REV16", "REV32", "RSHRN", "RSHRN2", "RSUBHN", "RSUBHN2", "SABA", "SABD", "SABAL", "SABAL2", "SABDL", "SABDL2", "SADALP", "SADDL", "SADDL2", "SADDLP", "SADDLV", "SADDW", "SADDW2", "SHADD", "SHL", "SHLL", "SHLL2", "SHRN", "SHRN2", "SHSUB", "SLI", "SMAX", "SMAXP", "SMAXV", "SMIN", "SMINP", "SMINV", "SMLAL", "SMLAL2", "SMLSL", "SMLSL2", "SMOV", "SMULL", "SMULL2", "SRI", "SSHL", "SSHLL", "SSHLL2", "SSHR", "SSRA", "SSUBL", "SSUBL2", "SSUBW", "SSUBW2", "SUB", "SUBHN", "SUBHN2", "SXTL", "SXTL2", "TBL", "TBX", "TRN1", "TRN2", "UABA", "UABAL", "UABAL2", "UABD", "UABDL", "UABDL2", "UADALP", "UADDL", "UADDL2", "UADDLP", "UADDLV", "UADDW", "UADDW2", "UHADD", "UHSUB", "UMAX", "UMAXP", "UMAXV", "UMIN", "UMINP", "UMINV", "UMLAL", "UMLAL2", "UMLSL", "UMOV", "UMLSL2", "UMULL", "UMULL2", "USHL", "USHLL", "USHLL2", "USHR", "USRA", "USUBL", "USUBL2", "USUBW", "USUBW2", "UXTL", "UXTL2", "UZP1", "UZP2", "XTN", "XTN2", "ZIP1", "ZIP2"] # these are the instructions for the SIMD & FP register file

In [None]:
#remove the multiplication instructions
dit_instrs = [i for i in dir_instrs if i != "MADD" and i != "MUL" and i != "MSUB"]

In [7]:
from capstone import *

bad_instrs = []
md = Cs(CS_ARCH_ARM64, CS_MODE_ARM)

for function in functions:
    code = get_function_code(function)
    
    instrs = [i for i in md.disasm_lite(code, function['st_value'])]
    
    if(len(instrs) != len(code) / 4): 
        print("invalid disassembly for " + function.name)
        continue
    
    for (addr, size, mnemonic, op_str) in instrs:
        if(op_str == '' or op_str.split(",")[0].upper() in general_purpose_regs):
            if(mnemonic.upper() not in dit_instrs):
                bad_instrs += [Instr(function.name, addr, mnemonic, op_str)]
        elif(mnemonic.upper() not in dit_simd_instrs):
            bad_instrs += [Instr(function.name, addr, mnemonic, op_str)]

In [8]:
set([i.mnemonic for i in bad_instrs])

{'adrp', 'b', 'b.eq', 'b.ne', 'bl', 'blr', 'ldp', 'ldr', 'stp', 'str', 'strb'}

In [9]:
from bisect import bisect_left

dwarf = elf.get_dwarf_info()
def get_lines_from_cu(cu):
    res = []
    lineprogram = dwarf.line_program_for_CU(cu)
    for entry in lineprogram.get_entries():
        state = entry.state
        if state is not None:
            res += [Line(lineprogram['file_entry'][state.file-1].name, state.line, state.address)]
    return res

def get_lines_from_dwarf(dwarf):
    res = []
    for cu in dwarf.iter_CUs():
        res += get_lines_from_cu(cu)
    return sorted(res, key=lambda l: l.addr)

def find_line(lines, addr):
    exact_matches = [l for l in filter(lambda line: line.addr == addr, lines)]
    if(len(exact_matches) != 0):
        return exact_matches
    addrs = [l.addr for l in lines]
    return [lines[bisect_left(addrs, addr) - 1]]

lines = get_lines_from_dwarf(dwarf)

In [10]:
#stores outside of the stack
[(str(i), str(find_line(lines, i.addr)[0])) for i in bad_instrs if i.mnemonic[0] == 'b']


[('fix_cos@0x3dc8: b.ne #0x3ce4', "b'internal.h':516(0x3dc8)"),
 ('fix_cos@0x4018: b.ne #0x3e64', "b'cordic.h':34(0x4014)"),
 ('fix_sprint@0x1adc: blr x1', "b'autogen.c':14(0x1adc)"),
 ('fix_div@0x1454: b.ne #0x1378', "b'internal.h':593(0x1454)"),
 ('fix_gt@0x1130: bl #0xd60', "b'ftfp.c':80(0x1130)"),
 ('fix_sqrt@0x9950: b #0x99dc', "b'internal.h':557(0x994c)"),
 ('fix_sqrt@0x99d8: b.eq #0x9b7c', "b'power.c':527(0x99d8)"),
 ('fix_sqrt@0x9b74: b.ne #0x9a94', "b'internal.h':593(0x9b74)"),
 ('fix_sqrt@0x9b78: b #0x9954', "b'internal.h':593(0x9b74)"),
 ('fix_le@0x1058: bl #0xd60', "b'ftfp.c':59(0x1058)"),
 ('fix_convert_to_double_internal@0xa0cc: blr x2', "b'double.c':65(0xa0c4)"),
 ('fix_print@0x1a30: blr x3', "b'ftfp.c':364(0x1a28)"),
 ('fix_print@0x1a4c: blr x5', "b'stdio2.h':107(0x1a34)"),
 ('fix_sin@0x3964: b.ne #0x3888', "b'internal.h':516(0x3964)"),
 ('fix_sin@0x3bc4: b.ne #0x3a10', "b'cordic.h':34(0x3bc0)"),
 ('fix_println@0x1a64: bl #0xd50', "b'ftfp.c':368(0x1a64)"),
 ('fix_printl

In [11]:
[(str(i), str(find_line(lines, i.addr)[0])) for i in bad_instrs if i.function == "fix_eq"]

[]

In [12]:
functions

[<elftools.elf.sections.Symbol at 0x7fbfbd15bb80>,
 <elftools.elf.sections.Symbol at 0x7fbfbd15b9d0>,
 <elftools.elf.sections.Symbol at 0x7fbfbc889fd0>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d160>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d250>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d340>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d430>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d520>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d610>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d700>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d7f0>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d8e0>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81d9d0>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81dac0>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81dbb0>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81dca0>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81dd90>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81de80>,
 <elftools.elf.sections.Symbol at 0x7fbfbc81df70>,
 <elftools.elf.sections.Symbol 