In [181]:
class Instr(object):
    def __init__(self, function, addr, mnemonic, op_str):
        self.function = function
        self.addr = addr
        self.mnemonic = mnemonic
        self.op_str = op_str
    def __str__(self):
        return f'{self.function}@{hex(self.addr)}: {self.mnemonic} {self.op_str}'

class Line(object):
    def __init__(self, filename, lineno, addr):
        self.filename = filename
        self.lineno = lineno
        self.addr = addr
    def __str__(self):
        return f'{self.filename}:{self.lineno}({hex(self.addr)})'

In [182]:
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import SymbolTableSection

file = open("libftfp.so", "rb")
elf = ELFFile(file)
sections = list(elf.iter_sections())
text_section = [s for s in sections if s.name == ".text"][0]

In [183]:
symbol_tables = [s for s in sections if isinstance(s, SymbolTableSection)]
dynsym = [s for s in symbol_tables if s.name == ".dynsym"][0];

In [184]:
functions = [sym for _, sym in enumerate(dynsym.iter_symbols()) if sym['st_info']['type'] == "STT_FUNC" and sym.name[:4] == "fix_"]

In [185]:
def get_function_code(function):
    begin_idx = function['st_value'] - text_section['sh_offset']
    end_idx = begin_idx + function['st_size']
    return text_section.data()[begin_idx:end_idx]

In [186]:
general_purpose_regs = ["W" + str(i) for i in range(0, 31)] + ["X" + str(i) for i in range(0, 31)] + ["SP", "LR", "XZR"]
dit_instrs = ['ADC', 'ADCS', 'ADD', 'ADDS', 'AND', 'ANDS', 'ASR', 'ASRV', 'BFC', 'BFI', 'BFM', 'BFXIL', 'BIC', 'BICS', 'CCMN', 'CCMP', 'CFINV', 'CINC', 'CINV', 'CLS', 'CLZ', 'CMN', 'CMP', 'CNEG', 'CSEL', 'CSET', 'CSETM', 'CSINC', 'CSINV', 'CSNEG', 'EON', 'EOR', 'EXTR', 'LSL', 'LSLV', 'LSR', 'LSRV', 'MADD', 'MNEG', 'MOV', 'MOVK', 'MOVN', 'MOVZ', 'MSUB', 'MUL', 'MVN', 'NEG', 'NEGS', 'NGC', 'NGCS', 'NOP', 'ORN', 'ORR', 'RBIT', 'RET', 'REV', 'REV16', 'REV32', 'REV64', 'RMIF', 'ROR', 'RORV', 'SBC', 'SBCS', 'SBFIZ', 'SBFM', 'SBFX', 'SETF8', 'SETF16', 'SMADDL', 'SMNEGL', 'SMSUBL', 'SMULH', 'SMULL', 'SUB', 'SUBS', 'SXTB', 'SXTH', 'SXTW', 'TST', 'UBFIZ', 'UBFM', 'UBFX', 'UMADDL', 'UMNEGL', 'UMSUBL', 'UMULH', 'UMULL', 'UXTB', 'UXTH'] # these are the instructions for the general-purpose register file
dit_simd_instrs = ["ABS", "ADD", "ADDHN", "ADDHN2", "ADDP", "ADDV", "AND", "BIC", "BIF", "BIT", "BSL", "CLS", "CLZ", "CMEQ", "CMGE", "CMGT", "CMHI", "CMHS", "CMLE", "CMLT", "CMTST", "CNT", "CRC32B", "CRC32H", "CRC32W", "CRC32X", "CRC32CB", "CRC32CH", "CRC32CW", "CRC32CX", "DUP", "EOR", "EXT", "FCSEL", "INS", "MLA", "MLS", "MOV", "MOVI", "MUL", "MVN", "MVNI", "NEG", "NOT", "ORN", "ORR", "PMUL", "PMULL", "PMULL2", "RADDHN", "RADDHN2", "RBIT", "REV16", "REV32", "RSHRN", "RSHRN2", "RSUBHN", "RSUBHN2", "SABA", "SABD", "SABAL", "SABAL2", "SABDL", "SABDL2", "SADALP", "SADDL", "SADDL2", "SADDLP", "SADDLV", "SADDW", "SADDW2", "SHADD", "SHL", "SHLL", "SHLL2", "SHRN", "SHRN2", "SHSUB", "SLI", "SMAX", "SMAXP", "SMAXV", "SMIN", "SMINP", "SMINV", "SMLAL", "SMLAL2", "SMLSL", "SMLSL2", "SMOV", "SMULL", "SMULL2", "SRI", "SSHL", "SSHLL", "SSHLL2", "SSHR", "SSRA", "SSUBL", "SSUBL2", "SSUBW", "SSUBW2", "SUB", "SUBHN", "SUBHN2", "SXTL", "SXTL2", "TBL", "TBX", "TRN1", "TRN2", "UABA", "UABAL", "UABAL2", "UABD", "UABDL", "UABDL2", "UADALP", "UADDL", "UADDL2", "UADDLP", "UADDLV", "UADDW", "UADDW2", "UHADD", "UHSUB", "UMAX", "UMAXP", "UMAXV", "UMIN", "UMINP", "UMINV", "UMLAL", "UMLAL2", "UMLSL", "UMOV", "UMLSL2", "UMULL", "UMULL2", "USHL", "USHLL", "USHLL2", "USHR", "USRA", "USUBL", "USUBL2", "USUBW", "USUBW2", "UXTL", "UXTL2", "UZP1", "UZP2", "XTN", "XTN2", "ZIP1", "ZIP2"] # these are the instructions for the SIMD & FP register file

In [187]:
from capstone import *

bad_instrs = []
md = Cs(CS_ARCH_ARM64, CS_MODE_ARM)

for function in functions:
    code = get_function_code(function)
    
    instrs = [i for i in md.disasm_lite(code, function['st_value'])]
    
    if(len(instrs) != len(code) / 4): 
        print("invalid disassembly for " + function.name)
        continue
    
    for (addr, size, mnemonic, op_str) in instrs:
        if(op_str == '' or op_str.split(",")[0].upper() in general_purpose_regs):
            if(mnemonic.upper() not in dit_instrs):
                bad_instrs += [Instr(function.name, addr, mnemonic, op_str)]
        elif(mnemonic.upper() not in dit_simd_instrs):
            bad_instrs += [Instr(function.name, addr, mnemonic, op_str)]

In [188]:
set([i.mnemonic for i in bad_instrs])

{'adrp', 'b.ne', 'bl', 'cbnz', 'fmov', 'ldp', 'ldr', 'stp', 'str', 'strb'}

In [193]:
from bisect import bisect_left

dwarf = elf.get_dwarf_info()
def get_lines_from_cu(cu):
    res = []
    lineprogram = dwarf.line_program_for_CU(cu)
    for entry in lineprogram.get_entries():
        state = entry.state
        if state is not None:
            res += [Line(lineprogram['file_entry'][state.file-1].name, state.line, state.address)]
    return res

def get_lines_from_dwarf(dwarf):
    res = []
    for cu in dwarf.iter_CUs():
        res += get_lines_from_cu(cu)
    return sorted(res, key=lambda l: l.addr)

def find_line(lines, addr):
    exact_matches = [l for l in filter(lambda line: line.addr == addr, lines)]
    if(len(exact_matches) != 0):
        return exact_matches
    addrs = [l.addr for l in lines]
    return [lines[bisect_left(addrs, addr) - 1]]

lines = get_lines_from_dwarf(dwarf)

In [200]:
#stores outside of the stack
[(str(i), str(find_line(lines, i.addr)[0])) for i in bad_instrs if i.mnemonic == "str" and i.op_str.split(", ")[1][0:3] != "[sp"]


[]

In [205]:
[(str(i), str(find_line(lines, i.addr)[0])) for i in bad_instrs if i.mnemonic == "strb"]

[('fix_sprint@0x1b34: strb w3, [x19, #0x3f]', "b'autogen.c':80(0x1b34)"),
 ('fix_sprint@0x1b64: strb w3, [x19, #0x3e]', "b'autogen.c':82(0x1b64)"),
 ('fix_sprint@0x1ba0: strb w3, [x19, #0x3d]', "b'autogen.c':84(0x1ba0)"),
 ('fix_sprint@0x1bf8: strb w3, [x19, #0x3c]', "b'autogen.c':86(0x1bf8)"),
 ('fix_sprint@0x1c34: strb w3, [x19, #0x3b]', "b'autogen.c':88(0x1c34)"),
 ('fix_sprint@0x1c94: strb w10, [x19, #0x3a]', "b'autogen.c':90(0x1c94)"),
 ('fix_sprint@0x1ce4: strb w11, [x19, #0x39]', "b'autogen.c':92(0x1ce4)"),
 ('fix_sprint@0x1d68: strb w18, [x19, #0x38]', "b'autogen.c':94(0x1d68)"),
 ('fix_sprint@0x1dec: strb w4, [x19, #0x37]', "b'autogen.c':96(0x1dec)"),
 ('fix_sprint@0x1ea8: strb w15, [x19, #0x36]', "b'autogen.c':98(0x1ea8)"),
 ('fix_sprint@0x1f4c: strb w18, [x19, #0x35]', "b'autogen.c':100(0x1f4c)"),
 ('fix_sprint@0x1fec: strb w18, [x19, #0x34]', "b'autogen.c':102(0x1fec)"),
 ('fix_sprint@0x2080: strb w20, [x19, #0x33]', "b'autogen.c':104(0x2080)"),
 ('fix_sprint@0x2124: strb w