|
| 1 | +; Test case for lifting a binary that calls abs() |
| 2 | +; |
| 3 | +; Copyright (C) 2025 Kestrel Institute |
| 4 | +; |
| 5 | +; License: A 3-clause BSD license. See the file books/3BSD-mod.txt. |
| 6 | +; |
| 7 | +; Author: Eric McCarthy (mccarthy@kestrel.edu) |
| 8 | + |
| 9 | +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
| 10 | + |
| 11 | +(in-package "X") |
| 12 | + |
| 13 | +;; STATUS: in progress |
| 14 | +;; |
| 15 | +;; This example tests Axe's ability to: |
| 16 | +;; 1. Detect that a call instruction targets the PLT |
| 17 | +;; 2. Resolve the PLT entry to the symbol name "abs" |
| 18 | +;; 3. Apply a summary for the abs() function |
| 19 | +;; |
| 20 | +;; The abs() function is ideal for testing because: |
| 21 | +;; - It's a real function call (not inlined) when compiled with -O0 -fno-builtin |
| 22 | +;; - No loops, no syscalls - pure computation |
| 23 | +;; - Simple semantics: abs(x) = (x < 0) ? -x : x |
| 24 | + |
| 25 | +;; cert_param: (uses-stp) |
| 26 | + |
| 27 | +(include-book "kestrel/axe/x86/unroller" :dir :system) |
| 28 | + |
| 29 | +; (depends-on "abs.elf64") |
| 30 | + |
| 31 | +;; Lift the test_abs subroutine into logic: |
| 32 | +; Not currently working |
| 33 | +;(def-unrolled test-abs |
| 34 | +; :executable "abs.elf64" |
| 35 | +; :target "test_abs" |
| 36 | +; :inputs ((x u32)) ; 32-bit input (treated as unsigned by Axe) |
| 37 | +; :output :rax |
| 38 | +; :stack-slots 10) |
| 39 | + |
| 40 | +;; NOTE: if you execute this def-unrolled, the lifting fails at address 0x404000 (the GOT entry for abs): |
| 41 | +;; |
| 42 | +;; (READ '8 '4210688 X86) ; 4210688 = 0x404000 |
| 43 | +;; |
| 44 | +;; Looking at the binary: |
| 45 | +;; <abs@plt>: |
| 46 | +;; 401030: jmp *0x2fca(%rip) # 404000 <abs@GLIBC_2.2.5> |
| 47 | +;; |
| 48 | +;; Axe tried to follow the indirect jump through the GOT, but the GOT contents |
| 49 | +;; are unknown at static analysis time. This is the exact point where Axe |
| 50 | +;; needs to: |
| 51 | +;; |
| 52 | +;; 1. Detect that 0x401030 is in the PLT, so that Axe can do something else |
| 53 | +;; instead of executing that instruction. |
| 54 | +;; To do that, look for section named ".plt" in the ELF section headers. |
| 55 | +;; E.g. readelf -S -W abs.elf64 |
| 56 | +;; and see |
| 57 | +;; [10] .plt PROGBITS 0000000000401020 001020 000020 10 AX 0 0 16 |
| 58 | +;; where 0000000000401020 is the address; 001020 is the file offset; |
| 59 | +;; 000020 is the size of the .plt section, and 10 is the entry size. |
| 60 | +;; Since 401020 <= 401030 <= 401020+20, it is in in the PLT. |
| 61 | +;; |
| 62 | +;; 2. Calculate the PLT index. (401030 - 401020) = 0x10 is the offset. |
| 63 | +;; Divide by the entry size (0x10 / 0x10) = 1 is the PLT index. |
| 64 | +;; |
| 65 | +;; 3. Look up the relocation (.rela.plt) to find the symbol index. |
| 66 | +;; Since PLT index 0 is a stub which is not needed in .rela.plt, |
| 67 | +;; we need to subtract 1 from the PLT index to get the relocation index. |
| 68 | +;; E.g. readelf -S -W abs.elf64 |
| 69 | +;; shows: |
| 70 | +;; [ 8] .rela.plt RELA 0000000000400468 000468 000018 18 AI 3 22 8 |
| 71 | +;; So, what do we find at .rela.pit[0]? |
| 72 | +;; Each .rela.plt entry is an Elf64_Rela structure (24 bytes): |
| 73 | +;; typedef struct { |
| 74 | +;; Elf64_Addr r_offset; // 8 bytes - GOT slot address |
| 75 | +;; Elf64_Xword r_info; // 8 bytes - symbol index + type |
| 76 | +;; Elf64_Sxword r_addend; // 8 bytes - addend (usually 0) |
| 77 | +;; } Elf64_Rela; |
| 78 | +;; The r_info field encodes two values: |
| 79 | +;; - Low 32 bits: 0x00000007 = relocation type (R_X86_64_JUMP_SLOT) |
| 80 | +;; - High 32 bits: (r_info >> 32) = 0x00000002 = symbol index into .dynsym |
| 81 | +;; |
| 82 | +;; 4. Look up the symbol index into .dynsym, to find the string start index into .dynstr |
| 83 | +;; E.g. readelf -S -W abs.elf64 |
| 84 | +;; shows: |
| 85 | +;; [ 3] .dynsym DYNSYM 0000000000400358 000358 000060 18 A 4 1 8 |
| 86 | +;; Since each entry is 0x18 bytes, .dynsym[2] would be at 400358+30 = 0x400388 |
| 87 | +;; The first 4 bytes at 0x400388 are the little-endian index into .dynstr |
| 88 | +;; There is more detail there, like type and binding and visibility, but for this case, |
| 89 | +;; we just look at the first 4 bytes. |
| 90 | +;; In this case, |
| 91 | +;; readelf -x .dynsym abs.elf64 |
| 92 | +;; shows: |
| 93 | +;; ... |
| 94 | +;; 0x00400388 01000000 12000000 00000000 00000000 ................ |
| 95 | +;; So the string start index is 1. |
| 96 | +;; |
| 97 | +;; 5. Look up the string in .dynstr |
| 98 | +;; readelf -S -W abs.elf64 |
| 99 | +;; shows: |
| 100 | +;; [ 4] .dynstr STRTAB 00000000004003b8 0003b8 000047 00 A 0 0 1 |
| 101 | +;; Since the string start index is 1, the string is at 0x4003b9. |
| 102 | +;; Let's look there: |
| 103 | +;; readelf -x .dynstr abs.elf64 |
| 104 | +;; shows: |
| 105 | +;; Hex dump of section '.dynstr': |
| 106 | +;; 0x004003b8 00616273 005f5f6c 6962635f 73746172 .abs.__libc_star |
| 107 | +;; So the string starting at 0x4003b9 to the next null is "abs" |
| 108 | +;; |
| 109 | +;; 6. Apply an abs summary instead of trying to execute the actual library code. |
| 110 | + |
| 111 | + |
| 112 | +;; Once library call detection is working, the above should produce |
| 113 | +;; something equivalent to: |
| 114 | +;; |
| 115 | +;; (defun test-abs (x) |
| 116 | +;; (if (sbvlt 32 x 0) |
| 117 | +;; (bvuminus 32 x) |
| 118 | +;; x)) |
| 119 | +;; |
| 120 | +;; Or using abs model: |
| 121 | +;; (defun test-abs (x) |
| 122 | +;; (abs-model x)) |
| 123 | + |
| 124 | +;; Correctness theorems (to be enabled once library summaries work): |
| 125 | + |
| 126 | +;; (defthm test-abs-of-positive |
| 127 | +;; (implies (not (sbvlt 32 x 0)) ; x >= 0 |
| 128 | +;; (equal (test-abs x) |
| 129 | +;; (bvchop 32 x)))) |
| 130 | + |
| 131 | +;; (defthm test-abs-of-negative |
| 132 | +;; (implies (sbvlt 32 x 0) ; x < 0 |
| 133 | +;; (equal (test-abs x) |
| 134 | +;; (bvuminus 32 x)))) |
| 135 | + |
| 136 | +;; (defthm test-abs-examples |
| 137 | +;; (and (equal (test-abs 5) 5) |
| 138 | +;; (equal (test-abs -5) 5) |
| 139 | +;; (equal (test-abs 0) 0))) |
0 commit comments