From 1156cfc85f263889d6916997b01a3acd2820fa3e Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Sun, 5 May 2024 15:30:08 -0700 Subject: [PATCH 1/3] Reject BPF program if uninit stack is accessed Reject programs if registers are used before intialized Make undefined behavior check optional Signed-off-by: Alan Jowett --- libfuzzer/README.md | 94 ++++++++++ libfuzzer/libfuzz_harness.cc | 202 +++++++++++++++------ libfuzzer/split.sh | 37 ++++ ubpf/disassembler.py | 4 + vm/inc/ubpf.h | 11 ++ vm/ubpf_int.h | 1 + vm/ubpf_vm.c | 329 +++++++++++++++++++++++++++++++---- 7 files changed, 594 insertions(+), 84 deletions(-) create mode 100644 libfuzzer/README.md create mode 100755 libfuzzer/split.sh diff --git a/libfuzzer/README.md b/libfuzzer/README.md new file mode 100644 index 00000000..8eab2390 --- /dev/null +++ b/libfuzzer/README.md @@ -0,0 +1,94 @@ +# ubpf_fuzzer + +This is a libfuzzer based fuzzer. + +To build, run: +``` +cmake \ + -G Ninja \ + -S . \ + -B build \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DUBPF_ENABLE_LIBFUZZER=1 \ + -DCMAKE_BUILD_TYPE=Debug + +cmake --build build +``` + +To run: +Create folder for the corpus and artifacts for any crashes found, then run the fuzzer. + +``` +mkdir corpus +mkdir artifacts +build/bin/ubpf_fuzzer corpus -artifact_prefix=artifacts/ +``` + +Optionally, add the "-jobs=100" to gather 100 crashes at a time. + +This will produce a lot of output that looks like: +``` +#529745 REDUCE cov: 516 ft: 932 corp: 442/22Kb lim: 2875 exec/s: 264872 rss: 429Mb L: 50/188 MS: 3 CrossOver-ChangeBit-EraseBytes- +#529814 REDUCE cov: 516 ft: 932 corp: 442/22Kb lim: 2875 exec/s: 264907 rss: 429Mb L: 45/188 MS: 4 ChangeBit-ShuffleBytes-PersAutoDict-EraseBytes- DE: "\005\000\000\000\000\000\000\000"- +#530202 REDUCE cov: 516 ft: 932 corp: 442/22Kb lim: 2875 exec/s: 265101 rss: 429Mb L: 52/188 MS: 3 ChangeByte-ChangeASCIIInt-EraseBytes- +#531224 REDUCE cov: 518 ft: 934 corp: 443/22Kb lim: 2875 exec/s: 265612 rss: 429Mb L: 73/188 MS: 2 CopyPart-PersAutoDict- DE: "\001\000\000\000"- +#531750 REDUCE cov: 518 ft: 934 corp: 443/22Kb lim: 2875 exec/s: 265875 rss: 429Mb L: 45/188 MS: 1 EraseBytes- +#532127 REDUCE cov: 519 ft: 935 corp: 444/22Kb lim: 2875 exec/s: 266063 rss: 429Mb L: 46/188 MS: 2 ChangeBinInt-ChangeByte- +#532246 REDUCE cov: 519 ft: 935 corp: 444/22Kb lim: 2875 exec/s: 266123 rss: 429Mb L: 66/188 MS: 4 ChangeBit-CrossOver-ShuffleBytes-EraseBytes- +#532357 NEW cov: 520 ft: 936 corp: 445/22Kb lim: 2875 exec/s: 266178 rss: 429Mb L: 55/188 MS: 1 ChangeBinInt- +#532404 REDUCE cov: 520 ft: 936 corp: 445/22Kb lim: 2875 exec/s: 266202 rss: 429Mb L: 57/188 MS: 2 ChangeBit-EraseBytes- +#532486 REDUCE cov: 520 ft: 936 corp: 445/22Kb lim: 2875 exec/s: 266243 rss: 429Mb L: 44/188 MS: 2 EraseByte +``` + +Eventually it will probably crash and produce a message like: +``` +================================================================= +==376403==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x000000000000 bp 0x7ffca9d3cda0 sp 0x7ffca9d3cb98 T0) +==376403==Hint: pc points to the zero page. +==376403==The signal is caused by a READ memory access. +==376403==Hint: address points to the zero page. + #0 0x0 () + #1 0x50400001a48f () + +AddressSanitizer can not provide additional info. +SUMMARY: AddressSanitizer: SEGV () +==376403==ABORTING +MS: 1 ChangeByte-; base unit: cea14e5e2ecdc723b9beb640471a18b4ea529f75 +0x28,0x0,0x0,0x0,0xb4,0x50,0x10,0x6a,0x6a,0x4a,0x6a,0x2d,0x2e,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x4,0x21,0x0,0x0,0x0,0x0,0x95,0x95,0x26,0x21,0xfc,0xff,0xff,0xff,0x95,0x95,0x95,0x95,0x97,0xb7,0x97,0x97,0x0,0x8e,0x0,0x24, +(\000\000\000\264P\020jjJj-.\001\000\000\000\000\000\000\004!\000\000\000\000\225\225&!\374\377\377\377\225\225\225\225\227\267\227\227\000\216\000$ +artifact_prefix='artifacts/'; Test unit written to artifacts/crash-7036cbef2b568fa0b6e458a9c8062571a65144e1 +Base64: KAAAALRQEGpqSmotLgEAAAAAAAAEIQAAAACVlSYh/P///5WVlZWXt5eXAI4AJA== +``` + +To triage the crash, the crash can be post processed using: +``` +libfuzzer/split.sh artifacts/crash-7036cbef2b568fa0b6e458a9c8062571a65144e1 + + +Extracting program-7036cbef2b568fa0b6e458a9c8062571a65144e1... +Extracting memory-7036cbef2b568fa0b6e458a9c8062571a65144e1... +Disassembling program-7036cbef2b568fa0b6e458a9c8062571a65144e1... +Program size: 40 +Memory size: 2 +Disassembled program: +mov32 %r0, 0x2d6a4a6a +jgt32 %r1, %r0, +0 +add32 %r1, 0x95950000 +jgt32 %r1, 0x9595ffff, -4 +exit +Memory contents: +00000000: 0024 .$ +``` + +To repro the crash, you can run: +``` +build/bin/ubpf_fuzzer artifacts/crash-7036cbef2b568fa0b6e458a9c8062571a65144e1 +``` + +Or you can repro it using ubpf_test: +``` +build/bin/ubpf-test --mem artifacts/memory-7036cbef2b568fa0b6e458a9c8062571a65144e1 artifacts/program-7036cbef2b568fa0b6e458a9c8062571a65144e1 --jit +``` + diff --git a/libfuzzer/libfuzz_harness.cc b/libfuzzer/libfuzz_harness.cc index 4e6426af..7b6fed77 100644 --- a/libfuzzer/libfuzz_harness.cc +++ b/libfuzzer/libfuzz_harness.cc @@ -10,7 +10,6 @@ #include #include - extern "C" { #include "ebpf.h" @@ -42,26 +41,107 @@ int null_printf(FILE* stream, const char* format, ...) return 0; } -/** - * @brief Accept an input buffer and size. - * - * @param[in] data Pointer to the input buffer. - * @param[in] size Size of the input buffer. - * @return -1 if the input is invalid - * @return 0 if the input is valid and processed. - */ -int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) +typedef std::unique_ptr ubpf_vm_ptr; + +ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) { - // Assume the fuzzer input is as follows: - // 32-bit program length - // program byte - // test data + // Automatically free the VM when it goes out of scope. + std::unique_ptr vm(ubpf_create(), ubpf_destroy); - // Copy memory into a writable buffer. - std::vector memory; + if (vm == nullptr) { + // Failed to create the VM. + // This is not interesting, as the fuzzer input is invalid. + // Do not add it to the corpus. + return vm; + } + + ubpf_toggle_undefined_behavior_check(vm.get(), true); + + char* error_message = nullptr; + + ubpf_set_error_print(vm.get(), null_printf); + + if (ubpf_load(vm.get(), program_code.data(), program_code.size(), &error_message) != 0) { + // The program failed to load, due to a validation error. + // This is not interesting, as the fuzzer input is invalid. + // Do not add it to the corpus. + free(error_message); + vm.reset(); + return vm; + } + + ubpf_toggle_bounds_check(vm.get(), true); + + if (ubpf_register_external_dispatcher(vm.get(), test_helpers_dispatcher, test_helpers_validator) != 0) { + // Failed to register the external dispatcher. + // This is not interesting, as the fuzzer input is invalid. + // Do not add it to the corpus. + vm.reset(); + return vm; + } + + if (ubpf_set_instruction_limit(vm.get(), 10000, nullptr) != 0) { + // Failed to set the instruction limit. + // This is not interesting, as the fuzzer input is invalid. + // Do not add it to the corpus. + vm.reset(); + return vm; + } + + return vm; +} + +bool call_ubpf_interpreter(const std::vector& program_code, std::vector& memory, std::vector& ubpf_stack, uint64_t& interpreter_result) +{ + auto vm = create_ubpf_vm(program_code); + + if (vm == nullptr) { + // VM creation failed. + return false; + } + + // Execute the program using the input memory. + if (ubpf_exec_ex(vm.get(), memory.data(), memory.size(), &interpreter_result, ubpf_stack.data(), ubpf_stack.size()) != 0) { + // VM execution failed. + return false; + } + + // VM execution succeeded. + return true; +} + +bool call_ubpf_jit(const std::vector& program_code, std::vector& memory, std::vector& ubpf_stack, uint64_t& jit_result) +{ + auto vm = create_ubpf_vm(program_code); + + char* error_message = nullptr; + + if (vm == nullptr) { + // VM creation failed. + return false; + } + auto fn = ubpf_compile_ex(vm.get(), &error_message, JitMode::ExtendedJitMode); + + if (fn == nullptr) { + free(error_message); + + // Compilation failed. + return false; + } + + jit_result = fn(memory.data(), memory.size(), ubpf_stack.data(), ubpf_stack.size()); + + // Compilation succeeded. + return true; +} + +bool call_linux_jit(const std::vector& program_code, std::vector& memory, std::vector ubpf_stack, uint64_t& linux_jit_result); + +bool split_input(const uint8_t* data, std::size_t size, std::vector& program, std::vector& memory) +{ if (size < 4) - return -1; + return false; uint32_t program_length = *reinterpret_cast(data); uint32_t memory_length = size - 4 - program_length; @@ -71,22 +151,25 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) if (program_length > size) { // The program length is larger than the input size. // This is not interesting, as the fuzzer input is invalid. - // Do not add it to the corpus. - return -1; + return false; } if (program_length == 0) { // The program length is zero. // This is not interesting, as the fuzzer input is invalid. - // Do not add it to the corpus. - return -1; + return false; } if (program_length + 4u > size) { // The program length is larger than the input size. // This is not interesting, as the fuzzer input is invalid. - // Do not add it to the corpus. - return -1; + return false; + } + + if ((program_length % sizeof(ebpf_inst)) != 0) { + // The program length needs to be a multiple of sizeof(ebpf_inst_t). + // This is not interesting, as the fuzzer input is invalid. + return false; } // Copy any input memory into a writable buffer. @@ -95,53 +178,64 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) std::memcpy(memory.data(), memory_start, memory_length); } - // Automatically free the VM when it goes out of scope. - std::unique_ptr vm(ubpf_create(), ubpf_destroy); + program.resize(program_length); + std::memcpy(program.data(), program_start, program_length); - if (vm == nullptr) { - // Failed to create the VM. - // This is not interesting, as the fuzzer input is invalid. - // Do not add it to the corpus. - return -1; - } + return true; +} - char* error_message = nullptr; +/** + * @brief Accept an input buffer and size. + * + * @param[in] data Pointer to the input buffer. + * @param[in] size Size of the input buffer. + * @return -1 if the input is invalid + * @return 0 if the input is valid and processed. + */ +int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) +{ + // Assume the fuzzer input is as follows: + // 32-bit program length + // program byte + // test data - if (ubpf_load(vm.get(), program_start, program_length, &error_message) != 0) { - // The program failed to load, due to a validation error. - // This is not interesting, as the fuzzer input is invalid. - // Do not add it to the corpus. - free(error_message); + std::vector program; + std::vector memory; + std::vector ubpf_stack(3*4096); + + if (!split_input(data, size, program, memory)) { + // The input is invalid. Not interesting. return -1; } - ubpf_set_error_print(vm.get(), null_printf); + uint64_t interpreter_result = 0; + uint64_t jit_result = 0; - ubpf_toggle_bounds_check(vm.get(), true); - - if (ubpf_register_external_dispatcher(vm.get(), test_helpers_dispatcher, test_helpers_validator) != 0) { - // Failed to register the external dispatcher. + if (!call_ubpf_interpreter(program, memory, ubpf_stack, interpreter_result)) { + // Failed to load or execute the program in the interpreter. // This is not interesting, as the fuzzer input is invalid. - // Do not add it to the corpus. - return -1; + return 0; } - if (ubpf_set_instruction_limit(vm.get(), 10000, nullptr) != 0) { - // Failed to set the instruction limit. - // This is not interesting, as the fuzzer input is invalid. - // Do not add it to the corpus. + if (!split_input(data, size, program, memory)) { + // The input is invalid. Not interesting. return -1; } - uint64_t result = 0; - - // Execute the program using the input memory. - if (ubpf_exec(vm.get(), memory.data(), memory.size(), &result) != 0) { - // The program passed validation during load, but failed during execution. - // due to a runtime error. Add it to the corpus as it may be interesting. + if (!call_ubpf_jit(program, memory, ubpf_stack, jit_result)) { + // Failed to load or execute the program in the JIT. + // This is not interesting, as the fuzzer input is invalid. return 0; } + // If interpreter_result is not equal to jit_result, raise a fatal signal + if (interpreter_result != jit_result) { + printf("%lx ubpf_stack\n", reinterpret_cast(ubpf_stack.data()) + ubpf_stack.size()); + printf("interpreter_result: %lx\n", interpreter_result); + printf("jit_result: %lx\n", jit_result); + throw std::runtime_error("interpreter_result != jit_result"); + } + // Program executed successfully. // Add it to the corpus as it may be interesting. return 0; diff --git a/libfuzzer/split.sh b/libfuzzer/split.sh new file mode 100755 index 00000000..2f562afa --- /dev/null +++ b/libfuzzer/split.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +# Split the file name into path and base name +path=$(dirname $1) +base=$(basename $1) + +# Get the first 4 bytes from the file (which is the length of the program) +input="$(xxd -p -l 4 $1)" +# Convert from little endian +input="${input:6:2}${input:4:2}${input:2:2}${input:0:2}" + +# Convert input from hex string to value +length=$((16#$input)) + +# Extract the hash part from the file name +hash=$(echo $base | cut -d'-' -f2-) + +# Copy the program to a file named program-$hash +echo "Extracting program-$hash..." +dd if=$1 of=$path/program-$hash bs=1 skip=4 count=$length 2> /dev/null + +echo "Extracting memory-$hash..." +# Copy the rest to a file named memory-$hash +dd if=$1 of=$path/memory-$hash bs=1 skip=$((4 + $length)) 2> /dev/null + +echo "Disassembling program-$hash..." +# Unassembly program using bin/ubpf-disassembler +bin/ubpf-disassembler $path/program-$hash > $path/program-$hash.asm + +echo "Program size: $(stat -c %s $path/program-$hash)" +echo "Memory size: $(stat -c %s $path/memory-$hash)" + +echo "Disassembled program:" +cat $path/program-$hash.asm + +echo "Memory contents:" +xxd $path/memory-$hash diff --git a/ubpf/disassembler.py b/ubpf/disassembler.py index b39f7cf5..bd78b255 100644 --- a/ubpf/disassembler.py +++ b/ubpf/disassembler.py @@ -128,6 +128,8 @@ def disassemble_one(data, offset): if opcode_name == "exit": return opcode_name elif opcode_name == "call": + if src_reg == 1: + opcode_name += " local" return "%s %s" % (opcode_name, I(imm)) elif opcode_name == "ja": return "%s %s" % (opcode_name, O(off)) @@ -143,6 +145,8 @@ def disassemble_one(data, offset): if opcode_name == "exit": return opcode_name elif opcode_name == "call": + if src_reg == 1: + opcode_name += " local" return "%s %s" % (opcode_name, I(imm)) elif opcode_name == "ja": return "%s %s" % (opcode_name, O(off)) diff --git a/vm/inc/ubpf.h b/vm/inc/ubpf.h index c4555225..13423bd2 100644 --- a/vm/inc/ubpf.h +++ b/vm/inc/ubpf.h @@ -544,6 +544,17 @@ extern "C" int ubpf_set_instruction_limit(struct ubpf_vm* vm, uint32_t limit, uint32_t* previous_limit); + /** + * @brief Enable or disable undefined behavior checks. Undefined behavior includes + * reading from uninitialized memory or using uninitialized registers. + * + * @param[in] vm VM to enable or disable undefined behavior checks on. + * @param[in] enable Enable undefined behavior checks if true, disable if false. + * @return true if undefined behavior checks were previously enabled. + * @return false if undefined behavior checks were previously disabled. + */ + bool + ubpf_toggle_undefined_behavior_check(struct ubpf_vm* vm, bool enable); #ifdef __cplusplus } #endif diff --git a/vm/ubpf_int.h b/vm/ubpf_int.h index 5f6bc532..157f5a17 100644 --- a/vm/ubpf_int.h +++ b/vm/ubpf_int.h @@ -74,6 +74,7 @@ struct ubpf_vm external_function_validate_t dispatcher_validate; bool bounds_check_enabled; + bool undefined_behavior_check_enabled; int (*error_printf)(FILE* stream, const char* format, ...); struct ubpf_jit_result (*jit_translate)(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum JitMode jit_mode); bool (*jit_update_dispatcher)( diff --git a/vm/ubpf_vm.c b/vm/ubpf_vm.c index be364de2..3fba72f1 100644 --- a/vm/ubpf_vm.c +++ b/vm/ubpf_vm.c @@ -56,6 +56,14 @@ ubpf_toggle_bounds_check(struct ubpf_vm* vm, bool enable) return old; } +bool +ubpf_toggle_undefined_behavior_check(struct ubpf_vm* vm, bool enable) +{ + bool old = vm->undefined_behavior_check_enabled; + vm->undefined_behavior_check_enabled = enable; + return old; +} + void ubpf_set_error_print(struct ubpf_vm* vm, int (*error_printf)(FILE* stream, const char* format, ...)) { @@ -105,6 +113,7 @@ ubpf_create(void) } vm->bounds_check_enabled = true; + vm->undefined_behavior_check_enabled = false; vm->error_printf = fprintf; #if defined(__x86_64__) || defined(_M_X64) @@ -373,6 +382,220 @@ ubpf_mem_store(uint64_t address, uint64_t value, size_t size) } } +/** + * @brief Mark the bits in the shadow stack corresponding to the address if it is within the stack bounds. + * + * @param[in] stack The base address of the stack. + * @param[in] shadow_stack The base address of the shadow stack. + * @param[in] address The address being written to. + * @param[in] size The number of bytes being written. + */ +static inline void +ubpf_mark_shadow_stack( + const struct ubpf_vm* vm, uint8_t* stack, uint64_t stack_length, uint8_t* shadow_stack, void* address, size_t size) +{ + if (!vm->undefined_behavior_check_enabled) { + return; + } + + uintptr_t access_start = (uintptr_t)address; + uintptr_t access_end = access_start + size; + uintptr_t stack_start = (uintptr_t)stack; + uintptr_t stack_end = stack_start + stack_length; + + if (access_start > access_end) { + // Overflow + return; + } + + if (access_start >= stack_start && access_end <= stack_end) { + // Shadow stack is an bit array, where each bit corresponds to 1 byte in the stack. + // If the bit is set, the memory is initialized. + size_t offset = access_start - stack_start; + for (size_t test_bit = offset; test_bit < offset + size; test_bit++) { + // Convert test_bit into offset + mask to test against the shadow stack. + size_t bit_offset = test_bit / 8; + size_t bit_mask = 1 << (test_bit % 8); + shadow_stack[bit_offset] |= bit_mask; + } + } +} + +/** + * @brief Check if the address is within the stack bounds and the shadow stack is marked for the address. + * + * @param[in] stack The base address of the stack. + * @param[in] shadow_stack The base address of the shadow stack. + * @param[in] address The address being read from. + * @param[in] size The number of bytes being read. + * @return true - The read is from initialized memory or is not within the stack bounds. + * @return false - The read is from uninitialized memory within the stack bounds. + */ +static inline bool +ubpf_check_shadow_stack( + const struct ubpf_vm* vm, uint8_t* stack, uint64_t stack_length, uint8_t* shadow_stack, void* address, size_t size) +{ + if (!vm->undefined_behavior_check_enabled) { + return true; + } + + uintptr_t access_start = (uintptr_t)address; + uintptr_t access_end = access_start + size; + uintptr_t stack_start = (uintptr_t)stack; + uintptr_t stack_end = stack_start + stack_length; + + if (access_start > access_end) { + // Overflow + return true; + } + + if (access_start >= stack_start && access_end <= stack_end) { + // Shadow stack is an bit array, where each bit corresponds to 1 byte in the stack. + // If the bit is set, the memory is initialized. + size_t offset = access_start - stack_start; + for (size_t test_bit = offset; test_bit < offset + size; test_bit++) { + // Convert test_bit into offset + mask to test against the shadow stack. + size_t bit_offset = test_bit / 8; + size_t bit_mask = 1 << (test_bit % 8); + if ((shadow_stack[bit_offset] & bit_mask) == 0) { + return false; + } + } + } + return true; +} + +/** + * @brief Check if the registers being accessed by this instruction are initialized and mark the destination register as + * initialized if it is. + * + * @param[in] vm The VM instance. + * @param[in,out] shadow_registers Storage for the shadow register state. + * @param[in] inst The instruction being executed. + * @return true - The registers are initialized. + * @return false - The registers are not initialized - an error message has been printed. + */ +static inline bool +ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint16_t* shadow_registers, struct ebpf_inst inst) +{ + if (!vm->undefined_behavior_check_enabled) { + return true; + } + + bool src_register_required = false; + bool dst_register_required = false; + bool dst_register_initialized = false; + + switch (inst.opcode & EBPF_CLS_MASK) { + // Load instructions initialize the destination register. + case EBPF_CLS_LD: + dst_register_initialized = true; + break; + // Load indirect instructions initialize the destination register and require the source register to be initialized. + case EBPF_CLS_LDX: + src_register_required = true; + dst_register_initialized = true; + break; + // Store instructions require the destination register to be initialized. + case EBPF_CLS_ST: + dst_register_required = true; + break; + // Store indirect instructions require both the source and destination registers to be initialized. + case EBPF_CLS_STX: + dst_register_required = true; + src_register_required = true; + break; + case EBPF_CLS_ALU: + case EBPF_CLS_ALU64: + // Source register is required if the EBPF_SRC_REG bit is set. + src_register_required = inst.opcode & EBPF_SRC_REG; + dst_register_initialized = true; + switch (inst.opcode & EBPF_ALU_OP_MASK) { + case 0x00: // EBPF_OP_ADD + case 0x10: // EBPF_OP_SUB + case 0x20: // EBPF_OP_MUL + case 0x30: // EBPF_OP_DIV + case 0x40: // EBPF_OP_OR + case 0x50: // EBPF_OP_AND + case 0x60: // EBPF_OP_LSH + case 0x70: // EBPF_OP_RSH + case 0x80: // EBPF_OP_NEG + case 0x90: // EBPF_OP_MOD + case 0xa0: // EBPF_OP_XOR + case 0xc0: // EBPF_OP_ARSH + case 0xd0: // EBPF_OP_LE + dst_register_required = true; + break; + case 0xb0: // EBPF_OP_MOV + // Destination register is initialized. + break; + } + break; + case EBPF_CLS_JMP: + case EBPF_CLS_JMP32: + // Source register is required if the EBPF_SRC_REG bit is set. + src_register_required = inst.opcode & EBPF_SRC_REG; + switch (inst.opcode & EBPF_JMP_OP_MASK) { + case EBPF_MODE_JA: + case EBPF_MODE_CALL: + case EBPF_MODE_EXIT: + src_register_required = false; + break; + case EBPF_MODE_JEQ: + case EBPF_MODE_JGT: + case EBPF_MODE_JGE: + case EBPF_MODE_JSET: + case EBPF_MODE_JNE: + case EBPF_MODE_JSGT: + case EBPF_MODE_JSGE: + case EBPF_MODE_JLT: + case EBPF_MODE_JLE: + case EBPF_MODE_JSLT: + case EBPF_MODE_JSLE: + dst_register_required = true; + break; + } + break; + } + + if (src_register_required && !(*shadow_registers & (1 << inst.src))) { + vm->error_printf(stderr, "Error: Source register r%d is not initialized.\n", inst.src); + return false; + } + + if (dst_register_required && !(*shadow_registers & (1 << inst.dst))) { + vm->error_printf(stderr, "Error: Destination register r%d is not initialized.\n", inst.dst); + return false; + } + + if (dst_register_initialized) { + *shadow_registers |= 1 << inst.dst; + } + + if (inst.opcode == EBPF_OP_CALL) { + if (inst.src == 0) { + // Mark the return address register as initialized. + *shadow_registers |= 1 << 0; + + // Mark r1-r5 as uninitialized. + *shadow_registers &= ~0x3e; + } else if (inst.src == 1) { + // Do nothing, register state will be handled by the callee on return. + } + } + + if (inst.opcode == EBPF_OP_EXIT) { + if (!(*shadow_registers & (1 << 0))) { + vm->error_printf(stderr, "Error: Return address register r0 is not initialized.\n"); + return false; + } + // Mark r1-r5 as uninitialized. + *shadow_registers &= ~0x3e; + } + + return true; +} + int ubpf_exec_ex( const struct ubpf_vm* vm, @@ -389,6 +612,9 @@ ubpf_exec_ex( uint64_t stack_frame_index = 0; int return_value = -1; void* external_dispatcher_cookie = mem; + void* shadow_stack = NULL; + + struct ebpf_inst previous_inst = {.opcode = 0}; if (!insts) { /* Code must be loaded before we can execute */ @@ -399,6 +625,14 @@ ubpf_exec_ex( 0, }; + if (vm->undefined_behavior_check_enabled) { + shadow_stack = calloc(stack_length / 8, 1); + if (!shadow_stack) { + return_value = -1; + goto cleanup; + } + } + #ifdef DEBUG if (vm->regs) reg = vm->regs; @@ -407,11 +641,15 @@ ubpf_exec_ex( #else reg = _reg; #endif + uint16_t shadow_registers = 0; // Bit mask of registers that have been written to. reg[1] = (uintptr_t)mem; reg[2] = (uint64_t)mem_len; reg[10] = (uintptr_t)stack_start + stack_length; + // Mark r1, r2, r10 as initialized. + shadow_registers |= (1 << 1) | (1 << 2) | (1 << 10); + int instruction_limit = vm->instruction_limit; while (1) { @@ -426,11 +664,26 @@ ubpf_exec_ex( } if ((pc == 0 || vm->int_funcs[pc]) && stack_frame_index < UBPF_MAX_CALL_DEPTH) { + // If this is neither the first instruction nor a local function call, then the behavior is undefined. + if (previous_inst.opcode != 0 && !(previous_inst.opcode == EBPF_OP_CALL && previous_inst.src == 1)) { + // Previous instruction wasn't a call to this instruction, so behavior is undefined. + if (vm->undefined_behavior_check_enabled) { + vm->error_printf( + stderr, "Error: Call to local function at pc %d is not from a call instruction.\n", pc); + return_value = -1; + goto cleanup; + } + } stack_frames[stack_frame_index].stack_usage = ubpf_stack_usage_for_local_func(vm, pc); } struct ebpf_inst inst = ubpf_fetch_instruction(vm, pc++); + if (!ubpf_validate_shadow_register(vm, &shadow_registers, inst)) { + return_value = -1; + goto cleanup; + } + switch (inst.opcode) { case EBPF_OP_ADD_IMM: reg[inst.dst] += inst.imm; @@ -630,37 +883,43 @@ ubpf_exec_ex( * * Needed since we don't have a verifier yet. */ -#define BOUNDS_CHECK_LOAD(size) \ - do { \ - if (!bounds_check( \ - vm, \ - (char*)reg[inst.src] + inst.offset, \ - size, \ - "load", \ - cur_pc, \ - mem, \ - mem_len, \ - stack_start, \ - stack_length)) { \ - return_value = -1; \ - goto cleanup; \ - } \ +#define BOUNDS_CHECK_LOAD(size) \ + do { \ + if (!ubpf_check_shadow_stack( \ + vm, stack_start, stack_length, shadow_stack, (char*)reg[inst.src] + inst.offset, size)) { \ + return_value = -1; \ + goto cleanup; \ + } \ + if (!bounds_check( \ + vm, \ + (char*)reg[inst.src] + inst.offset, \ + size, \ + "load", \ + cur_pc, \ + mem, \ + mem_len, \ + stack_start, \ + stack_length)) { \ + return_value = -1; \ + goto cleanup; \ + } \ } while (0) -#define BOUNDS_CHECK_STORE(size) \ - do { \ - if (!bounds_check( \ - vm, \ - (char*)reg[inst.dst] + inst.offset, \ - size, \ - "store", \ - cur_pc, \ - mem, \ - mem_len, \ - stack_start, \ - stack_length)) { \ - return_value = -1; \ - goto cleanup; \ - } \ +#define BOUNDS_CHECK_STORE(size) \ + do { \ + if (!bounds_check( \ + vm, \ + (char*)reg[inst.dst] + inst.offset, \ + size, \ + "store", \ + cur_pc, \ + mem, \ + mem_len, \ + stack_start, \ + stack_length)) { \ + return_value = -1; \ + goto cleanup; \ + } \ + ubpf_mark_shadow_stack(vm, stack_start, stack_length, shadow_stack, (char*)reg[inst.dst] + inst.offset, size); \ } while (0) case EBPF_OP_LDXW: @@ -1002,16 +1261,25 @@ ubpf_exec_ex( // Because we have already validated, we can assume that the type code is // valid. break; + default: + vm->error_printf(stderr, "Error: unknown opcode %d at PC %d\n", inst.opcode, cur_pc); + return_value = -1; + goto cleanup; } if (((inst.opcode & EBPF_CLS_MASK) == EBPF_CLS_ALU) && (inst.opcode & EBPF_ALU_OP_MASK) != 0xd0) { reg[inst.dst] &= UINT32_MAX; } + // Save the previous instruction for detecting falling through to the start of another function. + previous_inst = inst; } cleanup: #if defined(NTDDI_VERSION) && defined(WINNT) free(stack_frames); #endif + if (shadow_stack) { + free(shadow_stack); + } return return_value; } @@ -1358,6 +1626,7 @@ ubpf_get_registers(const struct ubpf_vm* vm) { return vm->regs; } + #else void ubpf_set_registers(struct ubpf_vm* vm, uint64_t* regs) From 3afa417374fae4824ee86c30de7dd91eb78069a9 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Mon, 20 May 2024 19:59:35 -0700 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Will Hawkins Signed-off-by: Alan Jowett --- libfuzzer/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libfuzzer/README.md b/libfuzzer/README.md index 8eab2390..42fa9541 100644 --- a/libfuzzer/README.md +++ b/libfuzzer/README.md @@ -62,11 +62,10 @@ artifact_prefix='artifacts/'; Test unit written to artifacts/crash-7036cbef2b568 Base64: KAAAALRQEGpqSmotLgEAAAAAAAAEIQAAAACVlSYh/P///5WVlZWXt5eXAI4AJA== ``` -To triage the crash, the crash can be post processed using: +To triage the crash, post process it with: ``` libfuzzer/split.sh artifacts/crash-7036cbef2b568fa0b6e458a9c8062571a65144e1 - Extracting program-7036cbef2b568fa0b6e458a9c8062571a65144e1... Extracting memory-7036cbef2b568fa0b6e458a9c8062571a65144e1... Disassembling program-7036cbef2b568fa0b6e458a9c8062571a65144e1... From 3efeeacd0734b0669c9a640da87d8c0678c65dad Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Mon, 20 May 2024 20:23:59 -0700 Subject: [PATCH 3/3] PR feedback Signed-off-by: Alan Jowett --- libfuzzer/libfuzz_harness.cc | 55 ++++++++++++++++++++++++++++-------- vm/inc/ubpf.h | 3 +- vm/ubpf_vm.c | 28 ++++++++++-------- 3 files changed, 63 insertions(+), 23 deletions(-) diff --git a/libfuzzer/libfuzz_harness.cc b/libfuzzer/libfuzz_harness.cc index 7b6fed77..45f5be3f 100644 --- a/libfuzzer/libfuzz_harness.cc +++ b/libfuzzer/libfuzz_harness.cc @@ -17,6 +17,7 @@ extern "C" } #include "test_helpers.h" +#include uint64_t test_helpers_dispatcher(uint64_t p0, uint64_t p1,uint64_t p2,uint64_t p3, uint64_t p4, unsigned int idx, void* cookie) { UNREFERENCED_PARAMETER(cookie); @@ -43,6 +44,12 @@ int null_printf(FILE* stream, const char* format, ...) typedef std::unique_ptr ubpf_vm_ptr; +/** + * @brief Create a ubpf vm object and load the program code into it. + * + * @param[in] program_code The program code to load into the VM. + * @return A unique pointer to the ubpf_vm object or nullptr if the VM could not be created. + */ ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) { // Automatically free the VM when it goes out of scope. @@ -52,7 +59,7 @@ ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) // Failed to create the VM. // This is not interesting, as the fuzzer input is invalid. // Do not add it to the corpus. - return vm; + return {nullptr, nullptr}; } ubpf_toggle_undefined_behavior_check(vm.get(), true); @@ -66,8 +73,7 @@ ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) // This is not interesting, as the fuzzer input is invalid. // Do not add it to the corpus. free(error_message); - vm.reset(); - return vm; + return {nullptr, nullptr}; } ubpf_toggle_bounds_check(vm.get(), true); @@ -76,21 +82,29 @@ ubpf_vm_ptr create_ubpf_vm(const std::vector& program_code) // Failed to register the external dispatcher. // This is not interesting, as the fuzzer input is invalid. // Do not add it to the corpus. - vm.reset(); - return vm; + return {nullptr, nullptr}; } if (ubpf_set_instruction_limit(vm.get(), 10000, nullptr) != 0) { // Failed to set the instruction limit. // This is not interesting, as the fuzzer input is invalid. // Do not add it to the corpus. - vm.reset(); - return vm; + return {nullptr, nullptr}; } return vm; } +/** + * @brief Invoke the ubpf interpreter with the given program code and input memory. + * + * @param[in] program_code The program code to execute. + * @param[in,out] memory The input memory to use when executing the program. May be modified by the program. + * @param[in,out] ubpf_stack The stack to use when executing the program. May be modified by the program. + * @param[out] interpreter_result The result of the program execution. + * @return true if the program executed successfully. + * @return false if the program failed to execute. + */ bool call_ubpf_interpreter(const std::vector& program_code, std::vector& memory, std::vector& ubpf_stack, uint64_t& interpreter_result) { auto vm = create_ubpf_vm(program_code); @@ -110,6 +124,16 @@ bool call_ubpf_interpreter(const std::vector& program_code, std::vector return true; } +/** + * @brief Execute the given program code using the ubpf JIT. + * + * @param[in] program_code The program code to execute. + * @param[in,out] memory The input memory to use when executing the program. May be modified by the program. + * @param[in,out] ubpf_stack The stack to use when executing the program. May be modified by the program. + * @param[out] interpreter_result The result of the program execution. + * @return true if the program executed successfully. + * @return false if the program failed to execute. + */ bool call_ubpf_jit(const std::vector& program_code, std::vector& memory, std::vector& ubpf_stack, uint64_t& jit_result) { auto vm = create_ubpf_vm(program_code); @@ -136,8 +160,16 @@ bool call_ubpf_jit(const std::vector& program_code, std::vector& program_code, std::vector& memory, std::vector ubpf_stack, uint64_t& linux_jit_result); - +/** + * @brief Copy the program and memory from the input buffer into separate buffers. + * + * @param[in] data The input buffer from the fuzzer. + * @param[in] size The size of the input buffer. + * @param[out] program The program code extracted from the input buffer. + * @param[out] memory The input memory extracted from the input buffer. + * @return true if the input buffer was successfully split. + * @return false if the input buffer is malformed. + */ bool split_input(const uint8_t* data, std::size_t size, std::vector& program, std::vector& memory) { if (size < 4) @@ -218,8 +250,9 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size) } if (!split_input(data, size, program, memory)) { - // The input is invalid. Not interesting. - return -1; + // The input was successfully split, but failed to split again. + // This should not happen. + assert(!"split_input failed"); } if (!call_ubpf_jit(program, memory, ubpf_stack, jit_result)) { diff --git a/vm/inc/ubpf.h b/vm/inc/ubpf.h index 13423bd2..554a25d8 100644 --- a/vm/inc/ubpf.h +++ b/vm/inc/ubpf.h @@ -546,7 +546,8 @@ extern "C" /** * @brief Enable or disable undefined behavior checks. Undefined behavior includes - * reading from uninitialized memory or using uninitialized registers. + * reading from uninitialized memory or using uninitialized registers. Default is disabled to + * preserve performance and compatibility with existing eBPF programs. * * @param[in] vm VM to enable or disable undefined behavior checks on. * @param[in] enable Enable undefined behavior checks if true, disable if false. diff --git a/vm/ubpf_vm.c b/vm/ubpf_vm.c index 3fba72f1..248a8732 100644 --- a/vm/ubpf_vm.c +++ b/vm/ubpf_vm.c @@ -409,7 +409,7 @@ ubpf_mark_shadow_stack( } if (access_start >= stack_start && access_end <= stack_end) { - // Shadow stack is an bit array, where each bit corresponds to 1 byte in the stack. + // Shadow stack is a bit array, where each bit corresponds to 1 byte in the stack. // If the bit is set, the memory is initialized. size_t offset = access_start - stack_start; for (size_t test_bit = offset; test_bit < offset + size; test_bit++) { @@ -450,7 +450,7 @@ ubpf_check_shadow_stack( } if (access_start >= stack_start && access_end <= stack_end) { - // Shadow stack is an bit array, where each bit corresponds to 1 byte in the stack. + // Shadow stack is a bit array, where each bit corresponds to 1 byte in the stack. // If the bit is set, the memory is initialized. size_t offset = access_start - stack_start; for (size_t test_bit = offset; test_bit < offset + size; test_bit++) { @@ -465,6 +465,8 @@ ubpf_check_shadow_stack( return true; } +#define REGISTER_TO_SHADOW_MASK(reg) (1 << (reg)) + /** * @brief Check if the registers being accessed by this instruction are initialized and mark the destination register as * initialized if it is. @@ -558,39 +560,43 @@ ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint16_t* shadow_registe break; } - if (src_register_required && !(*shadow_registers & (1 << inst.src))) { + if (src_register_required && !(*shadow_registers & REGISTER_TO_SHADOW_MASK(inst.src))) { vm->error_printf(stderr, "Error: Source register r%d is not initialized.\n", inst.src); return false; } - if (dst_register_required && !(*shadow_registers & (1 << inst.dst))) { + if (dst_register_required && !(*shadow_registers & REGISTER_TO_SHADOW_MASK(inst.dst))) { vm->error_printf(stderr, "Error: Destination register r%d is not initialized.\n", inst.dst); return false; } if (dst_register_initialized) { - *shadow_registers |= 1 << inst.dst; + *shadow_registers |= REGISTER_TO_SHADOW_MASK(inst.dst); } if (inst.opcode == EBPF_OP_CALL) { if (inst.src == 0) { // Mark the return address register as initialized. - *shadow_registers |= 1 << 0; + *shadow_registers |= REGISTER_TO_SHADOW_MASK(0); // Mark r1-r5 as uninitialized. - *shadow_registers &= ~0x3e; + *shadow_registers &= + ~(REGISTER_TO_SHADOW_MASK(1) | REGISTER_TO_SHADOW_MASK(2) | REGISTER_TO_SHADOW_MASK(3) | + REGISTER_TO_SHADOW_MASK(4) | REGISTER_TO_SHADOW_MASK(5)); } else if (inst.src == 1) { // Do nothing, register state will be handled by the callee on return. } } if (inst.opcode == EBPF_OP_EXIT) { - if (!(*shadow_registers & (1 << 0))) { - vm->error_printf(stderr, "Error: Return address register r0 is not initialized.\n"); + if (!(*shadow_registers & REGISTER_TO_SHADOW_MASK(0))) { + vm->error_printf(stderr, "Error: Return value register r0 is not initialized.\n"); return false; } // Mark r1-r5 as uninitialized. - *shadow_registers &= ~0x3e; + *shadow_registers &= + ~(REGISTER_TO_SHADOW_MASK(1) | REGISTER_TO_SHADOW_MASK(2) | REGISTER_TO_SHADOW_MASK(3) | + REGISTER_TO_SHADOW_MASK(4) | REGISTER_TO_SHADOW_MASK(5)); } return true; @@ -648,7 +654,7 @@ ubpf_exec_ex( reg[10] = (uintptr_t)stack_start + stack_length; // Mark r1, r2, r10 as initialized. - shadow_registers |= (1 << 1) | (1 << 2) | (1 << 10); + shadow_registers |= REGISTER_TO_SHADOW_MASK(1) | REGISTER_TO_SHADOW_MASK(2) | REGISTER_TO_SHADOW_MASK(10); int instruction_limit = vm->instruction_limit;