From 9d46db69a4d978adbb1c1f18bfd2824834d79ad1 Mon Sep 17 00:00:00 2001
From: Alan Jowett
Date: Sun, 5 May 2024 15:30:08 -0700
Subject: [PATCH] Reject BPF program if uninit stack is accessed

Reject programs if registers are used before being initialized.

Signed-off-by: Alan Jowett
---
 libfuzzer/README.md          |  94 ++++++++++++
 libfuzzer/libfuzz_harness.cc | 115 ++++++++++++++-
 libfuzzer/split.sh           |  37 +++++
 ubpf/disassembler.py         |   4 +
 vm/inc/ubpf.h                |  18 +++
 vm/ubpf_int.h                |   1 +
 vm/ubpf_jit_x86_64.c         |   9 +-
 vm/ubpf_jit_x86_64.h         |   6 +
 vm/ubpf_vm.c                 | 269 ++++++++++++++++++++++++++++++++++-
 9 files changed, 547 insertions(+), 6 deletions(-)
 create mode 100644 libfuzzer/README.md
 create mode 100755 libfuzzer/split.sh

diff --git a/libfuzzer/README.md b/libfuzzer/README.md
new file mode 100644
index 00000000..8eab2390
--- /dev/null
+++ b/libfuzzer/README.md
@@ -0,0 +1,94 @@
+# ubpf_fuzzer
+
+This is a libFuzzer-based fuzzer.
+
+To build, run:
+```
+cmake \
+    -G Ninja \
+    -S . \
+    -B build \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DCMAKE_C_COMPILER=clang \
+    -DCMAKE_CXX_COMPILER=clang++ \
+    -DUBPF_ENABLE_LIBFUZZER=1
+
+cmake --build build
+```
+
+To run:
+Create folders for the corpus and for the artifacts from any crashes found, then run the fuzzer.
+
+```
+mkdir corpus
+mkdir artifacts
+build/bin/ubpf_fuzzer corpus -artifact_prefix=artifacts/
+```
+
+Optionally, add "-jobs=100" to keep fuzzing past the first crash and gather up to 100 crashes at a time.
+
+This will produce a lot of output that looks like:
+```
+#529745 REDUCE cov: 516 ft: 932 corp: 442/22Kb lim: 2875 exec/s: 264872 rss: 429Mb L: 50/188 MS: 3 CrossOver-ChangeBit-EraseBytes-
+#529814 REDUCE cov: 516 ft: 932 corp: 442/22Kb lim: 2875 exec/s: 264907 rss: 429Mb L: 45/188 MS: 4 ChangeBit-ShuffleBytes-PersAutoDict-EraseBytes- DE: "\005\000\000\000\000\000\000\000"-
+#530202 REDUCE cov: 516 ft: 932 corp: 442/22Kb lim: 2875 exec/s: 265101 rss: 429Mb L: 52/188 MS: 3 ChangeByte-ChangeASCIIInt-EraseBytes-
+#531224 REDUCE cov: 518 ft: 934 corp: 443/22Kb lim: 2875 exec/s: 265612 rss: 429Mb L: 73/188 MS: 2 CopyPart-PersAutoDict- DE: "\001\000\000\000"-
+#531750 REDUCE cov: 518 ft: 934 corp: 443/22Kb lim: 2875 exec/s: 265875 rss: 429Mb L: 45/188 MS: 1 EraseBytes-
+#532127 REDUCE cov: 519 ft: 935 corp: 444/22Kb lim: 2875 exec/s: 266063 rss: 429Mb L: 46/188 MS: 2 ChangeBinInt-ChangeByte-
+#532246 REDUCE cov: 519 ft: 935 corp: 444/22Kb lim: 2875 exec/s: 266123 rss: 429Mb L: 66/188 MS: 4 ChangeBit-CrossOver-ShuffleBytes-EraseBytes-
+#532357 NEW cov: 520 ft: 936 corp: 445/22Kb lim: 2875 exec/s: 266178 rss: 429Mb L: 55/188 MS: 1 ChangeBinInt-
+#532404 REDUCE cov: 520 ft: 936 corp: 445/22Kb lim: 2875 exec/s: 266202 rss: 429Mb L: 57/188 MS: 2 ChangeBit-EraseBytes-
+#532486 REDUCE cov: 520 ft: 936 corp: 445/22Kb lim: 2875 exec/s: 266243 rss: 429Mb L: 44/188 MS: 2 EraseByte
+```
+
+Eventually it will probably crash and produce a message like:
+```
+=================================================================
+==376403==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x000000000000 bp 0x7ffca9d3cda0 sp 0x7ffca9d3cb98 T0)
+==376403==Hint: pc points to the zero page.
+==376403==The signal is caused by a READ memory access.
+==376403==Hint: address points to the zero page.
+    #0 0x0  ()
+    #1 0x50400001a48f  ()
+
+AddressSanitizer can not provide additional info.
+SUMMARY: AddressSanitizer: SEGV ()
+==376403==ABORTING
+MS: 1 ChangeByte-; base unit: cea14e5e2ecdc723b9beb640471a18b4ea529f75
+0x28,0x0,0x0,0x0,0xb4,0x50,0x10,0x6a,0x6a,0x4a,0x6a,0x2d,0x2e,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x4,0x21,0x0,0x0,0x0,0x0,0x95,0x95,0x26,0x21,0xfc,0xff,0xff,0xff,0x95,0x95,0x95,0x95,0x97,0xb7,0x97,0x97,0x0,0x8e,0x0,0x24,
+(\000\000\000\264P\020jjJj-.\001\000\000\000\000\000\000\004!\000\000\000\000\225\225&!\374\377\377\377\225\225\225\225\227\267\227\227\000\216\000$
+artifact_prefix='artifacts/'; Test unit written to artifacts/crash-7036cbef2b568fa0b6e458a9c8062571a65144e1
+Base64: KAAAALRQEGpqSmotLgEAAAAAAAAEIQAAAACVlSYh/P///5WVlZWXt5eXAI4AJA==
+```
+
+Each crash file begins with a 4-byte little-endian program length, followed by the BPF program bytes and then the input memory. To triage a crash, post-process it with split.sh, which splits the file at that boundary:
+```
+libfuzzer/split.sh artifacts/crash-7036cbef2b568fa0b6e458a9c8062571a65144e1
+
+Extracting program-7036cbef2b568fa0b6e458a9c8062571a65144e1...
+Extracting memory-7036cbef2b568fa0b6e458a9c8062571a65144e1...
+Disassembling program-7036cbef2b568fa0b6e458a9c8062571a65144e1...
+Program size: 40
+Memory size: 2
+Disassembled program:
+mov32 %r0, 0x2d6a4a6a
+jgt32 %r1, %r0, +0
+add32 %r1, 0x95950000
+jgt32 %r1, 0x9595ffff, -4
+exit
+Memory contents:
+00000000: 0024                                     .$
+```
+
+To reproduce the crash, you can run:
+```
+build/bin/ubpf_fuzzer artifacts/crash-7036cbef2b568fa0b6e458a9c8062571a65144e1
+```
+
+Or you can reproduce it using ubpf-test:
+```
+build/bin/ubpf-test --mem artifacts/memory-7036cbef2b568fa0b6e458a9c8062571a65144e1 artifacts/program-7036cbef2b568fa0b6e458a9c8062571a65144e1 --jit
+```
diff --git a/libfuzzer/libfuzz_harness.cc b/libfuzzer/libfuzz_harness.cc
index 4e6426af..4756caf8 100644
--- a/libfuzzer/libfuzz_harness.cc
+++ b/libfuzzer/libfuzz_harness.cc
@@ -89,6 +89,23 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size)
         return -1;
     }
 
+    if ((program_length % sizeof(ebpf_inst)) != 0) {
+        // The program length needs to be a multiple of sizeof(ebpf_inst).
+        // This is not interesting, as the fuzzer input is invalid.
+        // Do not add it to the corpus.
+        return -1;
+    }
+
+    for (std::size_t i = 0; i < program_length / sizeof(ebpf_inst); i++) {
+        ebpf_inst inst = reinterpret_cast<const ebpf_inst*>(program_start)[i];
+        if (inst.opcode == EBPF_OP_CALL && inst.src == 1) {
+            // Until local calls are fixed, reject local calls.
+            // This is not interesting, as the fuzzer input is invalid.
+            // Do not add it to the corpus.
+            return -1;
+        }
+    }
+
     // Copy any input memory into a writable buffer.
     if (memory_length > 0) {
         memory.resize(memory_length);
@@ -133,15 +150,109 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, std::size_t size)
         return -1;
     }
 
-    uint64_t result = 0;
+    uint64_t jit_result = 0;
+    uint64_t interpreter_result = 0;
+
+    // Reserve 3 extra pages of stack space for use by helper functions.
+    const std::size_t helper_function_stack_space = 3 * 4096; // 3 pages
+    const std::size_t prolog_size = 64; // Account for extra space needed for the prolog of the jitted function.
+    uint8_t ubpf_stack[UBPF_STACK_SIZE + helper_function_stack_space];
+    memset(ubpf_stack, 0, sizeof(ubpf_stack));
+
+    // Tell the interpreter where the stack is.
+    ubpf_set_stack(vm.get(), (uint8_t*)ubpf_stack + sizeof(ubpf_stack) - UBPF_STACK_SIZE - prolog_size);
 
     // Execute the program using the input memory.
-    if (ubpf_exec(vm.get(), memory.data(), memory.size(), &result) != 0) {
+    if (ubpf_exec(vm.get(), memory.data(), memory.size(), &interpreter_result) != 0) {
         // The program passed validation during load, but failed during execution
         // due to a runtime error. Add it to the corpus as it may be interesting.
         return 0;
     }
 
+    auto fn = ubpf_compile(vm.get(), &error_message);
+    if (fn == nullptr) {
+        // The program failed to compile.
+        // This is not interesting, as the fuzzer input is invalid.
+        // Do not add it to the corpus.
+        free(error_message);
+        return -1;
+    }
+
+    memset(ubpf_stack, 0, sizeof(ubpf_stack));
+
+    // Set up the stack for the function call.
+    uintptr_t new_rsp = (uintptr_t)ubpf_stack + sizeof(ubpf_stack);
+    uintptr_t* rsp;
+    uintptr_t* old_rsp_ptr;
+    uintptr_t old_rdi;
+    uintptr_t old_rsi;
+    uintptr_t old_rax;
+    new_rsp &= ~0xf; // Align the new stack pointer to 16 bytes.
+    new_rsp -= sizeof(uintptr_t);
+
+    rsp = (uintptr_t*)new_rsp;
+
+    // Save space for the old value of rsp.
+    *(--rsp) = 0;
+    old_rsp_ptr = rsp;
+
+    // Store the function address.
+    *(--rsp) = (uintptr_t)fn;
+
+    // Store the memory address.
+    *(--rsp) = (uintptr_t)memory.data();
+
+    // Store the memory size.
+    *(--rsp) = (uintptr_t)memory.size();
+
+    // Copy the current value of rsp into the reserved space.
+    __asm__ __volatile__("movq %%rsp, %0" : "=r"(*old_rsp_ptr));
+
+    // Save the current value of rdi.
+    __asm__ __volatile__("movq %%rdi, %0" : "=r"(old_rdi));
+
+    // Save the current value of rsi.
+    __asm__ __volatile__("movq %%rsi, %0" : "=r"(old_rsi));
+
+    // Save the current value of rax.
+    __asm__ __volatile__("movq %%rax, %0" : "=r"(old_rax));
+
+    // Switch rsp to the new stack.
+    __asm__ __volatile__("movq %0, %%rsp" : : "r"(rsp));
+
+    // Pop the arguments into the System V argument registers (rdi = memory, rsi = size).
+    __asm__ __volatile__("pop %rsi");
+    __asm__ __volatile__("pop %rdi");
+
+    // Pop the function address into rax.
+    __asm__ __volatile__("pop %rax");
+
+    // Call the function.
+    __asm__ __volatile__("call *%rax");
+
+    // Pop the old value of rsp, switching back to the original stack.
+    __asm__ __volatile__("pop %rsp");
+
+    // Copy rax into jit_result.
+    __asm__ __volatile__("movq %%rax, %0" : "=r"(jit_result));
+
+    // Restore the old value of rax.
+    __asm__ __volatile__("movq %0, %%rax" : : "r"(old_rax));
+
+    // Restore the old value of rsi.
+    __asm__ __volatile__("movq %0, %%rsi" : : "r"(old_rsi));
+
+    // Restore the old value of rdi.
+    __asm__ __volatile__("movq %0, %%rdi" : : "r"(old_rdi));
+
+    // If the interpreter and JIT results disagree, fail loudly so the fuzzer records the input.
+    if (interpreter_result != jit_result) {
+        printf("interpreter_result: %lx\n", interpreter_result);
+        printf("jit_result: %lx\n", jit_result);
+        throw std::runtime_error("interpreter_result != jit_result");
+    }
+
     // Program executed successfully.
     // Add it to the corpus as it may be interesting.
     return 0;
diff --git a/libfuzzer/split.sh b/libfuzzer/split.sh
new file mode 100755
index 00000000..2f562afa
--- /dev/null
+++ b/libfuzzer/split.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# Split the file name into path and base name.
+path=$(dirname "$1")
+base=$(basename "$1")
+
+# Get the first 4 bytes from the file (the length of the program).
+input="$(xxd -p -l 4 "$1")"
+# Convert from little endian.
+input="${input:6:2}${input:4:2}${input:2:2}${input:0:2}"
+
+# Convert input from a hex string to a value.
+length=$((16#$input))
+
+# Extract the hash part from the file name.
+hash=$(echo "$base" | cut -d'-' -f2-)
+
+# Copy the program to a file named program-$hash.
+echo "Extracting program-$hash..."
+dd if="$1" of="$path/program-$hash" bs=1 skip=4 count=$length 2> /dev/null
+
+echo "Extracting memory-$hash..."
+# Copy the rest to a file named memory-$hash.
+dd if="$1" of="$path/memory-$hash" bs=1 skip=$((4 + $length)) 2> /dev/null
+
+echo "Disassembling program-$hash..."
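+# NOTE: bin/ubpf-disassembler is resolved relative to the current working directory,
+# which assumes the script is run from the build directory; adjust the path otherwise.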
+# Disassemble the program using bin/ubpf-disassembler.
+bin/ubpf-disassembler "$path/program-$hash" > "$path/program-$hash.asm"
+
+echo "Program size: $(stat -c %s "$path/program-$hash")"
+echo "Memory size: $(stat -c %s "$path/memory-$hash")"
+
+echo "Disassembled program:"
+cat "$path/program-$hash.asm"
+
+echo "Memory contents:"
+xxd "$path/memory-$hash"
diff --git a/ubpf/disassembler.py b/ubpf/disassembler.py
index b39f7cf5..bd78b255 100644
--- a/ubpf/disassembler.py
+++ b/ubpf/disassembler.py
@@ -128,6 +128,8 @@ def disassemble_one(data, offset):
         if opcode_name == "exit":
             return opcode_name
         elif opcode_name == "call":
+            if src_reg == 1:
+                opcode_name += " local"
             return "%s %s" % (opcode_name, I(imm))
         elif opcode_name == "ja":
             return "%s %s" % (opcode_name, O(off))
@@ -143,6 +145,8 @@ def disassemble_one(data, offset):
         if opcode_name == "exit":
             return opcode_name
         elif opcode_name == "call":
+            if src_reg == 1:
+                opcode_name += " local"
             return "%s %s" % (opcode_name, I(imm))
         elif opcode_name == "ja":
             return "%s %s" % (opcode_name, O(off))
diff --git a/vm/inc/ubpf.h b/vm/inc/ubpf.h
index 6ee77cbe..25433721 100644
--- a/vm/inc/ubpf.h
+++ b/vm/inc/ubpf.h
@@ -344,6 +344,24 @@ extern "C"
     uint64_t*
     ubpf_get_registers(const struct ubpf_vm* vm);
 
+    /**
+     * @brief Override the storage location for the BPF stack in the VM.
+     *
+     * @param[in] vm The VM to set the stack storage in.
+     * @param[in] stack The stack storage; must be at least UBPF_STACK_SIZE bytes.
+     */
+    void
+    ubpf_set_stack(struct ubpf_vm* vm, uint8_t stack[UBPF_STACK_SIZE]);
+
+    /**
+     * @brief Retrieve the storage location for the BPF stack in the VM.
+     *
+     * @param[in] vm The VM to get the stack storage from.
+     * @return uint8_t* A pointer to the stack storage.
+     */
+    uint8_t*
+    ubpf_get_stack(const struct ubpf_vm* vm);
+
     /**
      * @brief Optional secret to improve ROP protection.
      *
diff --git a/vm/ubpf_int.h b/vm/ubpf_int.h
index 1389886b..c37494f6 100644
--- a/vm/ubpf_int.h
+++ b/vm/ubpf_int.h
@@ -73,6 +73,7 @@ struct ubpf_vm
     int instruction_limit;
 #ifdef DEBUG
     uint64_t* regs;
+    uintptr_t stack;
 #endif
 };
 
diff --git a/vm/ubpf_jit_x86_64.c b/vm/ubpf_jit_x86_64.c
index 467d6daa..4b3b1cda 100644
--- a/vm/ubpf_jit_x86_64.c
+++ b/vm/ubpf_jit_x86_64.c
@@ -301,7 +301,6 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
         }
 
         struct ebpf_inst inst = ubpf_fetch_instruction(vm, i);
-        state->pc_locs[i] = state->offset;
 
         int dst = map_register(inst.dst);
         int src = map_register(inst.src);
@@ -314,6 +313,8 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
             emit_alu64_imm32(state, 0x81, 5, RSP, 8);
         }
 
+        state->pc_locs[i] = state->offset;
+
         switch (inst.opcode) {
         case EBPF_OP_ADD_IMM:
             emit_alu32_imm32(state, 0x81, 0, dst, inst.imm);
@@ -723,6 +724,12 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg)
             state->jit_status = UnknownInstruction;
             *errmsg = ubpf_error("Unknown instruction at PC %d: opcode %02x", i, inst.opcode);
        }
+
+        // If this is an ALU32 instruction, truncate the destination register to 32 bits.
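+        // BPF semantics: 32-bit ALU operations zero the upper 32 bits of the destination.
+        // The byte-swap group (0xd0) is excluded because it handles its own operand width.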
+        if (((inst.opcode & EBPF_CLS_MASK) == EBPF_CLS_ALU) &&
+            (inst.opcode & EBPF_ALU_OP_MASK) != 0xd0) {
+            emit_truncate_u32(state, dst);
+        }
     }
 
     if (state->jit_status != NoError) {
diff --git a/vm/ubpf_jit_x86_64.h b/vm/ubpf_jit_x86_64.h
index 376e63c3..44bfdce3 100644
--- a/vm/ubpf_jit_x86_64.h
+++ b/vm/ubpf_jit_x86_64.h
@@ -212,6 +212,12 @@ emit_alu32_imm8(struct jit_state* state, int op, int src, int dst, int8_t imm)
     emit1(state, imm);
 }
 
+static inline void
+emit_truncate_u32(struct jit_state* state, int destination)
+{
+    // AND the destination with UINT32_MAX (opcode 0x81 /4) to clear the upper 32 bits.
+    emit_alu32_imm32(state, 0x81, 4, destination, UINT32_MAX);
+}
+
 /* REX.W prefix and ModRM byte */
 /* We use the MR encoding when there is a choice */
 /* 'src' is often used as an opcode extension */
diff --git a/vm/ubpf_vm.c b/vm/ubpf_vm.c
index e2d71ae0..076ec169 100644
--- a/vm/ubpf_vm.c
+++ b/vm/ubpf_vm.c
@@ -356,6 +356,207 @@ ubpf_mem_store(uint64_t address, uint64_t value, size_t size)
     }
 }
 
+/**
+ * @brief Mark the bits in the shadow stack corresponding to the address, if it is within the stack bounds.
+ *
+ * @param[in] vm The VM instance.
+ * @param[in] stack The base address of the stack.
+ * @param[in,out] shadow_stack The base address of the shadow stack.
+ * @param[in] address The address being written to.
+ * @param[in] size The number of bytes being written.
+ */
+static inline void
+ubpf_mark_shadow_stack(const struct ubpf_vm* vm, uint64_t* stack, uint8_t* shadow_stack, void* address, size_t size)
+{
+    if (!vm->bounds_check_enabled) {
+        return;
+    }
+
+    uintptr_t access_start = (uintptr_t)address;
+    uintptr_t access_end = access_start + size;
+    uintptr_t stack_start = (uintptr_t)stack;
+    uintptr_t stack_end = stack_start + UBPF_STACK_SIZE;
+
+    if (access_start > access_end) {
+        // Overflow.
+        return;
+    }
+
+    if (access_start >= stack_start && access_end <= stack_end) {
+        // The shadow stack is a bit array, where each bit corresponds to 1 byte of the stack.
+        // If the bit is set, the memory is initialized.
+        size_t offset = access_start - stack_start;
+        for (size_t test_bit = offset; test_bit < offset + size; test_bit++) {
+            // Convert test_bit into offset + mask to set in the shadow stack.
+            size_t bit_offset = test_bit / 8;
+            size_t bit_mask = 1 << (test_bit % 8);
+            shadow_stack[bit_offset] |= bit_mask;
+        }
+    }
+}
+
+/**
+ * @brief Check whether the address is within the stack bounds and, if so, whether the shadow stack is marked for it.
+ *
+ * @param[in] vm The VM instance.
+ * @param[in] stack The base address of the stack.
+ * @param[in] shadow_stack The base address of the shadow stack.
+ * @param[in] address The address being read from.
+ * @param[in] size The number of bytes being read.
+ * @return true - The read is from initialized memory or is not within the stack bounds.
+ * @return false - The read is from uninitialized memory within the stack bounds.
+ */
+static inline bool
+ubpf_check_shadow_stack(const struct ubpf_vm* vm, uint64_t* stack, uint8_t* shadow_stack, void* address, size_t size)
+{
+    if (!vm->bounds_check_enabled) {
+        return true;
+    }
+
+    uintptr_t access_start = (uintptr_t)address;
+    uintptr_t access_end = access_start + size;
+    uintptr_t stack_start = (uintptr_t)stack;
+    uintptr_t stack_end = stack_start + UBPF_STACK_SIZE;
+
+    if (access_start > access_end) {
+        // Overflow.
+        return true;
+    }
+
+    if (access_start >= stack_start && access_end <= stack_end) {
+        // The shadow stack is a bit array, where each bit corresponds to 1 byte of the stack.
+        // If the bit is set, the memory is initialized.
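+        // Example: a 2-byte read at stack offset 10 tests bits 10 and 11, i.e. bits 2 and 3
+        // of shadow_stack[1] (10 / 8 == 1 and 10 % 8 == 2).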
+        size_t offset = access_start - stack_start;
+        for (size_t test_bit = offset; test_bit < offset + size; test_bit++) {
+            // Convert test_bit into offset + mask to test against the shadow stack.
+            size_t bit_offset = test_bit / 8;
+            size_t bit_mask = 1 << (test_bit % 8);
+            if ((shadow_stack[bit_offset] & bit_mask) == 0) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+/**
+ * @brief Check that the registers read by this instruction are initialized, and mark the destination register as
+ * initialized if the instruction writes to it.
+ *
+ * @param[in] vm The VM instance.
+ * @param[in,out] shadow_registers Storage for the shadow register state.
+ * @param[in] inst The instruction being executed.
+ * @return true - The required registers are initialized.
+ * @return false - A required register is not initialized; an error message has been printed.
+ */
+static inline bool
+ubpf_validate_shadow_register(const struct ubpf_vm* vm, uint16_t* shadow_registers, struct ebpf_inst inst)
+{
+    bool src_register_required = false;
+    bool dst_register_required = false;
+    bool dst_register_initialized = false;
+
+    switch (inst.opcode & EBPF_CLS_MASK) {
+    // Load instructions initialize the destination register.
+    case EBPF_CLS_LD:
+        dst_register_initialized = true;
+        break;
+    // Register load (LDX) instructions initialize the destination register and require the source
+    // register (the address) to be initialized.
+    case EBPF_CLS_LDX:
+        src_register_required = true;
+        dst_register_initialized = true;
+        break;
+    // Store-immediate (ST) instructions require the destination register (the address) to be initialized.
+    case EBPF_CLS_ST:
+        dst_register_required = true;
+        break;
+    // Register store (STX) instructions require both the source and destination registers to be initialized.
+    case EBPF_CLS_STX:
+        dst_register_required = true;
+        src_register_required = true;
+        break;
+    case EBPF_CLS_ALU:
+    case EBPF_CLS_ALU64:
+        // The source register is required if the EBPF_SRC_REG bit is set.
+        src_register_required = inst.opcode & EBPF_SRC_REG;
+        dst_register_initialized = true;
+        switch (inst.opcode & EBPF_ALU_OP_MASK) {
+        case 0x00: // EBPF_OP_ADD
+        case 0x10: // EBPF_OP_SUB
+        case 0x20: // EBPF_OP_MUL
+        case 0x30: // EBPF_OP_DIV
+        case 0x40: // EBPF_OP_OR
+        case 0x50: // EBPF_OP_AND
+        case 0x60: // EBPF_OP_LSH
+        case 0x70: // EBPF_OP_RSH
+        case 0x80: // EBPF_OP_NEG
+        case 0x90: // EBPF_OP_MOD
+        case 0xa0: // EBPF_OP_XOR
+        case 0xc0: // EBPF_OP_ARSH
+        case 0xd0: // EBPF_OP_LE
+            // These operations read the destination register as well as write it.
+            dst_register_required = true;
+            break;
+        case 0xb0: // EBPF_OP_MOV
+            // MOV only writes the destination register, so it need not be initialized beforehand.
+            break;
+        }
+        break;
+    case EBPF_CLS_JMP:
+    case EBPF_CLS_JMP32:
+        // The source register is required if the EBPF_SRC_REG bit is set.
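+        // JA, CALL, and EXIT take no register operands; all conditional jumps read the
+        // destination register, plus the source register for the *_REG forms.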
+        src_register_required = inst.opcode & EBPF_SRC_REG;
+        switch (inst.opcode & EBPF_JMP_OP_MASK) {
+        case EBPF_MODE_JA:
+        case EBPF_MODE_CALL:
+        case EBPF_MODE_EXIT:
+            src_register_required = false;
+            break;
+        case EBPF_MODE_JEQ:
+        case EBPF_MODE_JGT:
+        case EBPF_MODE_JGE:
+        case EBPF_MODE_JSET:
+        case EBPF_MODE_JNE:
+        case EBPF_MODE_JSGT:
+        case EBPF_MODE_JSGE:
+        case EBPF_MODE_JLT:
+        case EBPF_MODE_JLE:
+        case EBPF_MODE_JSLT:
+        case EBPF_MODE_JSLE:
+            dst_register_required = true;
+            break;
+        }
+        break;
+    }
+
+    if (src_register_required && !(*shadow_registers & (1 << inst.src))) {
+        vm->error_printf(stderr, "Error: Source register r%d is not initialized.\n", inst.src);
+        return false;
+    }
+
+    if (dst_register_required && !(*shadow_registers & (1 << inst.dst))) {
+        vm->error_printf(stderr, "Error: Destination register r%d is not initialized.\n", inst.dst);
+        return false;
+    }
+
+    if (dst_register_initialized) {
+        *shadow_registers |= 1 << inst.dst;
+    }
+
+    if (inst.opcode == EBPF_OP_CALL) {
+        // The call's return value lands in r0; mark it as initialized.
+        *shadow_registers |= 1 << 0;
+    }
+
+    if (inst.opcode == EBPF_OP_EXIT) {
+        if (!(*shadow_registers & (1 << 0))) {
+            vm->error_printf(stderr, "Error: Return value register r0 is not initialized.\n");
+            return false;
+        }
+        // Mark the scratch registers r1-r5 as uninitialized.
+        *shadow_registers &= ~0x3e;
+    }
+
+    return true;
+}
+
 int
 ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_return_value)
 {
@@ -370,6 +571,7 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
     // Windows Kernel mode limits stack usage to 12K, so we need to allocate it dynamically.
 #if defined(NTDDI_VERSION) && defined(WINNT)
     uint64_t* stack = NULL;
+    uint8_t* shadow_stack = NULL;
     struct ubpf_stack_frame* stack_frames = NULL;
 
     stack = calloc(UBPF_STACK_SIZE, 1);
@@ -378,6 +580,12 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
         goto cleanup;
     }
 
+    shadow_stack = calloc(UBPF_STACK_SIZE / 8, 1);
+    if (!shadow_stack) {
+        return_value = -1;
+        goto cleanup;
+    }
+
     stack_frames = calloc(UBPF_MAX_CALL_DEPTH, sizeof(struct ubpf_stack_frame));
     if (!stack_frames) {
         return_value = -1;
@@ -385,7 +593,9 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
     }
 
 #else
-    uint64_t stack[UBPF_STACK_SIZE / sizeof(uint64_t)];
+    uint64_t ubpf_stack[UBPF_STACK_SIZE / sizeof(uint64_t)];
+    uint64_t* stack = ubpf_stack;
+    // Zero-initialize the shadow stack so no byte is spuriously treated as initialized.
+    uint8_t shadow_stack[UBPF_STACK_SIZE / 8] = {
+        0,
+    };
     struct ubpf_stack_frame stack_frames[UBPF_MAX_CALL_DEPTH] = {
         0,
     };
@@ -401,16 +611,26 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
         reg = vm->regs;
     else
         reg = _reg;
+
+    if (vm->stack) {
+        stack = (uint64_t*)vm->stack;
+    }
 #else
     reg = _reg;
 #endif
 
+    uint16_t shadow_registers = 0; // Bit mask of registers that have been written to.
+
     reg[1] = (uintptr_t)mem;
     reg[2] = (uint64_t)mem_len;
     reg[10] = (uintptr_t)stack + UBPF_STACK_SIZE;
+
+    // Mark r1, r2, and r10 as initialized.
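+    // (r1 = pointer to the input memory, r2 = its length, r10 = the read-only stack/frame pointer.)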
+    shadow_registers |= (1 << 1) | (1 << 2) | (1 << 10);
+
     int instruction_limit = vm->instruction_limit;
+
     while (1) {
         const uint16_t cur_pc = pc;
         if (pc >= vm->num_insts) {
@@ -423,6 +643,11 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
         }
 
         struct ebpf_inst inst = ubpf_fetch_instruction(vm, pc++);
+
+        if (!ubpf_validate_shadow_register(vm, &shadow_registers, inst)) {
+            return_value = -1;
+            goto cleanup;
+        }
+
         switch (inst.opcode) {
         case EBPF_OP_ADD_IMM:
             reg[inst.dst] += inst.imm;
@@ -624,6 +849,10 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
  */
 #define BOUNDS_CHECK_LOAD(size)                                                                                    \
     do {                                                                                                           \
+        if (!ubpf_check_shadow_stack(vm, stack, shadow_stack, (char*)reg[inst.src] + inst.offset, size)) {         \
+            return_value = -1;                                                                                     \
+            goto cleanup;                                                                                          \
+        }                                                                                                          \
         if (!bounds_check(vm, (char*)reg[inst.src] + inst.offset, size, "load", cur_pc, mem, mem_len, stack)) {    \
             return_value = -1;                                                                                     \
             goto cleanup;                                                                                          \
     } while (0)
 #define BOUNDS_CHECK_STORE(size)                                                                                   \
     do {                                                                                                           \
+        ubpf_mark_shadow_stack(vm, stack, shadow_stack, (char*)reg[inst.dst] + inst.offset, size);                 \
         if (!bounds_check(vm, (char*)reg[inst.dst] + inst.offset, size, "store", cur_pc, mem, mem_len, stack)) {   \
             return_value = -1;                                                                                     \
             goto cleanup;                                                                                          \
@@ -711,7 +941,7 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
             }
             break;
         case EBPF_OP_JEQ32_REG:
-            if (u32(reg[inst.dst]) == reg[inst.src]) {
+            if (u32(reg[inst.dst]) == u32(reg[inst.src])) {
                 pc += inst.offset;
             }
             break;
@@ -971,12 +1201,19 @@ ubpf_exec(const struct ubpf_vm* vm, void* mem, size_t mem_len, uint64_t* bpf_ret
             // valid.
             break;
         }
+
+        // As in the JIT, ALU32 operations (except byte swap, 0xd0) truncate the destination to 32 bits.
+        if (((inst.opcode & EBPF_CLS_MASK) == EBPF_CLS_ALU) &&
+            (inst.opcode & EBPF_ALU_OP_MASK) != 0xd0) {
+            reg[inst.dst] &= UINT32_MAX;
+        }
     }
 
 cleanup:
 #if defined(NTDDI_VERSION) && defined(WINNT)
     free(stack_frames);
+    free(shadow_stack);
 
-    free(stack);
+    if (!vm->stack) {
+        free(stack);
+    }
 #endif
     return return_value;
 }
@@ -1295,6 +1532,18 @@ ubpf_get_registers(const struct ubpf_vm* vm)
 {
     return vm->regs;
 }
+
+void
+ubpf_set_stack(struct ubpf_vm* vm, uint8_t stack[UBPF_STACK_SIZE])
+{
+    vm->stack = (uintptr_t)stack;
+}
+
+uint8_t*
+ubpf_get_stack(const struct ubpf_vm* vm)
+{
+    return (uint8_t*)vm->stack;
+}
 #else
 void
 ubpf_set_registers(struct ubpf_vm* vm, uint64_t* regs)
@@ -1312,6 +1561,20 @@ ubpf_get_registers(const struct ubpf_vm* vm)
     return NULL;
 }
 
+void
+ubpf_set_stack(struct ubpf_vm* vm, uint8_t stack[UBPF_STACK_SIZE])
+{
+    (void)vm;
+    (void)stack;
+    fprintf(stderr, "uBPF warning: stack is not exposed in release mode. Please recompile in debug mode\n");
+}
+
+uint8_t*
+ubpf_get_stack(const struct ubpf_vm* vm)
+{
+    (void)vm;
+    fprintf(stderr, "uBPF warning: stack is not exposed in release mode. Please recompile in debug mode\n");
+    return NULL;
+}
 #endif
 
 typedef struct _ebpf_encoded_inst