From 5a4f6c692175a9cc417de0bf47c0b632023aa9d5 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Sat, 18 May 2024 13:12:47 -0700 Subject: [PATCH] Handle call local separately from regular jump instructions (#467) * Handle call local separately from regular jump instructions Signed-off-by: Alan Jowett * Fix after local call prolog increased in size Signed-off-by: Alan Jowett * Zero initialize jit_state Signed-off-by: Alan Jowett * Increment num_jumps after inserting into table Signed-off-by: Alan Jowett * Initialize bpf_function_prolog_size correctly Signed-off-by: Alan Jowett * Update ubpf_jit_arm64.c Assert that prolog size matches. * Update ubpf_jit_x86_64.c Assert that the prolog size matches. --------- Signed-off-by: Alan Jowett Signed-off-by: Alan Jowett Co-authored-by: Alan Jowett --- tests/factorial.data | 13 +++++++++++++ vm/ubpf_jit_arm64.c | 44 ++++++++++++++++++++++++++++++++++++++++--- vm/ubpf_jit_support.c | 5 +++++ vm/ubpf_jit_support.h | 4 ++++ vm/ubpf_jit_x86_64.c | 34 +++++++++++++++++++++++++++++++-- vm/ubpf_jit_x86_64.h | 13 +++++++++++++ 6 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 tests/factorial.data diff --git a/tests/factorial.data b/tests/factorial.data new file mode 100644 index 00000000..1d2c721e --- /dev/null +++ b/tests/factorial.data @@ -0,0 +1,13 @@ +-- asm +mov %r1, 1 +mov %r2, 20 +call local exponential +exit +exponential: +mul %r1, %r2 +sub %r2, 1 +jne %r2, 0, exponential +mov %r0, %r1 +exit +-- result +0x21C3677C82B40000 diff --git a/vm/ubpf_jit_arm64.c b/vm/ubpf_jit_arm64.c index 9f9361a4..0368c7e7 100644 --- a/vm/ubpf_jit_arm64.c +++ b/vm/ubpf_jit_arm64.c @@ -338,7 +338,14 @@ static void emit_unconditionalbranch_immediate( struct jit_state* state, enum UnconditionalBranchImmediateOpcode op, int32_t target_pc) { - emit_patchable_relative(state->offset, target_pc, 0, state->jumps, state->num_jumps++); + struct patchable_relative* table = state->jumps; + int* num_jumps = &state->num_jumps; + if (op == UBR_BL 
&& target_pc != TARGET_PC_ENTER) { + table = state->local_calls; + num_jumps = &state->num_local_calls; + } + + emit_patchable_relative(state->offset, target_pc, 0, table, (*num_jumps)++); emit_instruction(state, op); } @@ -966,14 +973,22 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) // All checks for errors during the encoding of _this_ instruction // occur at the end of the loop. struct ebpf_inst inst = ubpf_fetch_instruction(vm, i); - state->pc_locs[i] = state->offset; if (i == 0 || vm->int_funcs[i]) { + size_t prolog_start = state->offset; emit_movewide_immediate(state, true, temp_register, ubpf_stack_usage_for_local_func(vm, i)); emit_addsub_immediate(state, true, AS_SUB, SP, SP, 16); emit_loadstorepair_immediate(state, LSP_STPX, temp_register, temp_register, SP, 0); + // Record the size of the prolog so that we can calculate offset when doing a local call. + if (state->bpf_function_prolog_size == 0) { + state->bpf_function_prolog_size = state->offset - prolog_start; + } else { + assert(state->bpf_function_prolog_size == state->offset - prolog_start); + } } + state->pc_locs[i] = state->offset; + enum Registers dst = map_register(inst.dst); enum Registers src = map_register(inst.src); uint8_t opcode = inst.opcode; @@ -1213,6 +1228,10 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) *errmsg = ubpf_error("Too many LEA calculations."); break; } + case TooManyLocalCalls: { + *errmsg = ubpf_error("Too many local calls."); + break; + } case UnexpectedInstruction: { // errmsg set at time the error was detected because the message requires // information about the unexpected instruction. 
@@ -1367,6 +1386,25 @@ resolve_leas(struct jit_state* state) return true; } +static bool +resolve_local_calls(struct jit_state* state) +{ + for (unsigned i = 0; i < state->num_local_calls; ++i) { + struct patchable_relative local_call = state->local_calls[i]; + + int32_t target_loc; + assert(local_call.target_offset == 0); + assert(local_call.target_pc != TARGET_PC_EXIT); + assert(local_call.target_pc != TARGET_PC_RETPOLINE); + target_loc = state->pc_locs[local_call.target_pc]; + + int32_t rel = target_loc - local_call.offset_loc; + rel -= state->bpf_function_prolog_size; + resolve_branch_immediate(state, local_call.offset_loc, rel); + } + return true; +} + bool ubpf_jit_update_dispatcher_arm64( struct ubpf_vm* vm, external_function_dispatcher_t new_dispatcher, uint8_t* buffer, size_t size, uint32_t offset) @@ -1411,7 +1449,7 @@ ubpf_translate_arm64(struct ubpf_vm* vm, uint8_t* buffer, size_t* size, enum Jit goto out; } - if (!resolve_jumps(&state) || !resolve_loads(&state) || !resolve_leas(&state)) { + if (!resolve_jumps(&state) || !resolve_loads(&state) || !resolve_leas(&state) || !resolve_local_calls(&state)) { compile_result.errmsg = ubpf_error("Could not patch the relative addresses in the JIT'd code."); goto out; } diff --git a/vm/ubpf_jit_support.c b/vm/ubpf_jit_support.c index c1f93425..8661be05 100644 --- a/vm/ubpf_jit_support.c +++ b/vm/ubpf_jit_support.c @@ -43,11 +43,14 @@ initialize_jit_state_result( state->jumps = calloc(UBPF_MAX_INSTS, sizeof(state->jumps[0])); state->loads = calloc(UBPF_MAX_INSTS, sizeof(state->loads[0])); state->leas = calloc(UBPF_MAX_INSTS, sizeof(state->leas[0])); + state->local_calls = calloc(UBPF_MAX_INSTS, sizeof(state->local_calls[0])); state->num_jumps = 0; state->num_loads = 0; state->num_leas = 0; + state->num_local_calls = 0; state->jit_status = NoError; state->jit_mode = jit_mode; + state->bpf_function_prolog_size = 0; if (!state->pc_locs || !state->jumps || !state->loads || !state->leas) { *errmsg = ubpf_error("Could 
not allocate space needed to JIT compile eBPF program"); @@ -69,6 +72,8 @@ release_jit_state_result(struct jit_state* state, struct ubpf_jit_result* compil state->loads = NULL; free(state->leas); state->leas = NULL; + free(state->local_calls); + state->local_calls = NULL; } void diff --git a/vm/ubpf_jit_support.h b/vm/ubpf_jit_support.h index 659bcc85..83ba66b8 100644 --- a/vm/ubpf_jit_support.h +++ b/vm/ubpf_jit_support.h @@ -34,6 +34,7 @@ enum JitProgress TooManyJumps, TooManyLoads, TooManyLeas, + TooManyLocalCalls, NotEnoughSpace, UnexpectedInstruction, UnknownInstruction @@ -90,10 +91,13 @@ struct jit_state struct patchable_relative* jumps; struct patchable_relative* loads; struct patchable_relative* leas; + struct patchable_relative* local_calls; int num_jumps; int num_loads; int num_leas; + int num_local_calls; uint32_t stack_size; + size_t bpf_function_prolog_size; // Count of bytes emitted at the start of the function. }; int diff --git a/vm/ubpf_jit_x86_64.c b/vm/ubpf_jit_x86_64.c index 00bd1094..4a50b526 100644 --- a/vm/ubpf_jit_x86_64.c +++ b/vm/ubpf_jit_x86_64.c @@ -135,7 +135,7 @@ emit_local_call(struct ubpf_vm* vm, struct jit_state* state, uint32_t target_pc) emit_alu64_imm32(state, 0x81, 5, RSP, 4 * sizeof(uint64_t)); #endif emit1(state, 0xe8); // e8 is the opcode for a CALL - emit_jump_address_reloc(state, target_pc); + emit_local_call_address_reloc(state, target_pc); #if defined(_WIN32) /* Deallocate home register space - 4 registers */ @@ -336,13 +336,13 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) } struct ebpf_inst inst = ubpf_fetch_instruction(vm, i); - state->pc_locs[i] = state->offset; int dst = map_register(inst.dst); int src = map_register(inst.src); uint32_t target_pc = i + inst.offset + 1; if (i == 0 || vm->int_funcs[i]) { + size_t prolog_start = state->offset; uint16_t stack_usage = ubpf_stack_usage_for_local_func(vm, i); emit_alu64_imm32(state, 0x81, 5, RSP, 8); emit1(state, 0x48); @@ -350,8 +350,16 @@ 
translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) emit1(state, 0x04); // Mod: 00b Reg: 000b RM: 100b emit1(state, 0x24); // Scale: 00b Index: 100b Base: 100b emit4(state, stack_usage); + // Record the size of the prolog so that we can calculate offset when doing a local call. + if (state->bpf_function_prolog_size == 0) { + state->bpf_function_prolog_size = state->offset - prolog_start; + } else { + assert(state->bpf_function_prolog_size == state->offset - prolog_start); + } } + state->pc_locs[i] = state->offset; + switch (inst.opcode) { case EBPF_OP_ADD_IMM: emit_alu32_imm32(state, 0x81, 0, dst, inst.imm); @@ -777,6 +785,10 @@ translate(struct ubpf_vm* vm, struct jit_state* state, char** errmsg) *errmsg = ubpf_error("Too many LEA calculations"); break; } + case TooManyLocalCalls: { + *errmsg = ubpf_error("Too many local calls"); + break; + } case UnexpectedInstruction: { // errmsg set at time the error was detected because the message requires // information about the unexpected instruction. @@ -976,6 +988,24 @@ resolve_patchable_relatives(struct jit_state* state) memcpy(offset_ptr, &rel, sizeof(uint32_t)); } + for (i = 0; i < state->num_local_calls; i++) { + struct patchable_relative local_call = state->local_calls[i]; + + int target_loc; + assert(local_call.target_offset == 0); + assert(local_call.target_pc != TARGET_PC_EXIT); + assert(local_call.target_pc != TARGET_PC_RETPOLINE); + + target_loc = state->pc_locs[local_call.target_pc]; + + /* Assumes call offset is at end of instruction */ + uint32_t rel = target_loc - (local_call.offset_loc + sizeof(uint32_t)); + rel -= state->bpf_function_prolog_size; // For the prolog inserted at the start of every local call. 
+ + uint8_t* offset_ptr = &state->buf[local_call.offset_loc]; + memcpy(offset_ptr, &rel, sizeof(uint32_t)); + } + for (i = 0; i < state->num_loads; i++) { struct patchable_relative load = state->loads[i]; diff --git a/vm/ubpf_jit_x86_64.h b/vm/ubpf_jit_x86_64.h index 376e63c3..8cac1b8e 100644 --- a/vm/ubpf_jit_x86_64.h +++ b/vm/ubpf_jit_x86_64.h @@ -121,6 +121,19 @@ emit_jump_address_reloc(struct jit_state* state, int32_t target_pc) return target_address_offset; } +static uint32_t +emit_local_call_address_reloc(struct jit_state* state, int32_t target_pc) +{ + if (state->num_local_calls == UBPF_MAX_INSTS) { + state->jit_status = TooManyLocalCalls; + return 0; + } + uint32_t target_address_offset = state->offset; + emit_patchable_relative(state->offset, target_pc, 0, state->local_calls, state->num_local_calls++); + emit_4byte_offset_placeholder(state); + return target_address_offset; +} + static inline void emit_modrm(struct jit_state* state, int mod, int r, int m) {