From 4d6b5b6e345d8adf06e5301a88d75a67660307b5 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 24 Apr 2026 08:30:57 +0200 Subject: [PATCH] JIT: rework register cache Signed-off-by: Paul Guyot --- libs/jit/src/CMakeLists.txt | 10 ++ libs/jit/src/jit_aarch64.erl | 55 ++++--- libs/jit/src/jit_arm32.erl | 167 ++++++++++---------- libs/jit/src/jit_armv6m.erl | 199 ++++++++++++------------ libs/jit/src/jit_regs.erl | 17 +- libs/jit/src/jit_riscv_impl.hrl | 78 +++++----- libs/jit/src/jit_wasm32.erl | 12 +- libs/jit/src/jit_x86_64.erl | 78 +++++----- tests/libs/jit/jit_aarch64_tests.erl | 177 +++++++++++++++++++++ tests/libs/jit/jit_arm32_tests.erl | 150 +++++++++++++++++- tests/libs/jit/jit_armv6m_tests.erl | 200 +++++++++++++++++++++++- tests/libs/jit/jit_regs_tests.erl | 14 ++ tests/libs/jit/jit_riscv32_tests.erl | 192 +++++++++++++++++++++++ tests/libs/jit/jit_riscv64_tests.erl | 194 +++++++++++++++++++++++ tests/libs/jit/jit_wasm32_tests.erl | 223 +++++++++++++++++++++++++++ tests/libs/jit/jit_x86_64_tests.erl | 198 ++++++++++++++++++++++++ 16 files changed, 1674 insertions(+), 290 deletions(-) diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 6545d63a39..bb57b0fe8d 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -67,6 +67,16 @@ if (NOT AVM_DISABLE_JIT_DWARF) ) endif() +# jit_riscv32.erl and jit_riscv64.erl include the shared implementation in +# jit_riscv_impl.hrl, so changes to that header must force recompilation. +foreach(riscv_module jit_riscv32 jit_riscv64) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${riscv_module}.beam + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit_riscv_impl.hrl + APPEND + ) +endforeach() + set(JIT_VERSION ${ATOMVM_BASE_VERSION}) install( diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 6bf7150da8..d1054e9f85 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -593,7 +593,7 @@ call_primitive_last( stream = Stream3, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0, - regs = jit_regs:invalidate_all(State1#state.regs) + regs = jit_regs:unreachable(State1#state.regs) }. %%----------------------------------------------------------------------------- @@ -656,13 +656,17 @@ jump_to_label( Rel = LabelOffset - Offset, I1 = jit_aarch64_asm:b(Rel), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + State#state{stream = Stream1, regs = jit_regs:unreachable(State#state.regs)}; false -> % Label not yet known, emit placeholder and add relocation I1 = jit_aarch64_asm:b(0), Reloc = {Label, Offset, b}, Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1, branches = [Reloc | AccBranches]} + State#state{ + stream = Stream1, + branches = [Reloc | AccBranches], + regs = jit_regs:unreachable(State#state.regs) + } end. jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> @@ -670,7 +674,7 @@ jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, T Rel = TargetOffset - Offset, I1 = jit_aarch64_asm:b(Rel), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}. + State#state{stream = Stream1, regs = jit_regs:unreachable(State#state.regs)}. %%----------------------------------------------------------------------------- %% @doc Jump to a continuation address stored in a register. @@ -709,7 +713,7 @@ jump_to_continuation( stream = Stream1, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0, - regs = jit_regs:invalidate_all(State#state.regs) + regs = jit_regs:unreachable(State#state.regs) }. %% @private @@ -1605,7 +1609,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when State1 = move_to_vm_register_emit( State0#state{stream = Stream1, available_regs = AT}, Temp, Dest ), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {imm, N}), State1#state{available_regs = AR0, regs = Regs1}; % Source is a VM register move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> @@ -1617,7 +1621,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, State1 = move_to_vm_register_emit( State0#state{stream = Stream1, available_regs = AT}, Temp, Dest ), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, ?MAX_REG}), State1#state{available_regs = AR0, regs = Regs1}; move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> Temp = first_avail(AR0), @@ -1628,7 +1632,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest State1 = move_to_vm_register_emit( State0#state{stream = Stream1, available_regs = AT}, Temp, Dest ), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, X}), State1#state{available_regs = AR0, regs = Regs1}; move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> Temp = first_avail(AR0), @@ -1651,7 +1655,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest State1 = move_to_vm_register_emit( State0#state{stream = Stream1, available_regs = AT}, Temp, Dest ), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {y_reg, Y}), State1#state{available_regs = AR0, regs = Regs1}; % term_to_float move_to_vm_register_emit( @@ -1696,7 +1700,7 @@ move_array_element( I2 = jit_aarch64_asm:str(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), - Regs2 = jit_regs:invalidate_reg(Regs1, Temp), + Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = Available, regs = Regs0} = @@ -2187,33 +2191,39 @@ move_to_native_register( Regs1 = jit_regs:set_contents(Regs0, RegDst, {imm, RegSrc}), State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {ptr, Reg}, RegDst ) when ?IS_GPR(Reg) -> I1 = jit_aarch64_asm:ldr(RegDst, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:invalidate_reg(Regs0, RegDst), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, + {x_reg, extra}, + RegDst ) -> I1 = jit_aarch64_asm:ldr(RegDst, ?X_REG(?MAX_REG)), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, extra}), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {x_reg, X}, RegDst ) when X < ?MAX_REG -> I1 = jit_aarch64_asm:ldr(RegDst, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, Y}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {y_reg, Y}, RegDst ) -> I1 = jit_aarch64_asm:ldr(RegDst, ?Y_REGS), I2 = jit_aarch64_asm:ldr(RegDst, {RegDst, Y * ?WORD_SIZE}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}. + Regs1 = jit_regs:set_contents(Regs0, RegDst, {y_reg, Y}), + State#state{stream = Stream1, regs = Regs1}. %%----------------------------------------------------------------------------- %% @doc Copy a value to a native register, allocating a new register from the @@ -2296,7 +2306,7 @@ move_to_cp( I3 = jit_aarch64_asm:str(Reg, ?CP), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + Regs1 = jit_regs:set_contents(Regs0, Reg, {y_reg, Y}), State#state{stream = Stream1, regs = Regs1}. %%----------------------------------------------------------------------------- @@ -2424,7 +2434,7 @@ get_module_index( I2 = jit_aarch64_asm:ldr_w(Reg, ?MODULE_INDEX(Reg)), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + Regs1 = jit_regs:set_contents(Regs0, Reg, module_index), { State#state{ stream = Stream1, @@ -2725,9 +2735,8 @@ decrement_reductions_and_maybe_schedule_next( Stream3, BNEOffset, <> ), State3 = merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs), - %% The schedule_next path is a tail call (dead end), so the register tracking - %% from the non-taken path (State1) is what matters at the continuation. - State3#state{regs = State1#state.regs}. + %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. + State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. %%----------------------------------------------------------------------------- %% @doc Emit a call to a label with automatic scheduling. Decrements reductions diff --git a/libs/jit/src/jit_arm32.erl b/libs/jit/src/jit_arm32.erl index f6639ad24b..ebf06ff72d 100644 --- a/libs/jit/src/jit_arm32.erl +++ b/libs/jit/src/jit_arm32.erl @@ -688,7 +688,7 @@ call_primitive_last( State5 = State4#state{ available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0, - regs = jit_regs:invalidate_all(State4#state.regs) + regs = jit_regs:unreachable(State4#state.regs) }, flush_literal_pool(State5). @@ -760,12 +760,11 @@ return_if_not_equal_to_ctx( I2 = jit_arm32_asm:b(eq, 4 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), RegBit = reg_bit(Reg), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State#state{ stream = Stream1, available_regs = AvailableRegs0 bor RegBit, used_regs = UsedRegs0 band (bnot RegBit), - regs = Regs1 + regs = Regs0 }. %%----------------------------------------------------------------------------- @@ -784,7 +783,7 @@ jump_to_label( {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), Stream1 = StreamModule:append(Stream0, CodeBlock), %% After unconditional jump, register tracking is dead until next label - State2 = State1#state{stream = Stream1, regs = jit_regs:invalidate_all(State1#state.regs)}, + State2 = State1#state{stream = Stream1, regs = jit_regs:unreachable(State1#state.regs)}, flush_literal_pool(State2). %% @doc Emit an unconditional branch to an absolute offset. @@ -793,7 +792,7 @@ jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, T Offset = StreamModule:offset(Stream0), CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), Stream1 = StreamModule:append(Stream0, CodeBlock), - State2 = State#state{stream = Stream1, regs = jit_regs:invalidate_all(State#state.regs)}, + State2 = State#state{stream = Stream1, regs = jit_regs:unreachable(State#state.regs)}, flush_literal_pool(State2). %%----------------------------------------------------------------------------- @@ -854,7 +853,12 @@ jump_to_continuation( Code = <>, Stream2 = StreamModule:append(State1#state.stream, Code), % Free all registers as this is a terminal instruction - State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0}, + State2 = State1#state{ + stream = Stream2, + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0, + regs = jit_regs:unreachable(State1#state.regs) + }, flush_literal_pool(State2). branch_to_offset_code(_State, Offset, TargetOffset) -> @@ -1975,14 +1979,12 @@ set_registers_args1( Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; set_registers_args1( - #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State, + #state{available_regs = AvailRegs} = State, {y_reg, X}, Reg, _StackOffset ) -> - Code = ldr_y_reg(Reg, X, AvailRegs), - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}; + ldr_y_reg(State, Reg, X, AvailRegs); set_registers_args1( #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset ) when @@ -2121,19 +2123,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> Temp = first_avail(AR0), AT = AR0 band (bnot reg_bit(Temp)), - Code = ldr_y_reg(Temp, Y, AT), - Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), - % ldr_y_reg clobbers first_avail(AT) as a hidden temp for loading Y_REGS pointer - Regs0a = - case AT of - 0 -> State0#state.regs; - _ -> jit_regs:invalidate_reg(State0#state.regs, first_avail(AT)) - end, - State0a = State0#state{ - stream = Stream1, - available_regs = AT, - regs = Regs0a - }, + State0a = ldr_y_reg(State0#state{available_regs = AT}, Temp, Y, AT), State1 = move_to_vm_register(State0a, Temp, Dest), Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), State1#state{available_regs = AR0, regs = Regs1}; @@ -2738,11 +2728,8 @@ move_to_native_register_emit( }; move_to_native_register_emit( #state{ - stream_module = StreamModule, - stream = Stream0, available_regs = Avail, - used_regs = Used, - regs = Regs0 + used_regs = Used } = State, {y_reg, Y}, Contents @@ -2750,22 +2737,15 @@ move_to_native_register_emit( Reg = first_avail(Avail), Bit = reg_bit(Reg), AvailT = Avail band (bnot Bit), - Code = ldr_y_reg(Reg, Y, AvailT), - Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:set_contents(Regs0, Reg, Contents), - % ldr_y_reg clobbers first_avail(AvailT) as a hidden temp for loading Y_REGS pointer - Regs2 = - case AvailT of - 0 -> Regs1; - _ -> jit_regs:invalidate_reg(Regs1, first_avail(AvailT)) - end, + State1 = ldr_y_reg( + State#state{available_regs = AvailT, used_regs = Used bor Bit}, + Reg, + Y, + AvailT + ), + Regs1 = jit_regs:set_contents(State1#state.regs, Reg, Contents), { - State#state{ - stream = Stream1, - available_regs = AvailT, - used_regs = Used bor Bit, - regs = Regs2 - }, + State1#state{regs = Regs1}, Reg }; move_to_native_register_emit( @@ -2809,40 +2789,39 @@ move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) -> Regs1 = jit_regs:set_contents(Regs0, RegDst, {imm, ValSrc}), State1#state{regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {ptr, Reg}, RegDst ) when ?IS_GPR(Reg) -> I1 = jit_arm32_asm:ldr(al, RegDst, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:invalidate_reg(Regs0, RegDst), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, + {x_reg, extra}, + RegDst ) -> I1 = jit_arm32_asm:ldr(al, RegDst, ?X_REG(?MAX_REG)), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, extra}), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {x_reg, X}, RegDst ) when X < ?MAX_REG -> I1 = jit_arm32_asm:ldr(al, RegDst, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = AT, regs = Regs0} = - State, + #state{available_regs = AT} = State, {y_reg, Y}, RegDst ) -> - Code = ldr_y_reg(RegDst, Y, AT), - Stream1 = StreamModule:append(Stream0, Code), - % ldr_y_reg clobbers first_avail(AT) as a hidden temp for loading Y_REGS pointer - Regs1 = - case AT of - 0 -> Regs0; - _ -> jit_regs:invalidate_reg(Regs0, first_avail(AT)) - end, - State#state{stream = Stream1, regs = Regs1}; + State1 = ldr_y_reg(State, RegDst, Y, AT), + #state{regs = Regs0} = State1, + Regs1 = jit_regs:set_contents(Regs0, RegDst, {y_reg, Y}), + State1#state{regs = Regs1}; move_to_native_register( #state{ stream_module = StreamModule, @@ -2912,24 +2891,15 @@ copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{available_regs = Avail} = State, {y_reg, Y} ) -> Reg = first_avail(Avail), AvailT = Avail band (bnot reg_bit(Reg)), - I1 = ldr_y_reg(Reg, Y, AvailT), + State1 = ldr_y_reg(State, Reg, Y, AvailT), I2 = jit_arm32_asm:str(al, Reg, ?CP), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - % ldr_y_reg clobbers first_avail(AvailT) as a hidden temp for loading Y_REGS pointer - Regs1a = jit_regs:invalidate_reg(Regs0, Reg), - Regs1 = - case AvailT of - 0 -> Regs1a; - _ -> jit_regs:invalidate_reg(Regs1a, first_avail(AvailT)) - end, - State#state{stream = Stream1, regs = Regs1}. + Stream1 = (State1#state.stream_module):append(State1#state.stream, I2), + State1#state{stream = Stream1}. increment_sp( #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = @@ -3043,13 +3013,14 @@ get_module_index( I2 = jit_arm32_asm:ldr(al, Reg, ?MODULE_INDEX(Reg)), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, TempJitState), Reg), + Regs1 = jit_regs:invalidate_reg(Regs0, TempJitState), + Regs2 = jit_regs:set_contents(Regs1, Reg, module_index), { State#state{ stream = Stream1, available_regs = Avail1, used_regs = UsedRegs0 bor RegBit, - regs = Regs1 + regs = Regs2 }, Reg }. @@ -3500,8 +3471,8 @@ decrement_reductions_and_maybe_schedule_next( Stream4, BNEOffset, <> ), State3 = merge_used_regs(State2#state{stream = Stream5}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}. + %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. + State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). call_or_schedule_next(State0, Label) -> @@ -3693,13 +3664,26 @@ str_y_reg(SrcReg, Y, TempReg1, _AvailMask) -> <>. %% Helper function to generate ldr instruction with y_reg offset, handling large offsets -ldr_y_reg(DstReg, Y, AvailMask) when AvailMask =/= 0, Y * 4 =< 4095 -> +ldr_y_reg( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, + DstReg, + Y, + AvailMask +) when AvailMask =/= 0, Y * 4 =< 4095 -> % Small offset - use immediate addressing TempReg = first_avail(AvailMask), I1 = jit_arm32_asm:ldr(al, TempReg, ?Y_REGS), I2 = jit_arm32_asm:ldr(al, DstReg, {TempReg, Y * 4}), - <>; -ldr_y_reg(DstReg, Y, AvailMask) when AvailMask =/= 0 -> + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, DstReg), TempReg), + State#state{stream = Stream1, regs = Regs1}; +ldr_y_reg( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, + DstReg, + Y, + AvailMask +) when AvailMask =/= 0 -> % Large offset (Y * 4 > 4095) - split into base + 4080 + remainder % 4080 (0xFF0) is the largest ARM-encodable immediate close to the 4095 ldr/str limit TempReg = first_avail(AvailMask), @@ -3709,13 +3693,25 @@ ldr_y_reg(DstReg, Y, AvailMask) when AvailMask =/= 0 -> I1 = jit_arm32_asm:ldr(al, TempReg, ?Y_REGS), I2 = jit_arm32_asm:add(al, TempReg, TempReg, BaseOffset), I3 = jit_arm32_asm:ldr(al, DstReg, {TempReg, Remainder}), - <>; -ldr_y_reg(DstReg, Y, 0) when Y * 4 =< 4095 -> + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, DstReg), TempReg), + State#state{stream = Stream1, regs = Regs1}; +ldr_y_reg( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DstReg, Y, 0 +) when + Y * 4 =< 4095 +-> % Small offset, no registers available - use DstReg as temp I1 = jit_arm32_asm:ldr(al, DstReg, ?Y_REGS), I2 = jit_arm32_asm:ldr(al, DstReg, {DstReg, Y * 4}), - <>; -ldr_y_reg(DstReg, Y, 0) -> + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + Regs1 = jit_regs:invalidate_reg(Regs0, DstReg), + State#state{stream = Stream1, regs = Regs1}; +ldr_y_reg( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DstReg, Y, 0 +) -> % Large offset (Y * 4 > 4095), no registers available % Use DstReg as temp: load Y_REGS base, add 4080, ldr with remainder % 4080 (0xFF0) is the largest ARM-encodable immediate close to the 4095 ldr/str limit @@ -3725,7 +3721,10 @@ ldr_y_reg(DstReg, Y, 0) -> I1 = jit_arm32_asm:ldr(al, DstReg, ?Y_REGS), I2 = jit_arm32_asm:add(al, DstReg, DstReg, BaseOffset), I3 = jit_arm32_asm:ldr(al, DstReg, {DstReg, Remainder}), - <>. + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + Regs1 = jit_regs:invalidate_reg(Regs0, DstReg), + State#state{stream = Stream1, regs = Regs1}. reg_bit(r0) -> ?REG_BIT_R0; reg_bit(r1) -> ?REG_BIT_R1; diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index a8c816899a..bd0c67b419 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -773,7 +773,7 @@ call_primitive_last( State5 = State4#state{ available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0, - regs = jit_regs:invalidate_all(State4#state.regs) + regs = jit_regs:unreachable(State4#state.regs) }, flush_literal_pool(State5). @@ -846,12 +846,11 @@ return_if_not_equal_to_ctx( I2 = jit_armv6m_asm:bcc(eq, 2 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), RegBit = reg_bit(Reg), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State#state{ stream = Stream1, available_regs = AvailableRegs0 bor RegBit, used_regs = UsedRegs0 band (bnot RegBit), - regs = Regs1 + regs = Regs0 }. %%----------------------------------------------------------------------------- @@ -870,14 +869,14 @@ jump_to_label( {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), Stream1 = StreamModule:append(Stream0, CodeBlock), %% After unconditional jump, register tracking is dead until next label - State2 = State1#state{stream = Stream1, regs = jit_regs:invalidate_all(State1#state.regs)}, + State2 = State1#state{stream = Stream1, regs = jit_regs:unreachable(State1#state.regs)}, flush_literal_pool(State2). jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> Offset = StreamModule:offset(Stream0), CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), Stream1 = StreamModule:append(Stream0, CodeBlock), - State2 = State#state{stream = Stream1, regs = jit_regs:invalidate_all(State#state.regs)}, + State2 = State#state{stream = Stream1, regs = jit_regs:unreachable(State#state.regs)}, flush_literal_pool(State2). %%----------------------------------------------------------------------------- @@ -941,7 +940,12 @@ jump_to_continuation( Code = <>, Stream2 = StreamModule:append(State1#state.stream, Code), % Free all registers as this is a terminal instruction - State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0}, + State2 = State1#state{ + stream = Stream2, + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0, + regs = jit_regs:unreachable(State1#state.regs) + }, flush_literal_pool(State2). branch_to_offset_code(#state{thumb2 = true}, Offset, TargetOffset) -> @@ -1222,7 +1226,7 @@ if_block_cond( Code = <>, Stream2 = StreamModule:append(Stream1, Code), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, le, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( @@ -1279,7 +1283,7 @@ if_block_cond( ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), Stream2 = StreamModule:append(Stream1, <>), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( @@ -1300,8 +1304,8 @@ if_block_cond( ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), Stream1 = StreamModule:append(Stream0, <>), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2b = jit_regs:invalidate_reg(State2#state.regs, Temp), - State3 = State2#state{stream = Stream1, regs = Regs2b}, + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), + State3 = State2#state{stream = Stream1, regs = Regs2}, {State3, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, @@ -1408,8 +1412,8 @@ if_block_cond( ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), Stream2 = StreamModule:append(Stream1, <>), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2c = jit_regs:invalidate_reg(State2#state.regs, Temp), - State3 = State2#state{stream = Stream2, regs = Regs2c}, + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), + State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0, available_regs = Available} = State0, @@ -1430,8 +1434,8 @@ if_block_cond( ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), Stream2 = StreamModule:append(Stream1, <>), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2d = jit_regs:invalidate_reg(State2#state.regs, Temp), - State3 = State2#state{stream = Stream2, regs = Regs2d}, + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), + State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( #state{ @@ -2217,14 +2221,12 @@ set_registers_args1( Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; set_registers_args1( - #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State, + #state{available_regs = AvailRegs} = State, {y_reg, X}, Reg, _StackOffset ) -> - Code = ldr_y_reg(Reg, X, AvailRegs), - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}; + ldr_y_reg(State, Reg, X, AvailRegs); set_registers_args1( #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset ) when @@ -2306,7 +2308,7 @@ move_to_vm_register_emit( I1 = jit_armv6m_asm:movs(Temp2, N), YCode = str_y_reg(Temp2, Y, Temp1, AT), Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp1), Temp2), + Regs1 = jit_regs:set_contents(jit_regs:invalidate_reg(Regs0, Temp1), Temp2, {imm, N}), % str_y_reg may clobber first_avail(AT) for large offsets Regs2 = case AT of @@ -2323,7 +2325,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when I1 = jit_armv6m_asm:movs(Temp, N), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {imm, N}), State1#state{available_regs = AR0, regs = Regs1}; %% Handle large values using simple literal pool (branch-over pattern) move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when @@ -2333,7 +2335,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when AT = AR0 band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), State2 = move_to_vm_register(State1, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs1 = jit_regs:set_contents(State2#state.regs, Temp, {imm, N}), State2#state{available_regs = AR0, regs = Regs1}; % Source is a VM register move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> @@ -2342,7 +2344,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(?MAX_REG)), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, ?MAX_REG}), State1#state{available_regs = AR0, regs = Regs1}; move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> Temp = first_avail(AR0), @@ -2350,7 +2352,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(X)), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, X}), State1#state{available_regs = AR0, regs = Regs1}; move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> Temp = first_avail(AR0), @@ -2363,21 +2365,9 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest) -> Temp = first_avail(AR0), AT = AR0 band (bnot reg_bit(Temp)), - Code = ldr_y_reg(Temp, Y, AT), - Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), - % ldr_y_reg clobbers first_avail(AT) as a hidden temp for loading Y_REGS pointer - Regs0a = - case AT of - 0 -> State0#state.regs; - _ -> jit_regs:invalidate_reg(State0#state.regs, first_avail(AT)) - end, - State0a = State0#state{ - stream = Stream1, - available_regs = AT, - regs = Regs0a - }, + State0a = ldr_y_reg(State0#state{available_regs = AT}, Temp, Y, AT), State1 = move_to_vm_register(State0a, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {y_reg, Y}), State1#state{available_regs = AR0, regs = Regs1}; % term_to_float move_to_vm_register_emit( @@ -2439,7 +2429,7 @@ move_array_element( I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), - Regs2 = jit_regs:invalidate_reg(Regs1, Temp), + Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( #state{stream_module = StreamModule, available_regs = Avail} = @@ -2466,7 +2456,7 @@ move_array_element( Stream2 = StreamModule:append(Stream1, <>), Regs1 = jit_regs:invalidate_vm_loc(State1#state.regs, {x_reg, X}), Regs2 = jit_regs:invalidate_reg(Regs1, Temp1), - Regs3 = jit_regs:invalidate_reg(Regs2, Temp2), + Regs3 = jit_regs:set_contents(Regs2, Temp2, {x_reg, X}), State1#state{stream = Stream2, regs = Regs3}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = @@ -2990,11 +2980,8 @@ move_to_native_register_emit( }; move_to_native_register_emit( #state{ - stream_module = StreamModule, - stream = Stream0, available_regs = Avail, - used_regs = Used, - regs = Regs0 + used_regs = Used } = State, {y_reg, Y}, Contents @@ -3002,22 +2989,15 @@ move_to_native_register_emit( Reg = first_avail(Avail), Bit = reg_bit(Reg), AvailT = Avail band (bnot Bit), - Code = ldr_y_reg(Reg, Y, AvailT), - Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:set_contents(Regs0, Reg, Contents), - % ldr_y_reg clobbers first_avail(AvailT) as a hidden temp for loading Y_REGS pointer - Regs2 = - case AvailT of - 0 -> Regs1; - _ -> jit_regs:invalidate_reg(Regs1, first_avail(AvailT)) - end, + State1 = ldr_y_reg( + State#state{available_regs = AvailT, used_regs = Used bor Bit}, + Reg, + Y, + AvailT + ), + Regs1 = jit_regs:set_contents(State1#state.regs, Reg, Contents), { - State#state{ - stream = Stream1, - available_regs = AvailT, - used_regs = Used bor Bit, - regs = Regs2 - }, + State1#state{regs = Regs1}, Reg }; move_to_native_register_emit( @@ -3061,40 +3041,39 @@ move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) -> Regs1 = jit_regs:set_contents(Regs0, RegDst, {imm, ValSrc}), State1#state{regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {ptr, Reg}, RegDst ) when ?IS_GPR(Reg) -> I1 = jit_armv6m_asm:ldr(RegDst, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:invalidate_reg(Regs0, RegDst), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, + {x_reg, extra}, + RegDst ) -> I1 = jit_armv6m_asm:ldr(RegDst, ?X_REG(?MAX_REG)), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, extra}), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {x_reg, X}, RegDst ) when X < ?MAX_REG -> I1 = jit_armv6m_asm:ldr(RegDst, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0, available_regs = AT, regs = Regs0} = - State, + #state{available_regs = AT} = State, {y_reg, Y}, RegDst ) -> - Code = ldr_y_reg(RegDst, Y, AT), - Stream1 = StreamModule:append(Stream0, Code), - % ldr_y_reg clobbers first_avail(AT) as a hidden temp for loading Y_REGS pointer - Regs1 = - case AT of - 0 -> Regs0; - _ -> jit_regs:invalidate_reg(Regs0, first_avail(AT)) - end, - State#state{stream = Stream1, regs = Regs1}; + State1 = ldr_y_reg(State, RegDst, Y, AT), + #state{regs = Regs0} = State1, + Regs1 = jit_regs:set_contents(Regs0, RegDst, {y_reg, Y}), + State1#state{regs = Regs1}; move_to_native_register( #state{ stream_module = StreamModule, @@ -3164,24 +3143,15 @@ copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = - State, + #state{available_regs = Avail} = State, {y_reg, Y} ) -> Reg = first_avail(Avail), AvailT = Avail band (bnot reg_bit(Reg)), - I1 = ldr_y_reg(Reg, Y, AvailT), + State1 = ldr_y_reg(State, Reg, Y, AvailT), I2 = jit_armv6m_asm:str(Reg, ?CP), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - % ldr_y_reg clobbers first_avail(AvailT) as a hidden temp for loading Y_REGS pointer - Regs1a = jit_regs:invalidate_reg(Regs0, Reg), - Regs1 = - case AvailT of - 0 -> Regs1a; - _ -> jit_regs:invalidate_reg(Regs1a, first_avail(AvailT)) - end, - State#state{stream = Stream1, regs = Regs1}. + Stream1 = (State1#state.stream_module):append(State1#state.stream, I2), + State1#state{stream = Stream1}. increment_sp( #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = @@ -3309,13 +3279,14 @@ get_module_index( I2 = jit_armv6m_asm:ldr(Reg, ?MODULE_INDEX(Reg)), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, TempJitState), Reg), + Regs1 = jit_regs:invalidate_reg(Regs0, TempJitState), + Regs2 = jit_regs:set_contents(Regs1, Reg, module_index), { State#state{ stream = Stream1, available_regs = Avail1, used_regs = UsedRegs0 bor RegBit, - regs = Regs1 + regs = Regs2 }, Reg }. @@ -3872,8 +3843,8 @@ decrement_reductions_and_maybe_schedule_next( Stream4, BNEOffset, <> ), State3 = merge_used_regs(State2#state{stream = Stream5}, State1#state.used_regs), - MergedRegs = jit_regs:merge(State1#state.regs, State2#state.regs), - State3#state{regs = MergedRegs}. + %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. + State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). call_or_schedule_next(State0, Label) -> @@ -4113,13 +4084,26 @@ str_y_reg(SrcReg, Y, TempReg1, 0) -> <>. %% Helper function to generate ldr instruction with y_reg offset, handling large offsets -ldr_y_reg(DstReg, Y, AvailMask) when AvailMask =/= 0, Y * 4 =< 124 -> +ldr_y_reg( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, + DstReg, + Y, + AvailMask +) when AvailMask =/= 0, Y * 4 =< 124 -> % Small offset - use immediate addressing TempReg = first_avail(AvailMask), I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), I2 = jit_armv6m_asm:ldr(DstReg, {TempReg, Y * 4}), - <>; -ldr_y_reg(DstReg, Y, AvailMask) when AvailMask =/= 0 -> + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, DstReg), TempReg), + State#state{stream = Stream1, regs = Regs1}; +ldr_y_reg( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, + DstReg, + Y, + AvailMask +) when AvailMask =/= 0 -> % Large offset - use DstReg as second temp register for arithmetic TempReg = first_avail(AvailMask), Offset = Y * 4, @@ -4127,13 +4111,25 @@ ldr_y_reg(DstReg, Y, AvailMask) when AvailMask =/= 0 -> I2 = jit_armv6m_asm:movs(DstReg, Offset), I3 = jit_armv6m_asm:add(DstReg, TempReg), I4 = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}), - <>; -ldr_y_reg(DstReg, Y, 0) when Y * 4 =< 124 -> + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, DstReg), TempReg), + State#state{stream = Stream1, regs = Regs1}; +ldr_y_reg( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DstReg, Y, 0 +) when + Y * 4 =< 124 +-> % Small offset, no registers available - use DstReg as temp I1 = jit_armv6m_asm:ldr(DstReg, ?Y_REGS), I2 = jit_armv6m_asm:ldr(DstReg, {DstReg, Y * 4}), - <>; -ldr_y_reg(DstReg, Y, 0) -> + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + Regs1 = jit_regs:invalidate_reg(Regs0, DstReg), + State#state{stream = Stream1, regs = Regs1}; +ldr_y_reg( + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, DstReg, Y, 0 +) -> % Large offset, no registers available - use IP_REG as temp register % Note: IP_REG (r12) can only be used with mov, not ldr directly Offset = Y * 4, @@ -4142,7 +4138,10 @@ ldr_y_reg(DstReg, Y, 0) -> I3 = jit_armv6m_asm:movs(DstReg, Offset), I4 = jit_armv6m_asm:add(DstReg, ?IP_REG), I5 = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}), - <>. + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + Regs1 = jit_regs:invalidate_reg(Regs0, DstReg), + State#state{stream = Stream1, regs = Regs1}. reg_bit(r0) -> ?REG_BIT_R0; reg_bit(r1) -> ?REG_BIT_R1; diff --git a/libs/jit/src/jit_regs.erl b/libs/jit/src/jit_regs.erl index 5d94bed5f6..881ceb2ae2 100644 --- a/libs/jit/src/jit_regs.erl +++ b/libs/jit/src/jit_regs.erl @@ -39,6 +39,7 @@ get_contents/2, get_all_contents/1, set_contents/3, + unreachable/1, invalidate_reg/2, invalidate_all/1, invalidate_volatile/2, @@ -75,7 +76,8 @@ -record(regs, { contents = #{} :: #{atom() => contents()}, - stack = [] :: [atom() | contents()] + stack = [] :: [atom() | contents()], + unreachable = false :: boolean() }). -opaque regs() :: #regs{}. @@ -99,6 +101,11 @@ get_all_contents(#regs{contents = C}) -> C. set_contents(#regs{contents = C} = Regs, Reg, Contents) -> Regs#regs{contents = C#{Reg => Contents}}. +%% @doc Mark register tracking as unreachable after terminal control flow. +-spec unreachable(regs()) -> regs(). +unreachable(Regs) -> + Regs#regs{contents = #{}, stack = [], unreachable = true}. + %% @doc Invalidate tracking for a single CPU register (e.g. it was clobbered). -spec invalidate_reg(regs(), atom()) -> regs(). invalidate_reg(#regs{contents = C} = Regs, Reg) -> @@ -107,7 +114,7 @@ invalidate_reg(#regs{contents = C} = Regs, Reg) -> %% @doc Invalidate all register tracking (e.g. at a label or unknown branch target). -spec invalidate_all(regs()) -> regs(). invalidate_all(Regs) -> - Regs#regs{contents = #{}, stack = []}. + Regs#regs{contents = #{}, stack = [], unreachable = false}. %% @doc Invalidate registers that are volatile across a C function call. %% On x86-64 System V ABI, all our scratch registers (rax, rcx, rdx, rsi, rdi, @@ -143,6 +150,12 @@ find_in_map(Iterator, Contents) -> %% @doc Merge two register tracking states (for control flow merge points). %% Only keeps information that is consistent in both states. -spec merge(regs(), regs()) -> regs(). +merge(#regs{unreachable = true}, #regs{unreachable = true}) -> + unreachable(new()); +merge(#regs{unreachable = true}, #regs{} = Regs) -> + Regs#regs{stack = []}; +merge(#regs{} = Regs, #regs{unreachable = true}) -> + Regs#regs{stack = []}; merge(#regs{contents = C1}, #regs{contents = C2}) -> %% Keep only entries that match in both maps MergedContents = maps:filter( diff --git a/libs/jit/src/jit_riscv_impl.hrl b/libs/jit/src/jit_riscv_impl.hrl index fd76b98592..ae7283696f 100644 --- a/libs/jit/src/jit_riscv_impl.hrl +++ b/libs/jit/src/jit_riscv_impl.hrl @@ -419,7 +419,7 @@ call_primitive_last( State4#state{ available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0, - regs = jit_regs:invalidate_all(State4#state.regs) + regs = jit_regs:unreachable(State4#state.regs) }. %%----------------------------------------------------------------------------- @@ -475,12 +475,11 @@ return_if_not_equal_to_ctx( I1 = ?ASM:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)), Stream1 = StreamModule:append(Stream0, <>), RegBit = reg_bit(Reg), - Regs1 = jit_regs:invalidate_reg(State#state.regs, Reg), State#state{ stream = Stream1, available_regs = AvailableRegs0 bor RegBit, used_regs = UsedRegs0 band (bnot RegBit), - regs = Regs1 + regs = State#state.regs }. %%----------------------------------------------------------------------------- @@ -499,13 +498,13 @@ jump_to_label( {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), Stream1 = StreamModule:append(Stream0, CodeBlock), %% After unconditional jump, register tracking is dead until next label - State1#state{stream = Stream1, regs = jit_regs:invalidate_all(State1#state.regs)}. + State1#state{stream = Stream1, regs = jit_regs:unreachable(State1#state.regs)}. jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> Offset = StreamModule:offset(Stream0), CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), Stream1 = StreamModule:append(Stream0, CodeBlock), - State#state{stream = Stream1, regs = jit_regs:invalidate_all(State#state.regs)}. + State#state{stream = Stream1, regs = jit_regs:unreachable(State#state.regs)}. %%----------------------------------------------------------------------------- %% @doc Jump to address in continuation pointer register @@ -540,7 +539,12 @@ jump_to_continuation( Code = <>, Stream1 = StreamModule:append(Stream0, Code), % Free all registers since this is a tail jump - State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0}. + State0#state{ + stream = Stream1, + available_regs = ?AVAILABLE_REGS_MASK, + used_regs = 0, + regs = jit_regs:unreachable(State0#state.regs) + }. branch_to_offset_code(_State, Offset, TargetOffset) when TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 @@ -750,7 +754,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bge, Reg, Temp}, BranchDelta}; if_block_cond( @@ -771,7 +775,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bge, Reg, Temp}, BranchDelta}; if_block_cond( @@ -792,7 +796,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bge, Temp, Reg}, BranchDelta}; if_block_cond( @@ -813,7 +817,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bge, Temp, Reg}, BranchDelta}; if_block_cond( @@ -883,7 +887,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {beq, Reg, Temp}, BranchDelta}; if_block_cond( @@ -921,7 +925,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bne, Reg, Temp}, BranchDelta}; if_block_cond( @@ -953,7 +957,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {bne, Reg, Temp}, BranchDelta}; if_block_cond( @@ -974,7 +978,7 @@ if_block_cond( BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), - Regs2 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs2 = jit_regs:set_contents(State2#state.regs, Temp, {imm, Val}), State3 = State2#state{stream = Stream2, regs = Regs2}, {State3, {beq, Reg, Temp}, BranchDelta}; if_block_cond( @@ -1791,7 +1795,7 @@ move_to_vm_register_emit( I1 = ?ASM:li(Temp2, N), YCode = str_y_reg(Temp2, Y, Temp1, AT), Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp1), Temp2), + Regs1 = jit_regs:set_contents(jit_regs:invalidate_reg(Regs0, Temp1), Temp2, {imm, N}), % str_y_reg may clobber first_avail(AT) for large offsets Regs2 = case AT of @@ -1808,7 +1812,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when I1 = ?ASM:li(Temp, N), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {imm, N}), State1#state{available_regs = AR0, regs = Regs1}; %% Handle large values using simple literal pool (branch-over pattern) move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when @@ -1818,7 +1822,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, N, Dest) when AT = AR0 band (bnot reg_bit(Temp)), State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), State2 = move_to_vm_register(State1, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State2#state.regs, Temp), + Regs1 = jit_regs:set_contents(State2#state.regs, Temp, {imm, N}), State2#state{available_regs = AR0, regs = Regs1}; % Source is a VM register move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, Dest) -> @@ -1828,7 +1832,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, extra}, I1 = ?LOAD_WORD(Temp, BaseReg, Off), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, ?MAX_REG}), State1#state{available_regs = AR0, regs = Regs1}; move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest) -> Temp = first_avail(AR0), @@ -1837,7 +1841,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {x_reg, X}, Dest I1 = ?LOAD_WORD(Temp, XReg, X_REGOffset), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {x_reg, X}), State1#state{available_regs = AR0, regs = Regs1}; move_to_vm_register_emit(#state{available_regs = AR0} = State0, {ptr, Reg}, Dest) -> Temp = first_avail(AR0), @@ -1864,7 +1868,7 @@ move_to_vm_register_emit(#state{available_regs = AR0} = State0, {y_reg, Y}, Dest regs = Regs0a }, State1 = move_to_vm_register(State0a, Temp, Dest), - Regs1 = jit_regs:invalidate_reg(State1#state.regs, Temp), + Regs1 = jit_regs:set_contents(State1#state.regs, Temp, {y_reg, Y}), State1#state{available_regs = AR0, regs = Regs1}; % term_to_float move_to_vm_register_emit( @@ -1922,7 +1926,7 @@ move_array_element( I2 = ?STORE_WORD(BaseReg, Temp, Off), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), - Regs2 = jit_regs:invalidate_reg(Regs1, Temp), + Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = @@ -2405,27 +2409,30 @@ move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) -> Regs1 = jit_regs:set_contents(Regs0, RegDst, {imm, ValSrc}), State1#state{regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {ptr, Reg}, RegDst ) when ?IS_GPR(Reg) -> I1 = ?LOAD_WORD(RegDst, Reg, 0), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:invalidate_reg(Regs0, RegDst), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {x_reg, extra}, RegDst ) -> {BaseReg, Off} = ?X_REG(?MAX_REG), I1 = ?LOAD_WORD(RegDst, BaseReg, Off), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, extra}), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst + #state{stream_module = StreamModule, stream = Stream0, regs = Regs0} = State, {x_reg, X}, RegDst ) when X < ?MAX_REG -> {XReg, X_REGOffset} = ?X_REG(X), I1 = ?LOAD_WORD(RegDst, XReg, X_REGOffset), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; + Regs1 = jit_regs:set_contents(Regs0, RegDst, {x_reg, X}), + State#state{stream = Stream1, regs = Regs1}; move_to_native_register( #state{stream_module = StreamModule, stream = Stream0, available_regs = AT, regs = Regs0} = State, @@ -2437,8 +2444,11 @@ move_to_native_register( % ldr_y_reg clobbers first_avail(AT) as a hidden temp for loading Y_REGS pointer Regs1 = case AT of - 0 -> Regs0; - _ -> jit_regs:invalidate_reg(Regs0, first_avail(AT)) + 0 -> jit_regs:set_contents(Regs0, RegDst, {y_reg, Y}); + _ -> + jit_regs:invalidate_reg( + jit_regs:set_contents(Regs0, RegDst, {y_reg, Y}), first_avail(AT) + ) end, State#state{stream = Stream1, regs = Regs1}; move_to_native_register( @@ -2522,7 +2532,7 @@ move_to_cp( Code = <>, Stream1 = StreamModule:append(Stream0, Code), % ldr_y_reg clobbers first_avail(AvailT) as a hidden temp for loading Y_REGS pointer - Regs1a = jit_regs:invalidate_reg(Regs0, Reg), + Regs1a = jit_regs:set_contents(Regs0, Reg, {y_reg, Y}), Regs1 = case AvailT of 0 -> Regs1a; @@ -2627,7 +2637,7 @@ get_module_index( I2 = ?ASM:lw(Reg, Reg, 0), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + Regs1 = jit_regs:set_contents(Regs0, Reg, module_index), { State#state{ stream = Stream1, @@ -2982,9 +2992,8 @@ decrement_reductions_and_maybe_schedule_next( ), StreamN = Stream4, State3 = merge_used_regs(State2#state{stream = StreamN}, State1#state.used_regs), - %% The schedule_next path is a tail call (dead end), so the register tracking - %% from the non-taken path (State1) is what matters at the continuation. - State3#state{regs = State1#state.regs}. + %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. + State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. call_or_schedule_next(State0, Label) -> {State1, RewriteOffset, TempReg} = set_cp(State0), @@ -3455,4 +3464,3 @@ add_label( }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. - diff --git a/libs/jit/src/jit_wasm32.erl b/libs/jit/src/jit_wasm32.erl index 8a32d1e7ba..03aff9037f 100644 --- a/libs/jit/src/jit_wasm32.erl +++ b/libs/jit/src/jit_wasm32.erl @@ -365,7 +365,7 @@ call_primitive_last(State0, Primitive, Args) -> State1#state{ available_regs = AllFree, used_regs = 0, - regs = jit_regs:invalidate_all(State1#state.regs) + regs = jit_regs:unreachable(State1#state.regs) }. call_primitive_with_cp(State0, Primitive, Args) -> @@ -439,7 +439,7 @@ jump_to_label(State0, Label) -> (jit_wasm32_asm:return())/binary >>), %% After unconditional jump, register tracking is dead until next label - State2#state{regs = jit_regs:invalidate_all(State2#state.regs)}. + State2#state{regs = jit_regs:unreachable(State2#state.regs)}. jump_to_offset(State0, TargetOffset) -> %% The tail cache in jit.erl stores offsets from offset(). Since we return @@ -457,7 +457,7 @@ jump_to_offset(State0, TargetOffset) -> (jit_wasm32_asm:local_get(?CTX_LOCAL))/binary, (jit_wasm32_asm:return())/binary >>), - State1#state{regs = jit_regs:invalidate_all(State1#state.regs)} + State1#state{regs = jit_regs:unreachable(State1#state.regs)} end. cond_jump_to_label(State, Cond, Label) -> @@ -479,7 +479,11 @@ jump_to_continuation(State0, {free, OffsetLocal}) -> >>, State1 = emit(State0, Code), AllFree = (1 bsl State1#state.max_scratch) - 1, - State1#state{available_regs = AllFree, used_regs = 0}. + State1#state{ + available_regs = AllFree, + used_regs = 0, + regs = jit_regs:unreachable(State1#state.regs) + }. %%============================================================================= %% Conditional blocks diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index b6b163acce..353982683f 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -560,7 +560,7 @@ call_primitive_last( stream = Stream1, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0, - regs = jit_regs:invalidate_all(State0#state.regs) + regs = jit_regs:unreachable(State0#state.regs) }; call_primitive_last( #state{ @@ -611,7 +611,7 @@ call_primitive_last( stream = Stream3, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0, - regs = jit_regs:invalidate_all(State1#state.regs) + regs = jit_regs:unreachable(State1#state.regs) }. %%----------------------------------------------------------------------------- @@ -642,12 +642,11 @@ return_if_not_equal_to_ctx( I2 = jit_x86_64_asm:jz(byte_size(I3) + byte_size(I4) + 2), Stream1 = StreamModule:append(Stream0, <>), RegBit = reg_bit(Reg), - Regs1 = jit_regs:invalidate_reg(State#state.regs, Reg), State#state{ stream = Stream1, available_regs = AvailableRegs0 bor RegBit, used_regs = UsedRegs0 band (bnot RegBit), - regs = Regs1 + regs = State#state.regs }. %%----------------------------------------------------------------------------- @@ -673,7 +672,7 @@ jump_to_label( I1 = jit_x86_64_asm:jmp(RelOffset), Stream1 = StreamModule:append(Stream0, I1), %% After unconditional jump, register tracking is dead until next label - State#state{stream = Stream1, regs = jit_regs:invalidate_all(State#state.regs)}; + State#state{stream = Stream1, regs = jit_regs:unreachable(State#state.regs)}; false -> % Label not yet known, emit placeholder and add relocation {RelocOffset, I1} = jit_x86_64_asm:jmp_rel32(1), @@ -682,7 +681,7 @@ jump_to_label( State#state{ stream = Stream1, branches = [Reloc | AccBranches], - regs = jit_regs:invalidate_all(State#state.regs) + regs = jit_regs:unreachable(State#state.regs) } end. @@ -691,7 +690,7 @@ jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, T RelOffset = TargetOffset - Offset, I1 = jit_x86_64_asm:jmp(RelOffset), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1, regs = jit_regs:invalidate_all(State#state.regs)}. + State#state{stream = Stream1, regs = jit_regs:unreachable(State#state.regs)}. %%----------------------------------------------------------------------------- %% @doc Jump to a continuation address stored in a register. @@ -732,7 +731,7 @@ jump_to_continuation( stream = Stream1, available_regs = ?AVAILABLE_REGS_MASK, used_regs = 0, - regs = jit_regs:invalidate_all(State#state.regs) + regs = jit_regs:unreachable(State#state.regs) }. %%----------------------------------------------------------------------------- @@ -876,7 +875,7 @@ if_block_cond0(State0, {Value, '<', RegOrTuple}) when ?IS_SINT32_T(Value) -> {State1, <>, byte_size(I1) + RelocJLEOffset}; % Catch-all for large values outside SINT32_T range if_block_cond0( - #state{available_regs = Avail} = State0, {Value, '<', RegOrTuple} + #state{available_regs = Avail, regs = Regs0} = State0, {Value, '<', RegOrTuple} ) when is_integer(Value) -> Temp = first_avail(Avail), Reg = @@ -887,7 +886,8 @@ if_block_cond0( I1 = jit_x86_64_asm:movabsq(Value, Temp), I2 = jit_x86_64_asm:cmpq(Temp, Reg), {RelocJLEOffset, I3} = jit_x86_64_asm:jle_rel8(1), - State1 = if_block_free_reg(RegOrTuple, State0), + Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, Value}), + State1 = if_block_free_reg(RegOrTuple, State0#state{regs = Regs1}), {State1, <>, byte_size(I1) + byte_size(I2) + RelocJLEOffset}; if_block_cond0(State0, {RegOrTuple, '<', Value}) when ?IS_SINT32_T(Value) -> Reg = @@ -911,7 +911,7 @@ if_block_cond0(State0, {RegOrTuple, '<', RegB}) when is_atom(RegB) -> {State1, <>, byte_size(I1) + RelocJGEOffset}; % Catch-all for large values outside SINT32_T range if_block_cond0( - #state{available_regs = Avail} = State0, {RegOrTuple, '<', Value} + #state{available_regs = Avail, regs = Regs0} = State0, {RegOrTuple, '<', Value} ) when is_integer(Value) -> Temp = first_avail(Avail), Reg = @@ -922,7 +922,8 @@ if_block_cond0( I1 = jit_x86_64_asm:movabsq(Value, Temp), I2 = jit_x86_64_asm:cmpq(Temp, Reg), {RelocJGEOffset, I3} = jit_x86_64_asm:jge_rel8(1), - State1 = if_block_free_reg(RegOrTuple, State0), + Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, Value}), + State1 = if_block_free_reg(RegOrTuple, State0#state{regs = Regs1}), {State1, <>, byte_size(I1) + byte_size(I2) + RelocJGEOffset}; if_block_cond0(State0, {RegOrTuple, '==', 0}) -> Reg = @@ -958,7 +959,7 @@ if_block_cond0( State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + RelocJZOffset}; if_block_cond0( - #state{available_regs = Avail} = State0, + #state{available_regs = Avail, regs = Regs0} = State0, {RegOrTuple, '!=', Val} ) when is_integer(Val) orelse ?IS_GPR(Val) -> Temp = first_avail(Avail), @@ -970,7 +971,8 @@ if_block_cond0( I1 = jit_x86_64_asm:movabsq(Val, Temp), I2 = jit_x86_64_asm:cmpq(Temp, Reg), {RelocJZOffset, I3} = jit_x86_64_asm:jz_rel8(1), - State1 = if_block_free_reg(RegOrTuple, State0), + Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, Val}), + State1 = if_block_free_reg(RegOrTuple, State0#state{regs = Regs1}), {State1, <>, byte_size(I1) + byte_size(I2) + RelocJZOffset}; if_block_cond0( State0, @@ -999,7 +1001,7 @@ if_block_cond0( State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + RelocJZOffset}; if_block_cond0( - #state{available_regs = Avail} = State0, + #state{available_regs = Avail, regs = Regs0} = State0, {RegOrTuple, '==', Val} ) when is_integer(Val) orelse ?IS_GPR(Val) -> Temp = first_avail(Avail), @@ -1011,7 +1013,8 @@ if_block_cond0( I1 = jit_x86_64_asm:movabsq(Val, Temp), I2 = jit_x86_64_asm:cmpq(Temp, Reg), {RelocJZOffset, I3} = jit_x86_64_asm:jnz_rel8(1), - State1 = if_block_free_reg(RegOrTuple, State0), + Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, Val}), + State1 = if_block_free_reg(RegOrTuple, State0#state{regs = Regs1}), {State1, <>, byte_size(I1) + byte_size(I2) + RelocJZOffset}; if_block_cond0(State0, {{free, Reg1}, '==', {free, Reg2}}) -> % Compare two free registers @@ -1079,14 +1082,18 @@ if_block_cond0(#state{regs = Regs0} = State0, {{free, Reg} = RegTuple, '&', Mask Regs1 = jit_regs:invalidate_reg(Regs0, Reg), State1 = if_block_free_reg(RegTuple, State0#state{regs = Regs1}), {State1, <>, byte_size(I1) + byte_size(I2) + RelocJZOffset}; -if_block_cond0(State0, {Reg, '&', Mask, '!=', Val}) when ?IS_UINT8_T(Mask) -> +if_block_cond0(#state{regs = Regs0} = State0, {Reg, '&', Mask, '!=', Val}) when ?IS_UINT8_T(Mask) -> Temp = first_avail(State0#state.available_regs), I1 = jit_x86_64_asm:movq(Reg, Temp), I2 = jit_x86_64_asm:andb(Mask, Temp), I3 = jit_x86_64_asm:cmpb(Val, Temp), {RelocJZOffset, I4} = jit_x86_64_asm:jz_rel8(1), - {State0, <>, - byte_size(I1) + byte_size(I2) + byte_size(I3) + RelocJZOffset}. + Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + { + State0#state{regs = Regs1}, + <>, + byte_size(I1) + byte_size(I2) + byte_size(I3) + RelocJZOffset + }. -spec if_block_free_reg(x86_64_register() | {free, x86_64_register()}, state()) -> state(). if_block_free_reg({free, Reg}, #state{available_regs = AvR0, used_regs = UR0} = State0) -> @@ -1600,7 +1607,7 @@ move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, ?X_REG(X)), Stream1 = (State#state.stream_module):append(State#state.stream, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, N}), State#state{stream = Stream1, regs = Regs1}; move_to_vm_register_emit( #state{available_regs = Avail, regs = Regs0} = State, N, {x_reg, extra} @@ -1611,7 +1618,7 @@ move_to_vm_register_emit( I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, ?X_REG(?MAX_REG)), Stream1 = (State#state.stream_module):append(State#state.stream, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, N}), State#state{stream = Stream1, regs = Regs1}; move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N, {ptr, Reg}) when is_integer(N) @@ -1620,7 +1627,7 @@ move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N I1 = jit_x86_64_asm:movabsq(N, Temp), I2 = jit_x86_64_asm:movq(Temp, {0, Reg}), Stream1 = (State#state.stream_module):append(State#state.stream, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, N}), State#state{stream = Stream1, regs = Regs1}; move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N, {y_reg, Y}) when is_integer(N) @@ -1633,7 +1640,7 @@ move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State, N Stream1 = (State#state.stream_module):append( State#state.stream, <> ), - Regs1 = jit_regs:invalidate_reg(jit_regs:invalidate_reg(Regs0, Temp1), Temp2), + Regs1 = jit_regs:set_contents(jit_regs:invalidate_reg(Regs0, Temp1), Temp2, {imm, N}), State#state{stream = Stream1, regs = Regs1}; % is_atom(Src) (native register) move_to_vm_register_emit(State, Reg, {x_reg, X}) when is_atom(Reg) andalso X < ?MAX_REG -> @@ -1668,7 +1675,7 @@ move_to_vm_register_emit( TempBit = reg_bit(Temp), I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {x_reg, X}), State1 = move_to_vm_register_emit( State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit), regs = Regs1}, Temp, @@ -1682,7 +1689,7 @@ move_to_vm_register_emit( TempBit = reg_bit(Temp), I1 = jit_x86_64_asm:movq(?X_REG(?MAX_REG), Temp), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {x_reg, ?MAX_REG}), State1 = move_to_vm_register_emit( State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit), regs = Regs1}, Temp, @@ -1707,7 +1714,7 @@ move_to_vm_register_emit(#state{available_regs = Avail, regs = Regs0} = State0, I1 = jit_x86_64_asm:movq(?Y_REGS, Temp), I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {y_reg, Y}), State1 = move_to_vm_register_emit( State0#state{stream = Stream1, available_regs = Avail band (bnot TempBit), regs = Regs1}, Temp, @@ -1758,7 +1765,7 @@ move_array_element( I2 = jit_x86_64_asm:movq(Temp, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, <>), Regs1 = jit_regs:invalidate_vm_loc(Regs0, {x_reg, X}), - Regs2 = jit_regs:invalidate_reg(Regs1, Temp), + Regs2 = jit_regs:set_contents(Regs1, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs2}; move_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = @@ -1961,7 +1968,7 @@ move_to_array_element( I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs1}; move_to_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = @@ -1989,7 +1996,7 @@ move_to_array_element( I3 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {y_reg, Y}), State#state{stream = Stream1, regs = Regs1}; move_to_array_element( #state{stream_module = StreamModule, stream = Stream0} = State, Source, Reg, Index @@ -2014,7 +2021,7 @@ move_to_array_element( I1 = jit_x86_64_asm:movabsq(Source, Temp), I2 = jit_x86_64_asm:movq(Temp, {Index * 8, Reg}), Stream1 = StreamModule:append(Stream0, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {imm, Source}), State#state{stream = Stream1, regs = Regs1}. %%----------------------------------------------------------------------------- @@ -2048,7 +2055,7 @@ move_to_array_element( I1 = jit_x86_64_asm:movq(?X_REG(X), Temp), I2 = jit_x86_64_asm:movq(Temp, {Offset * ?WORD_SIZE, BaseReg, IndexReg, 8}), Stream1 = StreamModule:append(Stream0, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {x_reg, X}), State#state{stream = Stream1, regs = Regs1}; move_to_array_element( #state{stream_module = StreamModule, stream = Stream0, available_regs = Avail, regs = Regs0} = @@ -2063,7 +2070,7 @@ move_to_array_element( I2 = jit_x86_64_asm:movq({Y * 8, Temp}, Temp), I3 = jit_x86_64_asm:movq(Temp, {Offset * ?WORD_SIZE, BaseReg, IndexReg, 8}), Stream1 = StreamModule:append(Stream0, <>), - Regs1 = jit_regs:invalidate_reg(Regs0, Temp), + Regs1 = jit_regs:set_contents(Regs0, Temp, {y_reg, Y}), State#state{stream = Stream1, regs = Regs1}; move_to_array_element( #state{stream_module = StreamModule, stream = Stream0} = State, @@ -2356,7 +2363,7 @@ move_to_cp( I3 = jit_x86_64_asm:movq(Reg, ?CP), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - Regs1 = jit_regs:invalidate_reg(Regs0, Reg), + Regs1 = jit_regs:set_contents(Regs0, Reg, {y_reg, Y}), State#state{stream = Stream1, regs = Regs1}. increment_sp( @@ -2825,9 +2832,8 @@ decrement_reductions_and_maybe_schedule_next( >> ), State3 = merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs), - %% The schedule_next path is a tail call (dead end), so the register tracking - %% from the non-taken path (State1) is what matters at the continuation. - State3#state{regs = State1#state.regs}. + %% schedule_next clobbers caller-saved regs; invalidate cache at continuation. + State3#state{regs = jit_regs:invalidate_all(State1#state.regs)}. -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). call_or_schedule_next(State0, Label) -> diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 3aae8b5a95..85fab41830 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -301,6 +301,104 @@ call_primitive_last_test() -> >>, ?assertStream(aarch64, Dump, Stream). +unreachable_test_state() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)). + +setup_cached_x_reg0(State0) -> + {State1, CondReg} = ?BACKEND:move_to_native_register(State0, 1), + {State2, CachedReg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + {?BACKEND:free_native_registers(State2, [CachedReg]), CondReg}. + +setup_cached_x_reg0_with_offset(State0) -> + {State1, OffsetReg} = ?BACKEND:move_to_native_register(State0, 16#100), + {State2, CondReg} = ?BACKEND:move_to_native_register(State1, 1), + {State3, CachedReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + {?BACKEND:free_native_registers(State3, [CachedReg]), CondReg, OffsetReg, CachedReg}. + +terminal_if_preserves_cached_x_reg0(State0, TerminalFun) -> + {State1, CondReg} = setup_cached_x_reg0(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, TerminalFun), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State3. + +call_primitive_last_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, 0, [ctx, jit_state]) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: d2800027 mov x7, #0x1\n" + " 4: f9401808 ldr x8, [x0, #48]\n" + " 8: b5000067 cbnz x7, 0x14\n" + " c: f9400047 ldr x7, [x2]\n" + " 10: d61f00e0 br x7" + >>, + ?assertStream(aarch64, Dump, Stream). + +jump_to_label_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, 42) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: d2800027 mov x7, #0x1\n" + " 4: f9401808 ldr x8, [x0, #48]\n" + " 8: b5000047 cbnz x7, 0x10\n" + " c: 14000000 b 0xc" + >>, + ?assertStream(aarch64, Dump, Stream). + +jump_to_offset_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_offset(BSt0, 16#100) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: d2800027 mov x7, #0x1\n" + " 4: f9401808 ldr x8, [x0, #48]\n" + " 8: b5000047 cbnz x7, 0x10\n" + " c: 1400003d b 0x100" + >>, + ?assertStream(aarch64, Dump, Stream). + +jump_to_continuation_if_block_preserves_cache_test() -> + State0 = unreachable_test_state(), + {State1, CondReg, OffsetReg, CachedReg} = setup_cached_x_reg0_with_offset(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, fun(BSt0) -> + ?BACKEND:jump_to_continuation(BSt0, {free, OffsetReg}) + end), + Offset2 = ?BACKEND:offset(State2), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + ?assertEqual(CachedReg, Reg), + Offset3 = ?BACKEND:offset(State3), + ?assertEqual(Offset2, Offset3), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: d2802007 mov x7, #0x100\n" + " 4: d2800028 mov x8, #0x1\n" + " 8: f9401809 ldr x9, [x0, #48]\n" + " c: b5000088 cbnz x8, 0x1c\n" + " 10: 10ffff88 adr x8, 0x0\n" + " 14: 8b070108 add x8, x8, x7\n" + " 18: d61f0100 br x8" + >>, + ?assertStream(aarch64, Dump, Stream). + +move_array_element_x_reg_invalidates_vm_loc_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}), + {State2, r8} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:move_array_element(State2, r8, 0, {x_reg, 5}), + {State4, _Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 5}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: f9402c07 ldr x7, [x0, #88]\n" + " 4: f9401808 ldr x8, [x0, #48]\n" + " 8: f9400109 ldr x9, [x8]\n" + " c: f9002c09 str x9, [x0, #88]" + >>, + ?assertStream(aarch64, Dump, Stream). + return_if_not_equal_to_ctx_test_() -> {setup, fun() -> @@ -1563,6 +1661,27 @@ call_fun_test() -> >>, ?assertStream(aarch64, Dump, Stream). +decrement_reductions_invalidates_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:free_native_registers(State1, [Reg]), + State3 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State2), + {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: b9401027 ldr w7, [x1, #16]\n" + " 8: f10004e7 subs x7, x7, #0x1\n" + " c: b9001027 str w7, [x1, #16]\n" + " 10: 540000a1 b.ne 0x24\n" + " 14: 10000087 adr x7, 0x24\n" + " 18: f9000427 str x7, [x1, #8]\n" + " 1c: f9400847 ldr x7, [x2, #16]\n" + " 20: d61f00e0 br x7\n" + " 24: f9401807 ldr x7, [x0, #48]" + >>, + ?assertStream(aarch64, Dump, Stream). + move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), Stream = ?BACKEND:stream(State1), @@ -2252,3 +2371,61 @@ cached_load_after_free_test() -> " 0: f9401807 ldr x7, [x0, #48]" >>, ?assertStream(aarch64, Dump, Stream). + +fixed_dst_x_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r8), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 2}), + ?assertEqual(r8, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9402008 ldr x8, [x0, #64]" + >>, + ?assertStream(aarch64, Dump, Stream). + +fixed_dst_y_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r8), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {y_reg, 2}), + ?assertEqual(r8, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9401408 ldr x8, [x0, #40]\n" + " 4: f9400908 ldr x8, [x8, #16]" + >>, + ?assertStream(aarch64, Dump, Stream). + +%% After copying an x_reg to another vm location, the temp register holding the +%% x_reg value is cached so a subsequent load of the same x_reg skips the ldr +cached_move_to_vm_x_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {x_reg, 1}, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, r7} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9401c07 ldr x7, [x0, #56]\n" + " 4: f9001807 str x7, [x0, #48]" + >>, + ?assertStream(aarch64, Dump, Stream). + +%% After copying a y_reg to an x_reg, the temp register holding the y_reg value +%% is cached so a subsequent load of the same y_reg skips the ldrs +cached_move_to_vm_y_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {y_reg, 0}, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, r7} = ?BACKEND:move_to_native_register(State1, {y_reg, 0}), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f94000e7 ldr x7, [x7]\n" + " 8: f9001807 str x7, [x0, #48]" + >>, + ?assertStream(aarch64, Dump, Stream). diff --git a/tests/libs/jit/jit_arm32_tests.erl b/tests/libs/jit/jit_arm32_tests.erl index 1307098318..5ff9c5749b 100644 --- a/tests/libs/jit/jit_arm32_tests.erl +++ b/tests/libs/jit/jit_arm32_tests.erl @@ -705,6 +705,32 @@ call_ext_only_test() -> >>, ?assertStream(arm32, Dump, Stream). +decrement_reductions_invalidates_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:free_native_registers(State1, [Reg]), + State3 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State2), + {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: e590b018 ldr fp, [r0, #24]\n" + " 4: e59da000 ldr sl, [sp]\n" + " 8: e59ab008 ldr fp, [sl, #8]\n" + " c: e25bb001 subs fp, fp, #1\n" + " 10: e58ab008 str fp, [sl, #8]\n" + " 14: 1a000007 bne 0x38\n" + " 18: e28fb014 add fp, pc, #20\n" + " 1c: e58ab004 str fp, [sl, #4]\n" + " 20: e592b008 ldr fp, [r2, #8]\n" + " 24: e59d7024 ldr r7, [sp, #36] @ 0x24\n" + " 28: e58db024 str fp, [sp, #36] @ 0x24\n" + " 2c: e1a0e007 mov lr, r7\n" + " 30: e8bd8ff2 pop {r1, r4, r5, r6, r7, r8, r9, sl, fp, pc}\n" + " 34: e92d4ff2 push {r1, r4, r5, r6, r7, r8, r9, sl, fp, lr}\n" + " 38: e590b018 ldr fp, [r0, #24]" + >>, + ?assertStream(arm32, Dump, Stream). + call_only_or_schedule_next_and_label_relocation_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_table(State0, 2), @@ -1253,6 +1279,33 @@ cached_load_after_free_test() -> >>, ?assertStream(arm32, Dump, Stream). +fixed_dst_x_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r3), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 2}), + ?assertEqual(r3, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: e5903020 ldr r3, [r0, #32]" + >>, + ?assertStream(arm32, Dump, Stream). + +fixed_dst_y_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r1), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {y_reg, 2}), + ?assertEqual(r1, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: e590b014 ldr fp, [r0, #20]\n" + " 4: e59b1008 ldr r1, [fp, #8]" + >>, + ?assertStream(arm32, Dump, Stream). + %% and_ with negative immediate should invalidate temp register cache and_negative_imm_invalidates_temp_cache_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -1304,13 +1357,108 @@ jump_to_label_invalidates_cache_test() -> >>, ?assertStream(arm32, Dump, Stream). +unreachable_test_state() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)). + +setup_cached_x_reg0(State0) -> + {State1, CondReg} = ?BACKEND:move_to_native_register(State0, 1), + {State2, CachedReg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + {?BACKEND:free_native_registers(State2, [CachedReg]), CondReg}. + +setup_cached_x_reg0_with_offset(State0) -> + {State1, OffsetReg} = ?BACKEND:move_to_native_register(State0, 16#100), + {State2, CondReg} = ?BACKEND:move_to_native_register(State1, 1), + {State3, CachedReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + {?BACKEND:free_native_registers(State3, [CachedReg]), CondReg, OffsetReg, CachedReg}. + +terminal_if_preserves_cached_x_reg0(State0, TerminalFun) -> + {State1, CondReg} = setup_cached_x_reg0(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, TerminalFun), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State3. + +call_primitive_last_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, 0, [ctx, jit_state]) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: e3a0b001 mov fp, #1\n" + " 4: e590a018 ldr sl, [r0, #24]\n" + " 8: e35b0000 cmp fp, #0\n" + " c: 1a000004 bne 0x24\n" + " 10: e592b000 ldr fp, [r2]\n" + " 14: e59d7024 ldr r7, [sp, #36] @ 0x24\n" + " 18: e58db024 str fp, [sp, #36] @ 0x24\n" + " 1c: e1a0e007 mov lr, r7\n" + " 20: e8bd8ff2 pop {r1, r4, r5, r6, r7, r8, r9, sl, fp, pc}" + >>, + ?assertStream(arm32, Dump, Stream). + +jump_to_label_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, 42) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: e3a0b001 mov fp, #1\n" + " 4: e590a018 ldr sl, [r0, #24]\n" + " 8: e35b0000 cmp fp, #0\n" + " c: 1a000000 bne 0x14\n" + " 10: ffffffff @ instruction: 0xffffffff" + >>, + ?assertStream(arm32, Dump, Stream). + +jump_to_offset_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_offset(BSt0, 16#100) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: e3a0b001 mov fp, #1\n" + " 4: e590a018 ldr sl, [r0, #24]\n" + " 8: e35b0000 cmp fp, #0\n" + " c: 1a000000 bne 0x14\n" + " 10: ea00003a b 0x100" + >>, + ?assertStream(arm32, Dump, Stream). + +jump_to_continuation_if_block_preserves_cache_test() -> + State0 = unreachable_test_state(), + {State1, CondReg, OffsetReg, CachedReg} = setup_cached_x_reg0_with_offset(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, fun(BSt0) -> + ?BACKEND:jump_to_continuation(BSt0, {free, OffsetReg}) + end), + Offset2 = ?BACKEND:offset(State2), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + ?assertEqual(CachedReg, Reg), + Offset3 = ?BACKEND:offset(State3), + ?assertEqual(Offset2, Offset3), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: e3a0bc01 mov fp, #256 @ 0x100\n" + " 4: e3a0a001 mov sl, #1\n" + " 8: e5909018 ldr r9, [r0, #24]\n" + " c: e35a0000 cmp sl, #0\n" + " 10: 1a000007 bne 0x34\n" + " 14: e1a0a00f mov sl, pc\n" + " 18: e08bb00a add fp, fp, sl\n" + " 1c: e3e0a01b mvn sl, #27\n" + " 20: e08bb00a add fp, fp, sl\n" + " 24: e59da024 ldr sl, [sp, #36] @ 0x24\n" + " 28: e58db024 str fp, [sp, #36] @ 0x24\n" + " 2c: e1a0e00a mov lr, sl\n" + " 30: e8bd8ff2 pop {r1, r4, r5, r6, r7, r8, r9, sl, fp, pc}" + >>, + ?assertStream(arm32, Dump, Stream). + %% move_array_element to x_reg should invalidate vm_loc cache move_array_element_x_reg_invalidates_vm_loc_cache_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, r11} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}), {State2, r10} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), S3 = ?BACKEND:move_array_element(State2, r10, 0, {x_reg, 5}), - {S4, _} = ?BACKEND:move_to_native_register(S3, {x_reg, 5}), + {S4, _Reg} = ?BACKEND:move_to_native_register(S3, {x_reg, 5}), Stream = ?BACKEND:stream(S4), Dump = << " 0: e590b02c ldr fp, [r0, #44] @ 0x2c\n" diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 5e5d395a1f..408f908c16 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -2478,6 +2478,33 @@ call_fun_test() -> >>, ?assertStream(arm, Dump, Stream). +decrement_reductions_invalidates_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:free_native_registers(State1, [Reg]), + State3 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State2), + {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 9e00 ldr r6, [sp, #0]\n" + " 4: 68b7 ldr r7, [r6, #8]\n" + " 6: 3f01 subs r7, #1\n" + " 8: 60b7 str r7, [r6, #8]\n" + " a: d108 bne.n 0x1e\n" + " c: a703 add r7, pc, #12 @ (adr r7, 0x1c)\n" + " e: 3701 adds r7, #1\n" + " 10: 6077 str r7, [r6, #4]\n" + " 12: 6897 ldr r7, [r2, #8]\n" + " 14: 9e05 ldr r6, [sp, #20]\n" + " 16: 9705 str r7, [sp, #20]\n" + " 18: 46b6 mov lr, r6\n" + " 1a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6987 ldr r7, [r0, #24]" + >>, + ?assertStream(arm, Dump, Stream). + move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), State2 = ?BACKEND:jump_to_offset(State1, 16#100), @@ -4076,6 +4103,33 @@ cached_load_after_free_test() -> >>, ?assertStream(arm, Dump, Stream). +fixed_dst_x_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r3), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 2}), + ?assertEqual(r3, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 6a03 ldr r3, [r0, #32]" + >>, + ?assertStream(arm, Dump, Stream). + +fixed_dst_y_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r1), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {y_reg, 2}), + ?assertEqual(r1, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 6947 ldr r7, [r0, #20]\n" + " 2: 68b9 ldr r1, [r7, #8]" + >>, + ?assertStream(arm, Dump, Stream). + %% Verify that and_ with a negative immediate invalidates the Temp register %% cache entry. Before the fix, the Temp register (used to hold the bics mask) %% kept a stale cache entry, causing a subsequent move_to_native_register for @@ -4164,23 +4218,119 @@ jump_to_label_invalidates_cache_test() -> >>, ?assertStream(arm, Dump, Stream). +unreachable_test_state() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)). + +setup_cached_x_reg0(State0) -> + {State1, CondReg} = ?BACKEND:move_to_native_register(State0, 1), + {State2, CachedReg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + {?BACKEND:free_native_registers(State2, [CachedReg]), CondReg}. + +setup_cached_x_reg0_with_offset(State0) -> + {State1, OffsetReg} = ?BACKEND:move_to_native_register(State0, 16#100), + {State2, CondReg} = ?BACKEND:move_to_native_register(State1, 1), + {State3, CachedReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + {?BACKEND:free_native_registers(State3, [CachedReg]), CondReg, OffsetReg, CachedReg}. + +terminal_if_preserves_cached_x_reg0(State0, TerminalFun) -> + {State1, CondReg} = setup_cached_x_reg0(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, TerminalFun), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State3. + +call_primitive_last_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, 0, [ctx, jit_state]) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 2701 movs r7, #1\n" + " 2: 6986 ldr r6, [r0, #24]\n" + " 4: 2f00 cmp r7, #0\n" + " 6: d104 bne.n 0x12\n" + " 8: 6817 ldr r7, [r2, #0]\n" + " a: 9e05 ldr r6, [sp, #20]\n" + " c: 9705 str r7, [sp, #20]\n" + " e: 46b6 mov lr, r6\n" + " 10: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertStream(arm, Dump, Stream). + +jump_to_label_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, 42) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 2701 movs r7, #1\n" + " 2: 6986 ldr r6, [r0, #24]\n" + " 4: 2f00 cmp r7, #0\n" + " 6: d105 bne.n 0x14\n" + " 8: ffff ffff @ instruction: 0xffffffff\n" + " c: ffff ffff @ instruction: 0xffffffff\n" + " 10: ffff ffff @ instruction: 0xffffffff" + >>, + ?assertStream(arm, Dump, Stream). + +jump_to_offset_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_offset(BSt0, 16#100) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 2701 movs r7, #1\n" + " 2: 6986 ldr r6, [r0, #24]\n" + " 4: 2f00 cmp r7, #0\n" + " 6: d100 bne.n 0xa\n" + " 8: e07a b.n 0x100" + >>, + ?assertStream(arm, Dump, Stream). + +jump_to_continuation_if_block_preserves_cache_test() -> + State0 = unreachable_test_state(), + {State1, CondReg, OffsetReg, CachedReg} = setup_cached_x_reg0_with_offset(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, fun(BSt0) -> + ?BACKEND:jump_to_continuation(BSt0, {free, OffsetReg}) + end), + Offset2 = ?BACKEND:offset(State2), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + ?assertEqual(CachedReg, Reg), + Offset3 = ?BACKEND:offset(State3), + ?assertEqual(Offset2, Offset3), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 27ff movs r7, #255 @ 0xff\n" + " 2: 3701 adds r7, #1\n" + " 4: 2601 movs r6, #1\n" + " 6: 6985 ldr r5, [r0, #24]\n" + " 8: 2e00 cmp r6, #0\n" + " a: d108 bne.n 0x1e\n" + " c: a600 add r6, pc, #0 @ (adr r6, 0x10)\n" + " e: 19bf adds r7, r7, r6\n" + " 10: 260f movs r6, #15\n" + " 12: 4276 negs r6, r6\n" + " 14: 19bf adds r7, r7, r6\n" + " 16: 9e05 ldr r6, [sp, #20]\n" + " 18: 9705 str r7, [sp, #20]\n" + " 1a: 46b6 mov lr, r6\n" + " 1c: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertStream(arm, Dump, Stream). + %% Verify move_array_element to {x_reg, X} invalidates the vm_loc cache entry. -%% Before the fix, a register caching {x_reg, X} would still be considered -%% valid after move_array_element overwrote {x_reg, X} in memory. move_array_element_x_reg_invalidates_vm_loc_cache_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}), {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), S3 = ?BACKEND:move_array_element(State2, r6, 0, {x_reg, 5}), - {S4, _} = ?BACKEND:move_to_native_register(S3, {x_reg, 5}), + {S4, _Reg} = ?BACKEND:move_to_native_register(S3, {x_reg, 5}), Stream = ?BACKEND:stream(S4), Dump = << " 0: 6ac7 ldr r7, [r0, #44] ; 0x2c\n" " 2: 6986 ldr r6, [r0, #24]\n" " 4: 6835 ldr r5, [r6, #0]\n" - " 6: 62c5 str r5, [r0, #44] ; 0x2c\n" - " 8: 6ac5 ldr r5, [r0, #44] ; 0x2c" + " 6: 62c5 str r5, [r0, #44] ; 0x2c" >>, ?assertStream(arm, Dump, Stream). @@ -4209,6 +4359,46 @@ ldr_y_reg_invalidates_hidden_temp_cache_test() -> >>, ?assertStream(arm, Dump, Stream). +cached_move_to_vm_x_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {x_reg, 1}, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, r7} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 69c7 ldr r7, [r0, #28]\n" + " 2: 6187 str r7, [r0, #24]" + >>, + ?assertStream(arm, Dump, Stream). + +cached_move_to_vm_y_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {y_reg, 0}, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, r7} = ?BACKEND:move_to_native_register(State1, {y_reg, 0}), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 6837 ldr r7, [r6, #0]\n" + " 4: 6187 str r7, [r0, #24]" + >>, + ?assertStream(arm, Dump, Stream). + +cached_move_to_vm_imm_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, 42, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, r7} = ?BACKEND:move_to_native_register(State1, 42), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 272a movs r7, #42\n" + " 2: 6187 str r7, [r0, #24]" + >>, + ?assertStream(arm, Dump, Stream). + %% Verify move_to_native_register for y_reg does not crash with function_clause %% when all other registers are exhausted (AvailT=0). Before the fix, %% first_avail(0) was called in the invalidation code. diff --git a/tests/libs/jit/jit_regs_tests.erl b/tests/libs/jit/jit_regs_tests.erl index 09fefb530d..059f62927e 100644 --- a/tests/libs/jit/jit_regs_tests.erl +++ b/tests/libs/jit/jit_regs_tests.erl @@ -94,6 +94,20 @@ merge_test() -> %% r11 differs: invalidated ?assertEqual(unknown, jit_regs:get_contents(Merged, r11)). +merge_with_unreachable_test() -> + Regs0 = jit_regs:new(), + Regs1 = jit_regs:set_contents(Regs0, rax, {x_reg, 0}), + Regs2 = jit_regs:set_contents(Regs1, r11, {imm, 42}), + Unreachable = jit_regs:unreachable(Regs0), + ?assertEqual({x_reg, 0}, jit_regs:get_contents(jit_regs:merge(Regs2, Unreachable), rax)), + ?assertEqual({imm, 42}, jit_regs:get_contents(jit_regs:merge(Regs2, Unreachable), r11)), + ?assertEqual({x_reg, 0}, jit_regs:get_contents(jit_regs:merge(Unreachable, Regs2), rax)), + ?assertEqual({imm, 42}, jit_regs:get_contents(jit_regs:merge(Unreachable, Regs2), r11)), + ?assertEqual( + unknown, + jit_regs:get_contents(jit_regs:merge(Unreachable, jit_regs:unreachable(Regs0)), rax) + ). + stack_test() -> Regs0 = jit_regs:new(), Regs1 = jit_regs:stack_push(Regs0, rdi), diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index 994f21382a..2d94aa7def 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -2066,6 +2066,29 @@ call_fun_test() -> >>, ?assertStream(riscv32, Dump, Stream). +decrement_reductions_invalidates_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:free_native_registers(State1, [Reg]), + State3 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State2), + {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 0085af83 lw t6,8(a1)\n" + " 8: 1ffd addi t6,t6,-1\n" + " a: 01f5a423 sw t6,8(a1)\n" + " e: 000f9b63 bnez t6,0x24\n" + " 12: 00000f97 auipc t6,0x0\n" + " 16: 0fc9 addi t6,t6,18 # 0x24\n" + " 18: 0001 nop\n" + " 1a: 01f5a223 sw t6,4(a1)\n" + " 1e: 00862f83 lw t6,8(a2)\n" + " 22: 8f82 jr t6\n" + " 24: 01852f83 lw t6,24(a0)" + >>, + ?assertStream(riscv32, Dump, Stream). + move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), State2 = ?BACKEND:jump_to_offset(State1, 16#100), @@ -3562,6 +3585,33 @@ cached_load_after_free_test() -> >>, ?assertStream(riscv32, Dump, Stream). +fixed_dst_x_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, t6), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 2}), + ?assertEqual(t6, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 02052f83 lw t6,32(a0)" + >>, + ?assertStream(riscv32, Dump, Stream). + +fixed_dst_y_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, t5), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {y_reg, 2}), + ?assertEqual(t5, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 008faf03 lw t5,8(t6)" + >>, + ?assertStream(riscv32, Dump, Stream). + %% Verify that and_ with a large positive immediate invalidates the Temp %% register cache entry. Before the fix, the Temp register (used to hold the %% and mask) kept a stale cache entry, causing a subsequent @@ -3628,6 +3678,108 @@ jump_to_label_invalidates_cache_test() -> >>, ?assertStream(riscv32, Dump, Stream). +unreachable_test_state() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)). + +setup_cached_x_reg0(State0) -> + {State1, CondReg} = ?BACKEND:move_to_native_register(State0, 1), + {State2, CachedReg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + {?BACKEND:free_native_registers(State2, [CachedReg]), CondReg}. + +setup_cached_x_reg0_with_offset(State0) -> + {State1, OffsetReg} = ?BACKEND:move_to_native_register(State0, 16#100), + {State2, CondReg} = ?BACKEND:move_to_native_register(State1, 1), + {State3, CachedReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + {?BACKEND:free_native_registers(State3, [CachedReg]), CondReg, OffsetReg, CachedReg}. + +terminal_if_preserves_cached_x_reg0(State0, TerminalFun) -> + {State1, CondReg} = setup_cached_x_reg0(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, TerminalFun), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State3. + +call_primitive_last_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, 0, [ctx, jit_state]) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 4f85 li t6,1\n" + " 2: 01852f03 lw t5,24(a0)\n" + " 6: 000f9563 bnez t6,0x10\n" + " a: 00062f83 lw t6,0(a2)\n" + " e: 8f82 jr t6" + >>, + ?assertStream(riscv32, Dump, Stream). + +jump_to_label_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, 42) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 4f85 li t6,1\n" + " 2: 01852f03 lw t5,24(a0)\n" + " 6: 000f9663 bnez t6,0x12\n" + " a: ffff .insn 2, 0xffff\n" + " c: ffff .insn 2, 0xffff\n" + " e: ffff .insn 2, 0xffff\n" + " 10: ffff .insn 2, 0xffff" + >>, + ?assertStream(riscv32, Dump, Stream). + +jump_to_offset_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_offset(BSt0, 16#100) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 4f85 li t6,1\n" + " 2: 01852f03 lw t5,24(a0)\n" + " 6: 000f9363 bnez t6,0xc\n" + " a: a8dd j 0x100" + >>, + ?assertStream(riscv32, Dump, Stream). + +jump_to_continuation_if_block_preserves_cache_test() -> + State0 = unreachable_test_state(), + {State1, CondReg, OffsetReg, CachedReg} = setup_cached_x_reg0_with_offset(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, fun(BSt0) -> + ?BACKEND:jump_to_continuation(BSt0, {free, OffsetReg}) + end), + Offset2 = ?BACKEND:offset(State2), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + ?assertEqual(CachedReg, Reg), + Offset3 = ?BACKEND:offset(State3), + ?assertEqual(Offset2, Offset3), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 10000f93 li t6,256\n" + " 4: 4f05 li t5,1\n" + " 6: 01852e83 lw t4,24(a0)\n" + " a: 000f1763 bnez t5,0x18\n" + " e: 00000f17 auipc t5,0x0\n" + " 12: 1f49 addi t5,t5,-14 # 0x0\n" + " 14: 9f7e add t5,t5,t6\n" + " 16: 8f02 jr t5" + >>, + ?assertStream(riscv32, Dump, Stream). + +move_array_element_x_reg_invalidates_vm_loc_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:move_array_element(State2, t5, 0, {x_reg, 5}), + {State4, _Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 5}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 02c52f83 lw t6,44(a0)\n" + " 4: 01852f03 lw t5,24(a0)\n" + " 8: 000f2e83 lw t4,0(t5)\n" + " c: 03d52623 sw t4,44(a0)" + >>, + ?assertStream(riscv32, Dump, Stream). + ldr_y_reg_invalidates_hidden_temp_cache_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), @@ -3670,3 +3822,43 @@ y_reg_load_last_available_register_test() -> " 1c: 0002a283 lw t0,0(t0)" >>, ?assertStream(riscv32, Dump, Stream). + +cached_move_to_vm_x_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {x_reg, 1}, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, t6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 01f52c23 sw t6,24(a0)" + >>, + ?assertStream(riscv32, Dump, Stream). + +cached_move_to_vm_y_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {y_reg, 0}, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, t6} = ?BACKEND:move_to_native_register(State1, {y_reg, 0}), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 01f52c23 sw t6,24(a0)" + >>, + ?assertStream(riscv32, Dump, Stream). + +cached_move_to_vm_imm_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, 42, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, t6} = ?BACKEND:move_to_native_register(State1, 42), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 02a00f93 li t6,42\n" + " 4: 01f52c23 sw t6,24(a0)" + >>, + ?assertStream(riscv32, Dump, Stream). diff --git a/tests/libs/jit/jit_riscv64_tests.erl b/tests/libs/jit/jit_riscv64_tests.erl index d122b230a4..c049ab41f4 100644 --- a/tests/libs/jit/jit_riscv64_tests.erl +++ b/tests/libs/jit/jit_riscv64_tests.erl @@ -2140,6 +2140,31 @@ call_fun_test() -> >>, ?assertStream(riscv64, Dump, Stream). +decrement_reductions_invalidates_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:free_native_registers(State1, [Reg]), + State3 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State2), + {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 03053f83 ld t6,48(a0)\n" + " 4: 0105af83 lw t6,16(a1)\n" + " 8: 1ffd addi t6,t6,-1\n" + " a: 01f5a823 sw t6,16(a1)\n" + " e: 000f9d63 bnez t6,0x28\n" + " 12: 00000f97 auipc t6,0x0\n" + " 16: 0fd9 addi t6,t6,22 # 0x28\n" + " 18: 0001 nop\n" + " 1a: 01f5b423 sd t6,8(a1)\n" + " 1e: 4fc1 li t6,16\n" + " 20: 9fb2 add t6,t6,a2\n" + " 22: 000fbf83 ld t6,0(t6)\n" + " 26: 8f82 jr t6\n" + " 28: 03053f83 ld t6,48(a0)" + >>, + ?assertStream(riscv64, Dump, Stream). + move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), State2 = ?BACKEND:jump_to_offset(State1, 16#100), @@ -3657,6 +3682,33 @@ and_positive_imm_invalidates_temp_cache_test() -> >>, ?assertStream(riscv64, Dump, Stream). +fixed_dst_x_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, t6), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 2}), + ?assertEqual(t6, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 04053f83 ld t6,64(a0)" + >>, + ?assertStream(riscv64, Dump, Stream). + +fixed_dst_y_reg_load_preserves_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, t5), + Offset1 = ?BACKEND:offset(State1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {y_reg, 2}), + ?assertEqual(t5, Reg), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 02853f83 ld t6,40(a0)\n" + " 4: 010fbf03 ld t5,16(t6)" + >>, + ?assertStream(riscv64, Dump, Stream). + if_block_cond_free_reg_invalidates_cache_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), @@ -3698,6 +3750,108 @@ jump_to_label_invalidates_cache_test() -> >>, ?assertStream(riscv64, Dump, Stream). +unreachable_test_state() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)). + +setup_cached_x_reg0(State0) -> + {State1, CondReg} = ?BACKEND:move_to_native_register(State0, 1), + {State2, CachedReg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + {?BACKEND:free_native_registers(State2, [CachedReg]), CondReg}. + +setup_cached_x_reg0_with_offset(State0) -> + {State1, OffsetReg} = ?BACKEND:move_to_native_register(State0, 16#100), + {State2, CondReg} = ?BACKEND:move_to_native_register(State1, 1), + {State3, CachedReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + {?BACKEND:free_native_registers(State3, [CachedReg]), CondReg, OffsetReg, CachedReg}. + +terminal_if_preserves_cached_x_reg0(State0, TerminalFun) -> + {State1, CondReg} = setup_cached_x_reg0(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, TerminalFun), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State3. + +call_primitive_last_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, 0, [ctx, jit_state]) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 4f85 li t6,1\n" + " 2: 03053f03 ld t5,48(a0)\n" + " 6: 000f9563 bnez t6,0x10\n" + " a: 00063f83 ld t6,0(a2)\n" + " e: 8f82 jr t6" + >>, + ?assertStream(riscv64, Dump, Stream). + +jump_to_label_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, 42) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 4f85 li t6,1\n" + " 2: 03053f03 ld t5,48(a0)\n" + " 6: 000f9663 bnez t6,0x12\n" + " a: ffff .insn 2, 0xffff\n" + " c: ffff .insn 2, 0xffff\n" + " e: ffff .insn 2, 0xffff\n" + " 10: ffff .insn 2, 0xffff" + >>, + ?assertStream(riscv64, Dump, Stream). + +jump_to_offset_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_offset(BSt0, 16#100) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: 4f85 li t6,1\n" + " 2: 03053f03 ld t5,48(a0)\n" + " 6: 000f9363 bnez t6,0xc\n" + " a: a8dd j 0x100" + >>, + ?assertStream(riscv64, Dump, Stream). + +jump_to_continuation_if_block_preserves_cache_test() -> + State0 = unreachable_test_state(), + {State1, CondReg, OffsetReg, CachedReg} = setup_cached_x_reg0_with_offset(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, fun(BSt0) -> + ?BACKEND:jump_to_continuation(BSt0, {free, OffsetReg}) + end), + Offset2 = ?BACKEND:offset(State2), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + ?assertEqual(CachedReg, Reg), + Offset3 = ?BACKEND:offset(State3), + ?assertEqual(Offset2, Offset3), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 10000f93 li t6,256\n" + " 4: 4f05 li t5,1\n" + " 6: 03053e83 ld t4,48(a0)\n" + " a: 000f1763 bnez t5,0x18\n" + " e: 00000f17 auipc t5,0x0\n" + " 12: 1f49 addi t5,t5,-14 # 0x0\n" + " 14: 9f7e add t5,t5,t6\n" + " 16: 8f02 jr t5" + >>, + ?assertStream(riscv64, Dump, Stream). + +move_array_element_x_reg_invalidates_vm_loc_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:move_array_element(State2, t5, 0, {x_reg, 5}), + {State4, _Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 5}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 05853f83 ld t6,88(a0)\n" + " 4: 03053f03 ld t5,48(a0)\n" + " 8: 000f3e83 ld t4,0(t5)\n" + " c: 05d53c23 sd t4,88(a0)" + >>, + ?assertStream(riscv64, Dump, Stream). + ldr_y_reg_invalidates_hidden_temp_cache_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), @@ -3753,3 +3907,43 @@ cached_load_after_free_test() -> " 0: 03053f83 ld t6,48(a0)" >>, ?assertStream(riscv64, Dump, Stream). + +cached_move_to_vm_x_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {x_reg, 1}, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, t6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 03853f83 ld t6,56(a0)\n" + " 4: 03f53823 sd t6,48(a0)" + >>, + ?assertStream(riscv64, Dump, Stream). + +cached_move_to_vm_y_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {y_reg, 0}, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, t6} = ?BACKEND:move_to_native_register(State1, {y_reg, 0}), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 02853f03 ld t5,40(a0)\n" + " 4: 000f3f83 ld t6,0(t5)\n" + " 8: 03f53823 sd t6,48(a0)" + >>, + ?assertStream(riscv64, Dump, Stream). + +cached_move_to_vm_imm_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, 42, {x_reg, 0}), + Offset1 = ?BACKEND:offset(State1), + {State2, t6} = ?BACKEND:move_to_native_register(State1, 42), + ?assertEqual(Offset1, ?BACKEND:offset(State2)), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 02a00f93 li t6,42\n" + " 4: 03f53823 sd t6,48(a0)" + >>, + ?assertStream(riscv64, Dump, Stream). diff --git a/tests/libs/jit/jit_wasm32_tests.erl b/tests/libs/jit/jit_wasm32_tests.erl index f4fbf938f1..2bcc71e4ac 100644 --- a/tests/libs/jit/jit_wasm32_tests.erl +++ b/tests/libs/jit/jit_wasm32_tests.erl @@ -1128,6 +1128,174 @@ call_primitive_last_test() -> ?assertEqual([], ?BACKEND:used_regs(State3)), ok. +unreachable_test_state() -> + State0 = ?BACKEND:new(0, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), + ?BACKEND:add_label(State1, 0). + +setup_cached_x_reg0(State0) -> + {State1, CondReg} = ?BACKEND:move_to_native_register(State0, 1), + {State2, CachedReg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + {?BACKEND:free_native_registers(State2, [CachedReg]), CondReg}. + +setup_cached_x_reg0_with_offset(State0) -> + {State1, OffsetReg} = ?BACKEND:move_to_native_register(State0, 16#100), + {State2, CondReg} = ?BACKEND:move_to_native_register(State1, 1), + {State3, CachedReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + {?BACKEND:free_native_registers(State3, [CachedReg]), CondReg, OffsetReg, CachedReg}. + +terminal_if_preserves_cached_x_reg0(State0, TerminalFun) -> + {State1, CondReg} = setup_cached_x_reg0(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, TerminalFun), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State3. + +call_primitive_last_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, 0, [ctx, jit_state]) + end), + Stream = ?BACKEND:stream(?BACKEND:return_labels_and_lines(State0, [])), + Dump = << + " 0000b3: 08 7f local[3..10] type=i32\n" + " 0000b5: 41 01 i32.const 1\n" + " 0000b7: 21 03 local.set 3\n" + " 0000b9: 20 00 local.get 0\n" + " 0000bb: 28 02 18 i32.load 2 24\n" + " 0000be: 21 04 local.set 4\n" + " 0000c0: 20 03 local.get 3\n" + " 0000c2: 41 00 i32.const 0\n" + " 0000c4: 46 i32.eq\n" + " 0000c5: 04 40 if\n" + " 0000c7: 20 00 local.get 0\n" + " 0000c9: 20 01 local.get 1\n" + " 0000cb: 20 02 local.get 2\n" + " 0000cd: 28 02 00 i32.load 2 0\n" + " 0000d0: 11 01 00 call_indirect 0 (type 1)\n" + " 0000d3: 0f return\n" + " 0000d4: 0b end\n" + " 0000d5: 20 00 local.get 0\n" + " 0000d7: 28 02 18 i32.load 2 24\n" + " 0000da: 21 03 local.set 3\n" + " 0000dc: 20 00 local.get 0\n" + " 0000de: 0f return\n" + " 0000df: 0b end\n" + " 0000e2: 08 7f local[3..10] type=i32\n" + " 0000e4: 20 00 local.get 0\n" + " 0000e6: 0b end" + >>, + ?assertStream(wasm32, Dump, Stream). + +jump_to_label_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, 42) + end), + Stream = ?BACKEND:stream(?BACKEND:return_labels_and_lines(State0, [])), + Dump = << + " 0000b3: 08 7f local[3..10] type=i32\n" + " 0000b5: 41 01 i32.const 1\n" + " 0000b7: 21 03 local.set 3\n" + " 0000b9: 20 00 local.get 0\n" + " 0000bb: 28 02 18 i32.load 2 24\n" + " 0000be: 21 04 local.set 4\n" + " 0000c0: 20 03 local.get 3\n" + " 0000c2: 41 00 i32.const 0\n" + " 0000c4: 46 i32.eq\n" + " 0000c5: 04 40 if\n" + " 0000c7: 20 01 local.get 1\n" + " 0000c9: 41 2b i32.const 43\n" + " 0000cb: 36 02 04 i32.store 2 4\n" + " 0000ce: 20 00 local.get 0\n" + " 0000d0: 0f return\n" + " 0000d1: 0b end\n" + " 0000d2: 20 00 local.get 0\n" + " 0000d4: 28 02 18 i32.load 2 24\n" + " 0000d7: 21 03 local.set 3\n" + " 0000d9: 20 00 local.get 0\n" + " 0000db: 0f return\n" + " 0000dc: 0b end\n" + " 0000df: 08 7f local[3..10] type=i32\n" + " 0000e1: 20 00 local.get 0\n" + " 0000e3: 0b end" + >>, + ?assertStream(wasm32, Dump, Stream). + +jump_to_offset_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_offset(BSt0, 16#100) + end), + Stream = ?BACKEND:stream(?BACKEND:return_labels_and_lines(State0, [])), + Dump = << + " 0000b3: 08 7f local[3..10] type=i32\n" + " 0000b5: 41 01 i32.const 1\n" + " 0000b7: 21 03 local.set 3\n" + " 0000b9: 20 00 local.get 0\n" + " 0000bb: 28 02 18 i32.load 2 24\n" + " 0000be: 21 04 local.set 4\n" + " 0000c0: 20 03 local.get 3\n" + " 0000c2: 41 00 i32.const 0\n" + " 0000c4: 46 i32.eq\n" + " 0000c5: 04 40 if\n" + " 0000c7: 20 01 local.get 1\n" + " 0000c9: 41 3e i32.const 62\n" + " 0000cb: 36 02 04 i32.store 2 4\n" + " 0000ce: 20 00 local.get 0\n" + " 0000d0: 0f return\n" + " 0000d1: 0b end\n" + " 0000d2: 20 00 local.get 0\n" + " 0000d4: 28 02 18 i32.load 2 24\n" + " 0000d7: 21 03 local.set 3\n" + " 0000d9: 20 00 local.get 0\n" + " 0000db: 0f return\n" + " 0000dc: 0b end\n" + " 0000df: 08 7f local[3..10] type=i32\n" + " 0000e1: 20 00 local.get 0\n" + " 0000e3: 0b end" + >>, + ?assertStream(wasm32, Dump, Stream). + +jump_to_continuation_if_block_preserves_cache_test() -> + State0 = unreachable_test_state(), + {State1, CondReg, OffsetReg, _CachedReg} = setup_cached_x_reg0_with_offset(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, fun(BSt0) -> + ?BACKEND:jump_to_continuation(BSt0, {free, OffsetReg}) + end), + {State3, _Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + Stream = ?BACKEND:stream(?BACKEND:return_labels_and_lines(State3, [])), + Dump = << + " 0000b3: 08 7f local[3..10] type=i32\n" + " 0000b5: 41 80 02 i32.const 256\n" + " 0000b8: 21 03 local.set 3\n" + " 0000ba: 41 01 i32.const 1\n" + " 0000bc: 21 04 local.set 4\n" + " 0000be: 20 00 local.get 0\n" + " 0000c0: 28 02 18 i32.load 2 24\n" + " 0000c3: 21 05 local.set 5\n" + " 0000c5: 20 04 local.get 4\n" + " 0000c7: 41 00 i32.const 0\n" + " 0000c9: 46 i32.eq\n" + " 0000ca: 04 40 if\n" + " 0000cc: 20 01 local.get 1\n" + " 0000ce: 20 03 local.get 3\n" + " 0000d0: 41 04 i32.const 4\n" + " 0000d2: 6e i32.div_u\n" + " 0000d3: 41 01 i32.const 1\n" + " 0000d5: 6a i32.add\n" + " 0000d6: 36 02 04 i32.store 2 4\n" + " 0000d9: 20 00 local.get 0\n" + " 0000db: 0f return\n" + " 0000dc: 0b end\n" + " 0000dd: 20 00 local.get 0\n" + " 0000df: 28 02 18 i32.load 2 24\n" + " 0000e2: 21 04 local.set 4\n" + " 0000e4: 20 00 local.get 0\n" + " 0000e6: 0f return\n" + " 0000e7: 0b end\n" + " 0000ea: 08 7f local[3..10] type=i32\n" + " 0000ec: 20 00 local.get 0\n" + " 0000ee: 0b end" + >>, + ?assertStream(wasm32, Dump, Stream). + %%============================================================================= %% Scheduling %%============================================================================= @@ -1178,6 +1346,61 @@ decrement_reductions_test() -> >>, ?assertStream(wasm32, Dump, Stream). +decrement_reductions_invalidates_cache_test() -> + State0 = ?BACKEND:new(0, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), + State2 = ?BACKEND:add_label(State1, 0), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + State5 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State4), + {State6, Reg} = ?BACKEND:move_to_native_register(State5, {x_reg, 0}), + State7 = ?BACKEND:return_labels_and_lines(State6, []), + Stream = ?BACKEND:stream(State7), + Dump = << + " 0000b9: 08 7f local[3..10] type=i32\n" + " 0000bb: 20 00 local.get 0\n" + " 0000bd: 28 02 18 i32.load 2 24\n" + " 0000c0: 21 03 local.set 3\n" + " 0000c2: 20 01 local.get 1\n" + " 0000c4: 28 02 08 i32.load 2 8\n" + " 0000c7: 41 01 i32.const 1\n" + " 0000c9: 6b i32.sub\n" + " 0000ca: 21 03 local.set 3\n" + " 0000cc: 20 01 local.get 1\n" + " 0000ce: 20 03 local.get 3\n" + " 0000d0: 36 02 08 i32.store 2 8\n" + " 0000d3: 20 03 local.get 3\n" + " 0000d5: 45 i32.eqz\n" + " 0000d6: 04 40 if\n" + " 0000d8: 20 01 local.get 1\n" + " 0000da: 41 03 i32.const 3\n" + " 0000dc: 36 02 04 i32.store 2 4\n" + " 0000df: 20 00 local.get 0\n" + " 0000e1: 20 01 local.get 1\n" + " 0000e3: 20 02 local.get 2\n" + " 0000e5: 28 02 08 i32.load 2 8\n" + " 0000e8: 11 01 00 call_indirect 0 (type 1)\n" + " 0000eb: 0f return\n" + " 0000ec: 0b end\n" + " 0000ed: 20 01 local.get 1\n" + " 0000ef: 41 03 i32.const 3\n" + " 0000f1: 36 02 04 i32.store 2 4\n" + " 0000f4: 20 00 local.get 0\n" + " 0000f6: 0f return\n" + " 0000f7: 0b end\n" + " 0000fa: 08 7f local[3..10] type=i32\n" + " 0000fc: 20 00 local.get 0\n" + " 0000fe: 0b end\n" + " 000101: 08 7f local[3..10] type=i32\n" + " 000103: 20 00 local.get 0\n" + " 000105: 28 02 18 i32.load 2 24\n" + " 000108: 21 03 local.set 3\n" + " 00010a: 20 00 local.get 0\n" + " 00010c: 0f return\n" + " 00010d: 0b end" + >>, + ?assertStream(wasm32, Dump, Stream). + call_or_schedule_next_test() -> State0 = ?BACKEND:new(0, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_table(State0, 3), diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 6c30740ec1..6ef0033d9f 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -270,6 +270,111 @@ call_primitive_last_test() -> >>, ?assertStream(x86_64, Dump, Stream). +unreachable_test_state() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)). + +setup_cached_x_reg0(State0) -> + {State1, CondReg} = ?BACKEND:move_to_native_register(State0, 1), + {State2, CachedReg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + {?BACKEND:free_native_registers(State2, [CachedReg]), CondReg}. + +setup_cached_x_reg0_with_offset(State0) -> + {State1, OffsetReg} = ?BACKEND:move_to_native_register(State0, 16#100), + {State2, CondReg} = ?BACKEND:move_to_native_register(State1, 1), + {State3, CachedReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + {?BACKEND:free_native_registers(State3, [CachedReg]), CondReg, OffsetReg, CachedReg}. + +terminal_if_preserves_cached_x_reg0(State0, TerminalFun) -> + {State1, CondReg} = setup_cached_x_reg0(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, TerminalFun), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State3. + +call_primitive_last_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, 0, [ctx, jit_state]) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: b8 01 00 00 00 mov $0x1,%eax\n" + " 5: 4c 8b 5f 30 mov 0x30(%rdi),%r11\n" + " 9: 48 85 c0 test %rax,%rax\n" + " c: 75 05 jne 0x13\n" + " e: 48 8b 02 mov (%rdx),%rax\n" + " 11: ff .byte 0xff\n" + " 12: e0 .byte 0xe0" + >>, + ?assertStream(x86_64, Dump, Stream). + +jump_to_label_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, 42) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: b8 01 00 00 00 mov $0x1,%eax\n" + " 5: 4c 8b 5f 30 mov 0x30(%rdi),%r11\n" + " 9: 48 85 c0 test %rax,%rax\n" + " c: 75 05 jne 0x13\n" + " e: e9 fc ff ff ff jmp 0xf" + >>, + ?assertStream(x86_64, Dump, Stream). + +jump_to_offset_if_block_preserves_cache_test() -> + State0 = terminal_if_preserves_cached_x_reg0(unreachable_test_state(), fun(BSt0) -> + ?BACKEND:jump_to_offset(BSt0, 16#100) + end), + Stream = ?BACKEND:stream(State0), + Dump = << + " 0: b8 01 00 00 00 mov $0x1,%eax\n" + " 5: 4c 8b 5f 30 mov 0x30(%rdi),%r11\n" + " 9: 48 85 c0 test %rax,%rax\n" + " c: 75 05 jne 0x13\n" + " e: e9 ed 00 00 00 jmp 0x100" + >>, + ?assertStream(x86_64, Dump, Stream). + +jump_to_continuation_if_block_preserves_cache_test() -> + State0 = unreachable_test_state(), + {State1, CondReg, OffsetReg, CachedReg} = setup_cached_x_reg0_with_offset(State0), + State2 = ?BACKEND:if_block(State1, {{free, CondReg}, '==', 0}, fun(BSt0) -> + ?BACKEND:jump_to_continuation(BSt0, {free, OffsetReg}) + end), + Offset2 = ?BACKEND:offset(State2), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + ?assertEqual(CachedReg, Reg), + Offset3 = ?BACKEND:offset(State3), + ?assertEqual(Offset2, Offset3), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: b8 00 01 00 00 mov $0x100,%eax\n" + " 5: 41 bb 01 00 00 00 mov $0x1,%r11d\n" + " b: 4c 8b 57 30 mov 0x30(%rdi),%r10\n" + " f: 4d 85 db test %r11,%r11\n" + " 12: 75 0d jne 0x21\n" + " 14: 4c 8d 1d e5 ff ff ff lea -0x1b(%rip),%r11\n" + " 1b: 49 01 c3 add %rax,%r11\n" + " 1e: 41 rex.B\n" + " 1f: ff .byte 0xff\n" + " 20: e3 .byte 0xe3" + >>, + ?assertStream(x86_64, Dump, Stream). + +move_array_element_x_reg_invalidates_vm_loc_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, rax} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}), + {State2, r11} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:move_array_element(State2, r11, 0, {x_reg, 5}), + {State4, _Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 5}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 48 8b 47 58 mov 0x58(%rdi),%rax\n" + " 4: 4c 8b 5f 30 mov 0x30(%rdi),%r11\n" + " 8: 4d 8b 13 mov (%r11),%r10\n" + " b: 4c 89 57 58 mov %r10,0x58(%rdi)" + >>, + ?assertStream(x86_64, Dump, Stream). + return_if_not_equal_to_ctx_test_() -> {setup, fun() -> @@ -1409,6 +1514,25 @@ call_fun_test() -> >>, ?assertStream(x86_64, Dump, Stream). +decrement_reductions_invalidates_cache_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:free_native_registers(State1, [Reg]), + State3 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State2), + {State4, Reg} = ?BACKEND:move_to_native_register(State3, {x_reg, 0}), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: ff 4e 10 decl 0x10(%rsi)\n" + " 7: 75 11 jne 0x1a\n" + " 9: 48 8d 05 0a 00 00 00 lea 0xa(%rip),%rax # 0x1a\n" + " 10: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 14: 48 8b 42 10 mov 0x10(%rdx),%rax\n" + " 18: ff e0 jmp *%rax\n" + " 1a: 48 8b 47 30 mov 0x30(%rdi),%rax" + >>, + ?assertStream(x86_64, Dump, Stream). + move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), Stream = ?BACKEND:stream(State1), @@ -1903,3 +2027,77 @@ cached_load_after_free_test() -> " 0: 48 8b 47 30 mov 0x30(%rdi),%rax" >>, ?assertStream(x86_64, Dump, Stream). + +%% After storing a large immediate to an x_reg, the temp register holding the +%% immediate is cached so a subsequent load of the same value skips the movabsq +cached_move_to_vm_large_imm_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, 16#100000000, {x_reg, 0}), + {State2, rax} = ?BACKEND:move_to_native_register(State1, 16#100000000), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 48 b8 00 00 00 00 01 movabs $0x100000000,%rax\n" + " 7: 00 00 00 \n" + " a: 48 89 47 30 mov %rax,0x30(%rdi)" + >>, + ?assertStream(x86_64, Dump, Stream). + +%% After copying an x_reg to another vm location, the temp register holding the +%% x_reg value is cached so a subsequent load of the same x_reg skips the mov +cached_move_to_vm_x_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {x_reg, 1}, {x_reg, 0}), + {State2, rax} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 48 8b 47 38 mov 0x38(%rdi),%rax\n" + " 4: 48 89 47 30 mov %rax,0x30(%rdi)" + >>, + ?assertStream(x86_64, Dump, Stream). + +%% After copying a y_reg to an x_reg, the temp register holding the y_reg value +%% is cached so a subsequent load of the same y_reg skips the movs +cached_move_to_vm_y_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_vm_register(State0, {y_reg, 0}, {x_reg, 0}), + {State2, rax} = ?BACKEND:move_to_native_register(State1, {y_reg, 0}), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 48 8b 47 28 mov 0x28(%rdi),%rax\n" + " 4: 48 8b 00 mov (%rax),%rax\n" + " 7: 48 89 47 30 mov %rax,0x30(%rdi)" + >>, + ?assertStream(x86_64, Dump, Stream). + +%% After storing an x_reg value to an array element, the temp register holding +%% the x_reg value is cached so a subsequent load of that x_reg skips the mov +cached_move_to_array_element_x_reg_reuse_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r11, 2), + {State2, rax} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 49 89 43 10 mov %rax,0x10(%r11)" + >>, + ?assertStream(x86_64, Dump, Stream). + +%% After an if_block with a large-immediate condition, the temp register loaded +%% with that immediate is cached, so the block body can reuse it without emitting +%% a redundant movabsq +if_block_large_cond_reuse_imm_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, rax} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:if_block(State1, {rax, '<', 16#100000000}, fun(BSt0) -> + {BSt1, _Reg} = ?BACKEND:move_to_native_register(BSt0, 16#100000000), + BSt1 + end), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 49 bb 00 00 00 00 01 movabs $0x100000000,%r11\n" + " b: 00 00 00 \n" + " e: 4c 39 d8 cmp %r11,%rax\n" + " 11: 7d 00 jge 0x13" + >>, + ?assertStream(x86_64, Dump, Stream).