diff --git a/cranelift-codegen/src/licm.rs b/cranelift-codegen/src/licm.rs
index cb3dbd87a..14c4630dc 100644
--- a/cranelift-codegen/src/licm.rs
+++ b/cranelift-codegen/src/licm.rs
@@ -5,7 +5,9 @@ use crate::dominator_tree::DominatorTree;
 use crate::entity::{EntityList, ListPool};
 use crate::flowgraph::{BasicBlock, ControlFlowGraph};
 use crate::fx::FxHashSet;
-use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
+use crate::ir::{
+    DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value,
+};
 use crate::isa::TargetIsa;
 use crate::loop_analysis::{Loop, LoopAnalysis};
 use crate::timing;
@@ -145,8 +147,7 @@ fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function)
 
 /// Test whether the given opcode is unsafe to even consider for LICM.
 fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
-    opcode.can_load()
-        || opcode.can_store()
+    opcode.can_store()
         || opcode.is_call()
         || opcode.is_branch()
         || opcode.is_terminator()
@@ -156,12 +157,25 @@ fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
         || opcode.writes_cpu_flags()
 }
 
+fn is_unsafe_load(inst_data: &InstructionData) -> bool {
+    match *inst_data {
+        InstructionData::Load { flags, .. } | InstructionData::LoadComplex { flags, .. } => {
+            !flags.readonly() || !flags.notrap()
+        }
+        _ => inst_data.opcode().can_load(),
+    }
+}
+
 /// Test whether the given instruction is loop-invariant.
 fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
     if trivially_unsafe_for_licm(dfg[inst].opcode()) {
         return false;
     }
 
+    if is_unsafe_load(&dfg[inst]) {
+        return false;
+    }
+
     let inst_args = dfg.inst_args(inst);
     for arg in inst_args {
         let arg = dfg.resolve_aliases(*arg);
diff --git a/filetests/licm/load_readonly_notrap.clif b/filetests/licm/load_readonly_notrap.clif
new file mode 100644
index 000000000..9a9d2dcbf
--- /dev/null
+++ b/filetests/licm/load_readonly_notrap.clif
@@ -0,0 +1,48 @@
+test licm
+
+target x86_64
+
+;; Nontrapping readonly load from address that is not loop-dependent
+;; should be hoisted out of loop.
+
+function %hoist_load(i32, i64 vmctx) -> i32 {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned readonly gv0
+    heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
+
+ebb0(v0: i32, v1: i64):
+    jump ebb1(v0, v1)
+
+ebb1(v2: i32, v3: i64):
+    v4 = iconst.i32 1
+    v5 = heap_addr.i64 heap0, v4, 1
+    v6 = load.i32 notrap aligned readonly v5
+    v7 = iadd v2, v6
+    brz v2, ebb2(v2)
+    v8 = isub v2, v4
+    jump ebb1(v8, v3)
+
+ebb2(v9: i32):
+    return v9
+}
+
+; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
+; nextln:     gv0 = vmctx
+; nextln:     gv1 = load.i64 notrap aligned readonly gv0
+; nextln:     heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
+; nextln:
+; nextln: ebb0(v0: i32, v1: i64):
+; nextln:     v4 = iconst.i32 1
+; nextln:     v5 = heap_addr.i64 heap0, v4, 1
+; nextln:     v6 = load.i32 notrap aligned readonly v5
+; nextln:     jump ebb1(v0, v1)
+; nextln:
+; nextln: ebb1(v2: i32, v3: i64):
+; nextln:     v7 = iadd v2, v6
+; nextln:     brz v2, ebb2(v2)
+; nextln:     v8 = isub v2, v4
+; nextln:     jump ebb1(v8, v3)
+; nextln:
+; nextln: ebb2(v9: i32):
+; nextln:     return v9
+; nextln: }
diff --git a/filetests/licm/reject_load_notrap.clif b/filetests/licm/reject_load_notrap.clif
new file mode 100644
index 000000000..1d26faa71
--- /dev/null
+++ b/filetests/licm/reject_load_notrap.clif
@@ -0,0 +1,49 @@
+test licm
+
+target x86_64
+
+;; Nontrapping possibly-not-readonly load from address that is not
+;; loop-dependent should *not* be hoisted out of loop, though the
+;; address computation can be.
+
+function %hoist_load(i32, i64 vmctx) -> i32 {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned readonly gv0
+    heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
+
+ebb0(v0: i32, v1: i64):
+    v4 = iconst.i32 1
+    v5 = heap_addr.i64 heap0, v4, 1
+    jump ebb1(v0, v1)
+
+ebb1(v2: i32, v3: i64):
+    v6 = load.i32 notrap aligned v5
+    v7 = iadd v2, v6
+    brz v2, ebb2(v2)
+    v8 = isub v2, v4
+    jump ebb1(v8, v3)
+
+ebb2(v9: i32):
+    return v9
+}
+
+; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
+; nextln:     gv0 = vmctx
+; nextln:     gv1 = load.i64 notrap aligned readonly gv0
+; nextln:     heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
+; nextln:
+; nextln: ebb0(v0: i32, v1: i64):
+; nextln:     v4 = iconst.i32 1
+; nextln:     v5 = heap_addr.i64 heap0, v4, 1
+; nextln:     jump ebb1(v0, v1)
+; nextln:
+; nextln: ebb1(v2: i32, v3: i64):
+; nextln:     v6 = load.i32 notrap aligned v5
+; nextln:     v7 = iadd v2, v6
+; nextln:     brz v2, ebb2(v2)
+; nextln:     v8 = isub v2, v4
+; nextln:     jump ebb1(v8, v3)
+; nextln:
+; nextln: ebb2(v9: i32):
+; nextln:     return v9
+; nextln: }
diff --git a/filetests/licm/reject_load_readonly.clif b/filetests/licm/reject_load_readonly.clif
new file mode 100644
index 000000000..5b6a41171
--- /dev/null
+++ b/filetests/licm/reject_load_readonly.clif
@@ -0,0 +1,49 @@
+test licm
+
+target x86_64
+
+;; Maybe-trapping readonly load from address that is not
+;; loop-dependent should *not* be hoisted out of loop, though the
+;; address computation can be hoisted.
+
+function %hoist_load(i32, i64 vmctx) -> i32 {
+    gv0 = vmctx
+    gv1 = load.i64 notrap aligned readonly gv0
+    heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
+
+ebb0(v0: i32, v1: i64):
+    jump ebb1(v0, v1)
+
+ebb1(v2: i32, v3: i64):
+    v4 = iconst.i32 1
+    v5 = heap_addr.i64 heap0, v4, 1
+    v6 = load.i32 aligned readonly v5
+    v7 = iadd v2, v6
+    brz v2, ebb2(v2)
+    v8 = isub v2, v4
+    jump ebb1(v8, v3)
+
+ebb2(v9: i32):
+    return v9
+}
+
+; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
+; nextln:     gv0 = vmctx
+; nextln:     gv1 = load.i64 notrap aligned readonly gv0
+; nextln:     heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
+; nextln:
+; nextln: ebb0(v0: i32, v1: i64):
+; nextln:     v4 = iconst.i32 1
+; nextln:     v5 = heap_addr.i64 heap0, v4, 1
+; nextln:     jump ebb1(v0, v1)
+; nextln:
+; nextln: ebb1(v2: i32, v3: i64):
+; nextln:     v6 = load.i32 aligned readonly v5
+; nextln:     v7 = iadd v2, v6
+; nextln:     brz v2, ebb2(v2)
+; nextln:     v8 = isub v2, v4
+; nextln:     jump ebb1(v8, v3)
+; nextln:
+; nextln: ebb2(v9: i32):
+; nextln:     return v9
+; nextln: }