From dc09c9f767d842d08697ea178766447b402e6412 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Fri, 5 Apr 2019 15:06:52 +0200 Subject: [PATCH 1/4] Allow readonly nontrapping loads to be hoisted by licm --- cranelift-codegen/src/licm.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/cranelift-codegen/src/licm.rs b/cranelift-codegen/src/licm.rs index cb3dbd87a..baaa8600d 100644 --- a/cranelift-codegen/src/licm.rs +++ b/cranelift-codegen/src/licm.rs @@ -5,7 +5,7 @@ use crate::dominator_tree::DominatorTree; use crate::entity::{EntityList, ListPool}; use crate::flowgraph::{BasicBlock, ControlFlowGraph}; use crate::fx::FxHashSet; -use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value}; +use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value}; use crate::isa::TargetIsa; use crate::loop_analysis::{Loop, LoopAnalysis}; use crate::timing; @@ -145,8 +145,7 @@ fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) /// Test whether the given opcode is unsafe to even consider for LICM. fn trivially_unsafe_for_licm(opcode: Opcode) -> bool { - opcode.can_load() - || opcode.can_store() + opcode.can_store() || opcode.is_call() || opcode.is_branch() || opcode.is_terminator() @@ -156,12 +155,25 @@ fn trivially_unsafe_for_licm(opcode: Opcode) -> bool { || opcode.writes_cpu_flags() } +fn is_unsafe_load(inst_data: &InstructionData) -> bool { + match *inst_data { + InstructionData::Load { flags, .. } | InstructionData::LoadComplex { flags, .. } => { + !flags.readonly() || !flags.notrap() + } + _ => inst_data.opcode().can_load(), + } +} + /// Test whether the given instruction is loop-invariant. fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet) -> bool { if trivially_unsafe_for_licm(dfg[inst].opcode()) { return false; } + if is_unsafe_load(&dfg[inst]) { + return false; + } + let inst_args = dfg.inst_args(inst); for arg in inst_args { let arg = dfg.resolve_aliases(*arg); From 9af22a79f0105a674e2abfa9c00ccbd88625b7d2 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Sun, 7 Apr 2019 18:36:15 +0200 Subject: [PATCH 2/4] Format --- cranelift-codegen/src/licm.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cranelift-codegen/src/licm.rs b/cranelift-codegen/src/licm.rs index baaa8600d..14c4630dc 100644 --- a/cranelift-codegen/src/licm.rs +++ b/cranelift-codegen/src/licm.rs @@ -5,7 +5,9 @@ use crate::dominator_tree::DominatorTree; use crate::entity::{EntityList, ListPool}; use crate::flowgraph::{BasicBlock, ControlFlowGraph}; use crate::fx::FxHashSet; -use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value}; +use crate::ir::{ + DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value, +}; use crate::isa::TargetIsa; use crate::loop_analysis::{Loop, LoopAnalysis}; use crate::timing; From b9fdc6657bc65f0664872fc0bf7739fd70300f0b Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Sun, 7 Apr 2019 19:27:17 +0200 Subject: [PATCH 3/4] TC: LICM of nontrapping readonly load --- filetests/licm/load_readonly_notrap.clif | 48 ++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 filetests/licm/load_readonly_notrap.clif diff --git a/filetests/licm/load_readonly_notrap.clif b/filetests/licm/load_readonly_notrap.clif new file mode 100644 index 000000000..7ccb80293 --- /dev/null +++ b/filetests/licm/load_readonly_notrap.clif @@ -0,0 +1,48 @@ +test licm + +target x86_64 + +;; Nontrapping readonly load from address that is not loop-dependent +;; should be hoisted out of loop. + +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +ebb0(v0: i32, v1: i64): + jump ebb1(v0, v1) + +ebb1(v2: i32, v3: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + v6 = load.i32 notrap aligned readonly v5 + v7 = iadd v2, v6 + brz v2, ebb2(v2) + v8 = isub v2, v4 + jump ebb1(v8, v3) + +ebb2(v9: i32): + return v9 + +} +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: ebb0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: v6 = load.i32 notrap aligned readonly v5 +; nextln: jump ebb1(v0, v1) +; nextln: +; nextln: ebb1(v2: i32, v3: i64): +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, ebb2(v2) +; nextln: v8 = isub v2, v4 +; nextln: jump ebb1(v8, v3) +; nextln: +; nextln: ebb2(v9: i32): +; nextln: return v9 +; nextln: } From 0b05d0c49f5b0bf436fbb3e14e05d016d9cd2a4d Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Sun, 7 Apr 2019 19:33:58 +0200 Subject: [PATCH 4/4] More test cases --- filetests/licm/load_readonly_notrap.clif | 12 +++--- filetests/licm/reject_load_notrap.clif | 49 ++++++++++++++++++++++++ filetests/licm/reject_load_readonly.clif | 49 ++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 6 deletions(-) create mode 100644 filetests/licm/reject_load_notrap.clif create mode 100644 filetests/licm/reject_load_readonly.clif diff --git a/filetests/licm/load_readonly_notrap.clif b/filetests/licm/load_readonly_notrap.clif index 7ccb80293..9a9d2dcbf 100644 --- a/filetests/licm/load_readonly_notrap.clif +++ b/filetests/licm/load_readonly_notrap.clif @@ -24,8 +24,8 @@ ebb1(v2: i32, v3: i64): ebb2(v9: i32): return v9 - } + ; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { ; nextln: gv0 = vmctx ; nextln: gv1 = load.i64 notrap aligned readonly gv0 @@ -38,11 +38,11 @@ ebb2(v9: i32): ; nextln: jump ebb1(v0, v1) ; nextln: ; nextln: ebb1(v2: i32, v3: i64): -; nextln: v7 = iadd v2, v6 -; nextln: brz v2, ebb2(v2) -; nextln: v8 = isub v2, v4 -; nextln: jump ebb1(v8, v3) +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, ebb2(v2) +; nextln: v8 = isub v2, v4 +; nextln: jump ebb1(v8, v3) ; nextln: ; nextln: ebb2(v9: i32): -; nextln: return v9 +; nextln: return v9 ; nextln: } diff --git a/filetests/licm/reject_load_notrap.clif b/filetests/licm/reject_load_notrap.clif new file mode 100644 index 000000000..1d26faa71 --- /dev/null +++ b/filetests/licm/reject_load_notrap.clif @@ -0,0 +1,49 @@ +test licm + +target x86_64 + +;; Nontrapping possibly-not-readonly load from address that is not +;; loop-dependent should *not* be hoisted out of loop, though the +;; address computation can be. + +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +ebb0(v0: i32, v1: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + jump ebb1(v0, v1) + +ebb1(v2: i32, v3: i64): + v6 = load.i32 notrap aligned v5 + v7 = iadd v2, v6 + brz v2, ebb2(v2) + v8 = isub v2, v4 + jump ebb1(v8, v3) + +ebb2(v9: i32): + return v9 +} + +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: ebb0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: jump ebb1(v0, v1) +; nextln: +; nextln: ebb1(v2: i32, v3: i64): +; nextln: v6 = load.i32 notrap aligned v5 +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, ebb2(v2) +; nextln: v8 = isub v2, v4 +; nextln: jump ebb1(v8, v3) +; nextln: +; nextln: ebb2(v9: i32): +; nextln: return v9 +; nextln: } diff --git a/filetests/licm/reject_load_readonly.clif b/filetests/licm/reject_load_readonly.clif new file mode 100644 index 000000000..5b6a41171 --- /dev/null +++ b/filetests/licm/reject_load_readonly.clif @@ -0,0 +1,49 @@ +test licm + +target x86_64 + +;; Maybe-trapping readonly load from address that is not +;; loop-dependent should *not* be hoisted out of loop, though the +;; address computation can be hoisted. + +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +ebb0(v0: i32, v1: i64): + jump ebb1(v0, v1) + +ebb1(v2: i32, v3: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + v6 = load.i32 aligned readonly v5 + v7 = iadd v2, v6 + brz v2, ebb2(v2) + v8 = isub v2, v4 + jump ebb1(v8, v3) + +ebb2(v9: i32): + return v9 +} + +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: ebb0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: jump ebb1(v0, v1) +; nextln: +; nextln: ebb1(v2: i32, v3: i64): +; nextln: v6 = load.i32 aligned readonly v5 +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, ebb2(v2) +; nextln: v8 = isub v2, v4 +; nextln: jump ebb1(v8, v3) +; nextln: +; nextln: ebb2(v9: i32): +; nextln: return v9 +; nextln: }