From 7f3b3584cb287d6e53bf8415787bd05867e806d8 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 19 Mar 2024 20:16:18 +0100 Subject: [PATCH] [VPlan] Replace disjoint or with add instead of dropping disjoint. (#83821) Dropping disjoint from an OR may yield incorrect results, as some analysis may have converted it to an Add implicitly (e.g. SCEV used for dependence analysis). Instead, replace it with an equivalent Add. This is possible as all users of the disjoint OR only access lanes where the operands are disjoint or poison otherwise. Note that replacing all disjoint ORs with ADDs instead of dropping the flags is not strictly necessary. It is only needed for disjoint ORs that SCEV treated as ADDs, but those are not tracked. There are other places that may drop poison-generating flags; those likely need similar treatment. Fixes https://github.com/llvm/llvm-project/issues/81872 PR: https://github.com/llvm/llvm-project/pull/83821 --- .../Vectorize/LoopVectorizationPlanner.h | 3 +++ llvm/lib/Transforms/Vectorize/VPlan.h | 6 ++++++ .../Transforms/Vectorize/VPlanPatternMatch.h | 5 +++++ .../Transforms/Vectorize/VPlanTransforms.cpp | 17 +++++++++++++++++ .../Transforms/LoopVectorize/X86/pr81872.ll | 2 +- 5 files changed, 32 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index e86705e898890..5d03b66b0ce33 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -68,6 +68,9 @@ class VPBuilder { public: VPBuilder() = default; VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); } + VPBuilder(VPRecipeBase *InsertPt) { + setInsertPoint(InsertPt->getParent(), InsertPt->getIterator()); + } /// Clear the insertion point: created instructions will not be inserted into /// a block. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index d77c7554d50e4..d720f17a3a881 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1127,6 +1127,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { return WrapFlags.HasNSW; } + bool isDisjoint() const { + assert(OpType == OperationType::DisjointOp && + "recipe cannot have a disjoing flag"); + return DisjointFlags.IsDisjoint; + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void printFlags(raw_ostream &O) const; #endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index aa25359069451..a03a408686ef1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -261,6 +261,11 @@ m_Mul(const Op0_t &Op0, const Op1_t &Op1) { return m_Binary(Op0, Op1); } +template +inline AllBinaryRecipe_match +m_Or(const Op0_t &Op0, const Op1_t &Op1) { + return m_Binary(Op0, Op1); +} } // namespace VPlanPatternMatch } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a91ccefe4b6d7..c6ec99fbbf0a0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1216,6 +1216,23 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( // load/store. If the underlying instruction has poison-generating flags, // drop them directly. if (auto *RecWithFlags = dyn_cast(CurRec)) { + VPValue *A, *B; + using namespace llvm::VPlanPatternMatch; + // Dropping disjoint from an OR may yield incorrect results, as some + // analysis may have converted it to an Add implicitly (e.g. SCEV used + // for dependence analysis). Instead, replace it with an equivalent Add. + // This is possible as all users of the disjoint OR only access lanes + // where the operands are disjoint or poison otherwise. + if (match(RecWithFlags, m_Or(m_VPValue(A), m_VPValue(B))) && + RecWithFlags->isDisjoint()) { + VPBuilder Builder(RecWithFlags); + VPInstruction *New = Builder.createOverflowingOp( + Instruction::Add, {A, B}, {false, false}, + RecWithFlags->getDebugLoc()); + RecWithFlags->replaceAllUsesWith(New); + RecWithFlags->eraseFromParent(); + CurRec = New; + } RecWithFlags->dropPoisonGeneratingFlags(); } else { Instruction *Instr = dyn_cast_or_null( diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index 14acb6f57aa0c..3f38abc75a583 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -29,7 +29,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3