Skip to content

Commit

Permalink
Revert "[ARM][LowOverheadLoops] Adjust Start insertion."
Browse files Browse the repository at this point in the history
This reverts commit 38f625d.

This commit contains some holes in its logic and has been causing
issues since it was committed. The idea sounds OK but some cases were not
handled correctly. Instead of trying to fix that up later it is probably
simpler to revert it and work to reimplement it in a more reliable way.
  • Loading branch information
davemgreen committed Oct 20, 2020
1 parent 39613c2 commit 6dcbc32
Show file tree
Hide file tree
Showing 16 changed files with 162 additions and 91 deletions.
62 changes: 38 additions & 24 deletions llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
Expand Up @@ -646,10 +646,47 @@ bool LowOverheadLoop::ValidateTailPredicate() {
return false;
}

// The element count register may be defined after InsertPt, in which case we
// need to try to move either InsertPt or the def so that the [w|d]lstp can
// use the value.

if (StartInsertPt != StartInsertBB->end() &&
!RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) {
if (auto *ElemDef = RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) {
if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) {
ElemDef->removeFromParent();
StartInsertBB->insert(StartInsertPt, ElemDef);
LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: "
<< *ElemDef);
} else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) {
StartInsertPt->removeFromParent();
StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
&*StartInsertPt);
LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
} else {
// If we fail to move an instruction and the element count is provided
// by a mov, use the mov operand if it will have the same value at the
// insertion point
MachineOperand Operand = ElemDef->getOperand(1);
if (isMovRegOpcode(ElemDef->getOpcode()) &&
RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg()) ==
RDA.getUniqueReachingMIDef(&*StartInsertPt, Operand.getReg())) {
TPNumElements = Operand;
NumElements = TPNumElements.getReg();
} else {
LLVM_DEBUG(dbgs()
<< "ARM Loops: Unable to move element count to loop "
<< "start instruction.\n");
return false;
}
}
}
}

// Could inserting the [W|D]LSTP cause some unintended effects? In a perfect
// world the [w|d]lstp instruction would be last instruction in the preheader
// and so it would only affect instructions within the loop body. But due to
// scheduling, and/or the logic in this pass, the insertion point can
// scheduling, and/or the logic in this pass (above), the insertion point can
// be moved earlier. So if the Loop Start isn't the last instruction in the
// preheader, and if the initial element count is smaller than the vector
// width, the Loop Start instruction will immediately generate one or more
Expand Down Expand Up @@ -1068,35 +1105,12 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
return true;
};

// We know that we can safely define LR at InsertPt, but maybe we could
// push the insertion point to later on in the basic block.
auto TryAdjustInsertionPoint = [](MachineBasicBlock::iterator &InsertPt,
MachineInstr *Start,
ReachingDefAnalysis &RDA) {

MachineBasicBlock *MBB = InsertPt->getParent();
MachineBasicBlock::iterator FirstNonTerminator =
MBB->getFirstTerminator();
unsigned CountReg = Start->getOperand(0).getReg();

// Get the latest possible insertion point and check whether the semantics
// will be maintained if Start was inserted there.
if (FirstNonTerminator == MBB->end()) {
if (RDA.isReachingDefLiveOut(Start, CountReg) &&
RDA.isReachingDefLiveOut(Start, ARM::LR))
InsertPt = FirstNonTerminator;
} else if (RDA.hasSameReachingDef(Start, &*FirstNonTerminator, CountReg) &&
RDA.hasSameReachingDef(Start, &*FirstNonTerminator, ARM::LR))
InsertPt = FirstNonTerminator;
};

if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA,
ToRemove)) {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
Revert = true;
return;
}
TryAdjustInsertionPoint(StartInsertPt, Start, RDA);
LLVM_DEBUG(if (StartInsertPt == StartInsertBB->end())
dbgs() << "ARM Loops: Will insert LoopStart at end of block\n";
else
Expand Down
Expand Up @@ -153,17 +153,25 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7
; CHECK: renamable $r3, dead $cpsr = tADDi3 renamable $r2, 3, 14 /* CC::al */, $noreg
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $q1, $r0, $r1
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv12, align 4)
; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1315, align 4)
; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 0, killed $noreg
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: liveins: $lr, $q1, $r0, $r1, $r2
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
; CHECK: MVE_VPST 2, implicit $vpr
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv12, align 4)
; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1315, align 4)
; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.middle.block:
; CHECK: liveins: $q1
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
Expand Down Expand Up @@ -277,18 +285,27 @@ body: |
; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
; CHECK: dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg
; CHECK: frame-setup CFI_INSTRUCTION def_cfa_register $r7
; CHECK: renamable $r3, dead $cpsr = tMOVi8 3, 14 /* CC::al */, $noreg
; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, renamable $r2, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
; CHECK: renamable $lr = t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3 = tLDRpci %const.0, 14 /* CC::al */, $noreg :: (load 4 from constant-pool)
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1
; CHECK: renamable $r2, dead $cpsr = tLSRri killed renamable $r2, 2, 14 /* CC::al */, $noreg
; CHECK: $s4 = VMOVS killed $s0, 14 /* CC::al */, $noreg, implicit killed $q1, implicit-def $q1
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r2
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $q1, $r0, $r1
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.lsr.iv13, align 4)
; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 0, $noreg :: (load 16 from %ir.lsr.iv1416, align 4)
; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 0, killed $noreg
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: liveins: $lr, $q1, $r0, $r1, $r2
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg
; CHECK: renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14 /* CC::al */, $noreg
; CHECK: MVE_VPST 2, implicit $vpr
; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv13, align 4)
; CHECK: renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr :: (load 16 from %ir.lsr.iv1416, align 4)
; CHECK: renamable $q1 = MVE_VFMAf32 killed renamable $q1, killed renamable $q2, killed renamable $q0, 1, killed renamable $vpr
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.middle.block:
; CHECK: liveins: $q1
; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
Expand Down
15 changes: 9 additions & 6 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
Expand Up @@ -163,14 +163,17 @@ body: |
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg
; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg
; CHECK: $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $r4 = tMOVr $lr, 14 /* CC::al */, $noreg
; CHECK: bb.1.do.body.i:
; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r4, $r12
; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1
; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg
; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: MVE_VPST 4, implicit $vpr
; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4)
; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.1
; CHECK: bb.2.arm_mean_f32_mve.exit:
; CHECK: successors: %bb.3(0x80000000)
; CHECK: liveins: $q0, $r0, $r1, $r2, $r4
Expand Down
11 changes: 7 additions & 4 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll
Expand Up @@ -17,13 +17,16 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float
; CHECK-NEXT: add.w lr, r12, r3, lsr #2
; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: mov r12, r0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: mov r4, lr
; CHECK-NEXT: dlstp.32 lr, r3
; CHECK-NEXT: .LBB0_1: @ %do.body.i
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r12], #16
; CHECK-NEXT: vadd.f32 q0, q0, q1
; CHECK-NEXT: letp lr, .LBB0_1
; CHECK-NEXT: vctp.32 r3
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrwt.u32 q1, [r12], #16
; CHECK-NEXT: vaddt.f32 q0, q0, q1
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %arm_mean_f32_mve.exit
; CHECK-NEXT: vmov s4, r1
; CHECK-NEXT: vadd.f32 s0, s3, s3
Expand Down
Expand Up @@ -117,21 +117,32 @@ body: |
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12
; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1)
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1)
; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1)
; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1)
; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4)
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4)
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.for.cond.cleanup:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
bb.0.entry:
Expand Down
23 changes: 17 additions & 6 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/move-start-after-def.mir
Expand Up @@ -117,21 +117,32 @@ body: |
; CHECK: bb.1.vector.ph:
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $r0, $r1, $r2, $r3
; CHECK: renamable $r12 = t2MOVi 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $lr = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = nuw t2ADDrs killed renamable $r12, renamable $r3, 11, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2BICri killed renamable $r12, 3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = t2DLS killed renamable $lr
; CHECK: $r12 = t2MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg
; CHECK: renamable $r3, dead $cpsr = tMOVi8 0, 14 /* CC::al */, $noreg
; CHECK: renamable $r12 = t2LSRri killed renamable $r12, 1, 14 /* CC::al */, $noreg, $noreg
; CHECK: $lr = MVE_DLSTP_32 killed renamable $r12
; CHECK: bb.2.vector.body:
; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r12
; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r1, renamable $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep45, align 1)
; CHECK: renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $q0 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep45, align 1)
; CHECK: renamable $r4, dead $cpsr = tADDrr renamable $r2, renamable $r3, 14 /* CC::al */, $noreg
; CHECK: renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 0, $noreg :: (load 4 from %ir.scevgep23, align 1)
; CHECK: renamable $r12 = t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $q1 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr :: (load 4 from %ir.scevgep23, align 1)
; CHECK: renamable $q0 = nuw nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0
; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 0, killed $noreg :: (store 16 into %ir.lsr.iv1, align 4)
; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
; CHECK: MVE_VPST 8, implicit $vpr
; CHECK: renamable $r0 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r0, 16, 1, killed renamable $vpr :: (store 16 into %ir.lsr.iv1, align 4)
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
; CHECK: bb.3.for.cond.cleanup:
; CHECK: tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
bb.0.entry:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
Expand Up @@ -451,9 +451,9 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: add.w lr, r3, r6, lsr #2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: vmov.32 q0[0], r12
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: .LBB6_5: @ %vector.body46
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r2
Expand Down Expand Up @@ -686,8 +686,8 @@ define i32 @wrongop(%struct.date* nocapture readonly %pd) {
; CHECK-NEXT: mla r2, r4, r3, r2
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vdup.32 q0, r3
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: .LBB8_6: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vctp.32 r1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Thumb2/mve-float16regloops.ll
Expand Up @@ -1156,8 +1156,8 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, half* noca
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
; CHECK-NEXT: ldr.w lr, [sp] @ 4-byte Reload
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: dls lr, lr
; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: .LBB16_6: @ %for.body
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
Expand Down

0 comments on commit 6dcbc32

Please sign in to comment.