Skip to content

Commit

Permalink
[AArch64] Remove superfluous sxtw in peephole opt (llvm#96293)
Browse files Browse the repository at this point in the history
Across a basic-block we might have an i32 extract from a value that only
operates on upper bits (for example a sxtw). We can replace the COPY
with a new version skipping the sxtw.
  • Loading branch information
davemgreen authored and aaryanshukla committed Jul 14, 2024
1 parent b25a545 commit e3ce9c9
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 8 deletions.
32 changes: 32 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
bool visitINSvi64lane(MachineInstr &MI);
bool visitFMOVDr(MachineInstr &MI);
// Rewrites a `COPY %x.sub_32` so that it skips a sign-extending
// SBFMXri (imms 0, 31) feeding %x; returns true if MI was changed.
bool visitCopy(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;

StringRef getPassName() const override {
Expand Down Expand Up @@ -690,6 +691,34 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
return true;
}

// Across a basic-block we might have an i32 extract from a value that only
// operates on upper bits (for example a sxtw). We can replace the COPY with a
// new version skipping the sxtw.
//
// Matches `MI = COPY InputReg.sub_32` where InputReg has a single non-debug
// use and is defined, possibly through a chain of single-use full COPYs, by
// `SBFMXri SrcReg, 0, 31`. On a match MI is rewritten to read SrcReg.sub_32
// and the SBFMXri together with every COPY in the chain is erased.
//
// Returns true if MI was rewritten, false if the pattern did not match.
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  // Walk backwards through single-use full COPYs to the instruction that
  // actually produces the value.
  MachineInstr *FirstDefMI = MRI->getUniqueVRegDef(InputReg);
  MachineInstr *SrcMI = FirstDefMI;
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg()))
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());

  if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
      SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
    return false;

  Register SrcReg = SrcMI->getOperand(1).getReg();
  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  MI.getOperand(1).setReg(SrcReg);
  // SrcReg is now read by MI as well, so kill flags on its other uses may no
  // longer mark the last use; drop them rather than leave them stale.
  MRI->clearKillFlags(SrcReg);

  // Erase every COPY between InputReg's definition and the SBFMXri, not only
  // the first one: each of them reads a register whose definition is about to
  // be removed. The next link is looked up before the current one is erased.
  for (MachineInstr *DeadMI = FirstDefMI; DeadMI != SrcMI;) {
    MachineInstr *NextMI =
        MRI->getUniqueVRegDef(DeadMI->getOperand(1).getReg());
    DeadMI->eraseFromParent();
    DeadMI = NextMI;
  }
  SrcMI->eraseFromParent();
  return true;
}

bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
Expand Down Expand Up @@ -771,6 +800,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
case AArch64::FMOVDr:
Changed |= visitFMOVDr(MI);
break;
case AArch64::COPY:
Changed |= visitCopy(MI);
break;
}
}
}
Expand Down
46 changes: 46 additions & 0 deletions llvm/lib/Target/AArch64/peephole-sxtw.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s

---
# A sub_32 COPY reads the result of an SBFMXri with immediates 0, 31 directly.
# The CHECK lines expect the SBFMXri to be removed and the COPY to read the
# SBFMXri's input register instead.
name: removeSxtw
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: removeSxtw
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
; CHECK-NEXT: $w0 = COPY [[ADDWri]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr64 = COPY $x0
%1:gpr64 = SBFMXri %0:gpr64, 0, 31
%2:gpr32sp = COPY %1.sub_32:gpr64
%3:gpr32sp = ADDWri %2:gpr32sp, 1, 0
$w0 = COPY %3:gpr32sp
RET_ReallyLR implicit $w0
...
---
# Same as removeSxtw, but the SBFMXri result reaches the sub_32 COPY through
# one intermediate full COPY (gpr64 -> gpr64all). The CHECK lines expect both
# the SBFMXri and the intermediate COPY to be removed.
name: extraCopy
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: extraCopy
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
; CHECK-NEXT: $w0 = COPY [[ADDWri]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:gpr64 = COPY $x0
%1:gpr64 = SBFMXri %0:gpr64, 0, 31
%2:gpr64all = COPY %1:gpr64
%3:gpr32sp = COPY %2.sub_32:gpr64all
%4:gpr32sp = ADDWri %3:gpr32sp, 1, 0
$w0 = COPY %4:gpr32sp
RET_ReallyLR implicit $w0
...
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,7 @@ define i64 @smull_ldrsw_shift(ptr %x0, i64 %x1) {
; CHECK-LABEL: smull_ldrsw_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrsw x8, [x0]
; CHECK-NEXT: sxtw x9, w1
; CHECK-NEXT: smull x0, w8, w9
; CHECK-NEXT: smull x0, w8, w1
; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
Expand Down Expand Up @@ -490,8 +489,7 @@ define i64 @smaddl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
; CHECK-LABEL: smaddl_ldrsw_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrsw x8, [x0]
; CHECK-NEXT: sxtw x9, w1
; CHECK-NEXT: smaddl x0, w8, w9, x2
; CHECK-NEXT: smaddl x0, w8, w1, x2
; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
Expand Down Expand Up @@ -654,8 +652,7 @@ define i64 @smnegl_ldrsw_shift(ptr %x0, i64 %x1) {
; CHECK-LABEL: smnegl_ldrsw_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrsw x8, [x0]
; CHECK-NEXT: sxtw x9, w1
; CHECK-NEXT: smnegl x0, w8, w9
; CHECK-NEXT: smnegl x0, w8, w1
; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
Expand Down Expand Up @@ -818,8 +815,7 @@ define i64 @smsubl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
; CHECK-LABEL: smsubl_ldrsw_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrsw x8, [x0]
; CHECK-NEXT: sxtw x9, w1
; CHECK-NEXT: smsubl x0, w8, w9, x2
; CHECK-NEXT: smsubl x0, w8, w1, x2
; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
Expand Down

0 comments on commit e3ce9c9

Please sign in to comment.