Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,11 +557,8 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AIE2P::mMvSclSrcRegClass.contains(SrcReg) &&
AIE2P::mMvSclDstRegClass.contains(DstReg)) {
// Build MultiSlotPseudo in preference
unsigned Opcode = (AIE2P::mAguSrcRegClass.contains(SrcReg) &&
AIE2P::mAguDstRegClass.contains(DstReg))
? AIE2P::MOV_scalar_pseudo
: AIE2P::MOV_alu_mv_mv_mv_scl;
BuildMI(MBB, MBBI, DL, get(Opcode), DstReg)
const unsigned MOVSclOpcode = getScalarMovOpcode(DstReg, SrcReg);
BuildMI(MBB, MBBI, DL, get(MOVSclOpcode), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else if ((AIE2P::eLRegClass.contains(SrcReg)) &&
(AIE2P::eLRegClass.contains(DstReg))) {
Expand Down Expand Up @@ -1179,6 +1176,17 @@ unsigned AIE2PInstrInfo::getConstantMovOpcode(MachineRegisterInfo &MRI,
llvm_unreachable("Expected imm. size <= 32 bits");
}

/// Pick the opcode used to copy scalar register \p SrcReg into \p DstReg.
///
/// The candidates are checked in priority order:
///   1. \p SrcReg and \p DstReg are both in eR      -> MOV_OR_pseudo
///   2. \p SrcReg is in mAguSrc and \p DstReg is in
///      mAguDst                                      -> MOV_scalar_pseudo
///   3. any other combination                        -> MOV_alu_mv_mv_mv_scl
///
/// \param DstReg destination register of the copy.
/// \param SrcReg source register of the copy.
/// \returns the AIE2P opcode selected for the register pair.
unsigned AIE2PInstrInfo::getScalarMovOpcode(Register DstReg,
                                            Register SrcReg) const {
  // eR-to-eR copies get the OR-based multi-slot pseudo.
  if (AIE2P::eRRegClass.contains(SrcReg) &&
      AIE2P::eRRegClass.contains(DstReg))
    return AIE2P::MOV_OR_pseudo;

  // Register pairs covered by the AGU source/destination classes get the
  // scalar multi-slot pseudo.
  if (AIE2P::mAguSrcRegClass.contains(SrcReg) &&
      AIE2P::mAguDstRegClass.contains(DstReg))
    return AIE2P::MOV_scalar_pseudo;

  // Everything else falls back to the plain scalar move.
  return AIE2P::MOV_alu_mv_mv_mv_scl;
}

/// \returns the opcode of the AIE2P cycle-separator instruction
/// (AIE2P::CYCLE_SEPARATOR).
unsigned AIE2PInstrInfo::getCycleSeparatorOpcode() const {
return AIE2P::CYCLE_SEPARATOR;
}
Expand All @@ -1192,7 +1200,8 @@ bool AIE2PInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AIE2P::PseudoMove: {
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
BuildMI(MBB, MI, DL, get(AIE2P::MOV_alu_mv_mv_mv_scl), Dst)
const unsigned MOVSclOpcode = getScalarMovOpcode(Dst, Src);
BuildMI(MBB, MI, DL, get(MOVSclOpcode), Dst)
.addReg(Src, getKillRegState(MI.getOperand(1).isKill()));
MI.eraseFromParent();
return true;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo {
unsigned getPseudoMoveOpcode() const override;
unsigned getConstantMovOpcode(MachineRegisterInfo &MRI, unsigned int Reg,
APInt &Val) const override;
unsigned getScalarMovOpcode(Register DstReg, Register SrcReg) const override;
unsigned getMvScl2MS(unsigned ConstTLastVal) const override;
unsigned getMvNBScl2MS(unsigned ConstTLastVal) const override;
unsigned getMvScl2MSTlastRegOpcode() const override;
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,17 @@ let hasDelaySlot = true, isBranch = true, isTerminator = true,
}
}

// MOV_OR: a register-to-register move expressed through the OR instruction.
// The record takes a single eR source operand and prints it as both OR inputs:
// `or $d0, $s0, $s0` leaves the value of $s0 in $d0, i.e. MOV $d0, $s0.
let Itinerary = II_OR, isCodeGenOnly = 1, hasSideEffects = false,
mayLoad = 0, mayStore = 0 in {
def MOV_OR : AIE2P_inst_alu_instr32 <(outs eR:$d0), (ins eR:$s0), "or", " $d0, $s0, $s0">{
// 5-bit encoding fields for the source and destination register operands.
bits<5> s0;
bits<5> d0;
// s0 appears twice in the alu field, matching the duplicated $s0 operand in
// the assembly string.
// NOTE(review): 0b0101 and 0b1 are presumably the fixed bits selecting OR in
// this encoding - confirm against the ISA encoding tables.
let alu = {s0, d0, s0, 0b0101, 0b1};
}
}

include "aie2p/AIE2PMultiSlotPseudoInstrInfo.td"
// Define _split variants for instructions using 2D registers
class Split2DInstr<Instruction RealInst, int opidx> : SplitPseudo<RealInst,
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PMultiSlotPseudoInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ let Itinerary = II_MOVS, hasSideEffects = false, mayLoad = false, mayStore = fal
"MOV_scalar_pseudo", "$dst, $src", [MOV_alu_mv_mv_mv_scl, MOVS] >;
}

// Multi-slot pseudo for an eR-to-eR register move. It lists
// MOV_alu_mv_mv_mv_scl and MOV_OR as its alternative instructions.
// OR is used to mimic MOV behavior rather than ADD with 0, because ADD uses
// the $srCarry register.
// NOTE(review): presumably a later pass picks one of the listed alternatives
// for the final instruction - confirm against the MultiSlot_Pseudo definition.
let Itinerary = II_MOV_alu_mv_mv_mv_scl, isMoveReg = 1, hasSideEffects = false, mayLoad = false, mayStore = false in {
def MOV_OR_pseudo : MultiSlot_Pseudo<(outs eR:$mRx), (ins eR:$mRx0),
"mov_scl_pseudo", "$mRx, $mRx0",
[MOV_alu_mv_mv_mv_scl, MOV_OR]>;
}

// Pseudo VLD
let hasSideEffects = false, mayLoad = true, mayStore = false in {
// Fifo fill.
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AIE/aie2p/cascade-stream.ll
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ define dso_local inreg noundef <32 x i32> @_Z28test_get_scd_expand_v32acc32ii(i3
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: vmov dm0, scd, r31; nopb ; nopxm
; CHECK-NEXT: ret lr
; CHECK-NEXT: mov r31, r1 // Delay Slot 5
; CHECK-NEXT: mov crscden, r0 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: or r31, r1, r1; mov crscden, r0 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: mov crscden, #1 // Delay Slot 1
Expand All @@ -167,8 +167,8 @@ define dso_local inreg noundef <64 x i32> @_Z28test_get_scd_expand_v64acc32ii(i3
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: vmov dm0, scd, r31; nopb ; nopxm
; CHECK-NEXT: ret lr
; CHECK-NEXT: mov r31, r1 // Delay Slot 5
; CHECK-NEXT: mov crscden, r0 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: or r31, r1, r1; mov crscden, r0 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: mov crscden, #1 // Delay Slot 1
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AIE/aie2p/insert-element-64bits.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ define dso_local noundef <64 x i8> @insert_element_64(<8 x i64> noundef %v, i32
; CHECK-NEXT: vlda bmll0, [sp, #-64]; nopb ; nops ; nopxm ; nopv
; CHECK-NEXT: nopx
; CHECK-NEXT: nop
; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: nop
; CHECK-NEXT: ret lr
; CHECK-NEXT: mov r29, r0 // Delay Slot 5
; CHECK-NEXT: mov r5, r2 // Delay Slot 4
; CHECK-NEXT: vmov x0, bmll0 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: or r4, r1, r1; mov r29, r0 // Delay Slot 4
; CHECK-NEXT: or r5, r2, r2; vmov x0, bmll0 // Delay Slot 3
; CHECK-NEXT: vinsert.64 x0, x0, r29, r5:r4 // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
Expand Down
52 changes: 51 additions & 1 deletion llvm/test/CodeGen/AIE/aie2p/postrapseudos/pseudomove.mir
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,56 @@ alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: test_pseudomove
; CHECK: $r0 = MOV_alu_mv_mv_mv_scl killed $r8
; CHECK: $r0 = MOV_OR_pseudo killed $r8
$r0 = PseudoMove killed $r8
...

---
name: pseudoMov_non_GPR
alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: pseudoMov_non_GPR
; CHECK: $r1 = MOV_alu_mv_mv_mv_scl $p0
; CHECK-NEXT: $p0 = MOV_scalar_pseudo $r1
; CHECK-NEXT: $p1 = MOV_scalar_pseudo $p0
; CHECK-NEXT: $r1 = MOV_alu_mv_mv_mv_scl $s0
; CHECK-NEXT: $s0 = MOV_alu_mv_mv_mv_scl $r1
; CHECK-NEXT: $s1 = MOV_alu_mv_mv_mv_scl $s0
$r1 = PseudoMove $p0
$p0 = PseudoMove $r1
$p1 = PseudoMove $p0
$r1 = PseudoMove $s0
$s0 = PseudoMove $r1
$s1 = PseudoMove $s0
...

---
name: COPY_GPR
alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: COPY_GPR
; CHECK: $r1 = MOV_OR_pseudo $r0
$r1 = COPY $r0
...

---
name: COPY_non_GPR
alignment: 16
body: |
bb.0 (align 16):
; CHECK-LABEL: name: COPY_non_GPR
; CHECK: $r1 = MOV_alu_mv_mv_mv_scl $p0
; CHECK-NEXT: $p0 = MOV_scalar_pseudo $r1
; CHECK-NEXT: $p1 = MOV_scalar_pseudo $p0
; CHECK-NEXT: $r1 = MOV_alu_mv_mv_mv_scl $s0
; CHECK-NEXT: $s0 = MOV_alu_mv_mv_mv_scl $r1
; CHECK-NEXT: $s1 = MOV_alu_mv_mv_mv_scl $s0
$r1 = COPY $p0
$p0 = COPY $r1
$p1 = COPY $p0
$r1 = COPY $s0
$s0 = COPY $r1
$s1 = COPY $s0
...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2p/run-physreg-copy.mir
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,11 @@ body: |
; CHECK: $p1 = MOV_scalar_pseudo $p0
; CHECK-NEXT: $lfl1 = VMOV_alu_mv_mv_x $lfl0
; CHECK-NEXT: $lfh1 = VMOV_alu_mv_mv_x $lfh0
; CHECK-NEXT: $r25 = MOV_alu_mv_mv_mv_scl $r24
; CHECK-NEXT: $r25 = MOV_OR_pseudo $r24
; CHECK-NEXT: $p0 = MOV_scalar_pseudo $p1
; CHECK-NEXT: $lfl0 = VMOV_alu_mv_mv_x $lfl1
; CHECK-NEXT: $lfh0 = VMOV_alu_mv_mv_x $lfh1
; CHECK-NEXT: $r24 = MOV_alu_mv_mv_mv_scl $r25
; CHECK-NEXT: $r24 = MOV_OR_pseudo $r25
$plfr1 = COPY $plfr0
$plfr0 = COPY $plfr1
...
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AIE/aie2p/streams.ll
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,7 @@ define dso_local void @_Z19test_put_ms_v64bf16Dv64_u6__bf16ii(<64 x bfloat> noun
; CHECK-LABEL: _Z19test_put_ms_v64bf16Dv64_u6__bf16ii:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopx ; mov r28, r1
; CHECK-NEXT: vextract.32 r0, x4, #0, vaddsign1
; CHECK-NEXT: nopa ; nopb ; nopx ; vextract.32 r0, x4, #0, vaddsign1
; CHECK-NEXT: vextract.32 r2, x4, #1, vaddsign1
; CHECK-NEXT: mov ms, r0; vextract.32 r0, x4, #2, vaddsign1
; CHECK-NEXT: mov ms, r2; vextract.32 r2, x4, #3, vaddsign1
Expand Down Expand Up @@ -171,7 +170,7 @@ define dso_local void @_Z19test_put_ms_v64bf16Dv64_u6__bf16ii(<64 x bfloat> noun
; CHECK-NEXT: mov ms, r2; vextract.32 r2, x5, #13, vaddsign1
; CHECK-NEXT: mov ms, r0; ret lr; vextract.32 r0, x5, #14, vaddsign1
; CHECK-NEXT: mov ms, r2; vextract.32 r2, x5, #15, vaddsign1 // Delay Slot 5
; CHECK-NEXT: mov ms, r0 // Delay Slot 4
; CHECK-NEXT: mov ms, r0; or r28, r1, r1 // Delay Slot 4
; CHECK-NEXT: mov ms, r2, r28 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AIE/aie2p/vscl2vec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -629,9 +629,9 @@ define dso_local noundef <32 x bfloat> @_Z13test_upd_elemDv32_u6__bf16iy(<32 x b
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; ret lr; nopm ; nops
; CHECK-NEXT: mov r4, r1 // Delay Slot 5
; CHECK-NEXT: mov r29, r0 // Delay Slot 4
; CHECK-NEXT: mov r5, r2 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mov r4, r1 // Delay Slot 4
; CHECK-NEXT: or r29, r0, r0; mov r5, r2 // Delay Slot 3
; CHECK-NEXT: vinsert.64 x0, x2, r29, r5:r4 // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
Expand Down Expand Up @@ -729,8 +729,8 @@ define dso_local noundef <2 x float> @_Z16test_ext_v2floatDv16_fii(<16 x float>
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mov r10, r0 // Delay Slot 2
; CHECK-NEXT: mov r1, r8 // Delay Slot 1
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: or r10, r0, r0; mov r1, r8 // Delay Slot 1
; CHECK-NEXT: lda lr, [sp, #-52]; nopx // 4-byte Folded Reload
; CHECK-NEXT: nop
; CHECK-NEXT: nop
Expand All @@ -740,8 +740,8 @@ define dso_local noundef <2 x float> @_Z16test_ext_v2floatDv16_fii(<16 x float>
; CHECK-NEXT: lda r11, [sp, #-64] // 4-byte Folded Reload
; CHECK-NEXT: ret lr
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mov r11, r0 // Delay Slot 4
; CHECK-NEXT: mov r0, r10 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: or r11, r0, r0; mov r0, r10 // Delay Slot 3
; CHECK-NEXT: paddxm [sp], #-64 // Delay Slot 2
; CHECK-NEXT: mov r1, r11 // Delay Slot 1
entry:
Expand Down Expand Up @@ -803,8 +803,8 @@ define dso_local noundef <2 x float> @_Z20test_extract_v2floatDv16_fii(<16 x flo
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mov r10, r0 // Delay Slot 2
; CHECK-NEXT: mov r1, r8 // Delay Slot 1
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: or r10, r0, r0; mov r1, r8 // Delay Slot 1
; CHECK-NEXT: lda lr, [sp, #-52]; nopx // 4-byte Folded Reload
; CHECK-NEXT: nop
; CHECK-NEXT: nop
Expand All @@ -814,8 +814,8 @@ define dso_local noundef <2 x float> @_Z20test_extract_v2floatDv16_fii(<16 x flo
; CHECK-NEXT: lda r11, [sp, #-64] // 4-byte Folded Reload
; CHECK-NEXT: ret lr
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mov r11, r0 // Delay Slot 4
; CHECK-NEXT: mov r0, r10 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: or r11, r0, r0; mov r0, r10 // Delay Slot 3
; CHECK-NEXT: paddxm [sp], #-64 // Delay Slot 2
; CHECK-NEXT: mov r1, r11 // Delay Slot 1
entry:
Expand Down Expand Up @@ -846,8 +846,8 @@ define dso_local noundef <2 x float> @_Z20test_extract_v2floatDv16_fi(<16 x floa
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mov r10, r0 // Delay Slot 2
; CHECK-NEXT: mov r1, r8 // Delay Slot 1
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: or r10, r0, r0; mov r1, r8 // Delay Slot 1
; CHECK-NEXT: lda lr, [sp, #-52]; nopx // 4-byte Folded Reload
; CHECK-NEXT: nop
; CHECK-NEXT: nop
Expand All @@ -857,8 +857,8 @@ define dso_local noundef <2 x float> @_Z20test_extract_v2floatDv16_fi(<16 x floa
; CHECK-NEXT: lda r11, [sp, #-64] // 4-byte Folded Reload
; CHECK-NEXT: ret lr
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mov r11, r0 // Delay Slot 4
; CHECK-NEXT: mov r0, r10 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: or r11, r0, r0; mov r0, r10 // Delay Slot 3
; CHECK-NEXT: paddxm [sp], #-64 // Delay Slot 2
; CHECK-NEXT: mov r1, r11 // Delay Slot 1
entry:
Expand Down