Skip to content

Commit

Permalink
arm64jit: Implement vector unpacks.
Browse files Browse the repository at this point in the history
  • Loading branch information
unknownbrackets committed Sep 8, 2023
1 parent e03ae26 commit c523273
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 2 deletions.
11 changes: 11 additions & 0 deletions Common/Arm64Emitter.cpp
Expand Up @@ -3684,6 +3684,12 @@ void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
USHLL(src_size, Rd, Rn, shift, true);
}
// Public SHLL entry point: delegates to the four-argument overload with
// the `upper` flag cleared (SHLL2 is the variant that sets it).
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
	const bool useUpper = false;
	SHLL(src_size, Rd, Rn, useUpper);
}
// Public SHLL2 entry point: delegates to the four-argument SHLL overload
// with the `upper` flag set.
void ARM64FloatEmitter::SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
	const bool useUpper = true;
	SHLL(src_size, Rd, Rn, useUpper);
}
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
SXTL(src_size, Rd, Rn, false);
Expand Down Expand Up @@ -3723,6 +3729,11 @@ void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift,
EmitShiftImm(upper, 1, imm >> 3, imm & 7, 0x14, Rd, Rn);
}

// Shared emitter behind SHLL (upper == false) and SHLL2 (upper == true).
// There is no shift parameter: the shift amount is implied by src_size,
// so a 64-bit source size is rejected up front.
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) {
	_assert_msg_(src_size <= 32, "%s shift amount cannot be 64", __FUNCTION__);

	// Size argument passed on to Emit2RegMisc: 8 -> 0, 16 -> 1, 32 -> 2.
	const int sizeField = src_size >> 4;
	Emit2RegMisc(upper, 1, sizeField, 0b10011, Rd, Rn);
}

void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{
_assert_msg_(shift > 0, "%s shift amount must be greater than zero!", __FUNCTION__);
Expand Down
4 changes: 4 additions & 0 deletions Common/Arm64Emitter.h
Expand Up @@ -976,6 +976,9 @@ class ARM64FloatEmitter
void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
// SHLL / SHLL2 take no explicit shift argument: the shift amount is always
// equal to src_size.  Both forward to the SHLL overload that takes a
// `bool upper` (SHLL passes false, SHLL2 passes true).
void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
Expand Down Expand Up @@ -1034,6 +1037,7 @@ class ARM64FloatEmitter

void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
// Common implementation for SHLL (upper = false) and SHLL2 (upper = true); shift is implied by src_size.
void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
Expand Down
47 changes: 45 additions & 2 deletions Core/MIPS/ARM64/Arm64IRCompVec.cpp
Expand Up @@ -713,9 +713,52 @@ void Arm64JitBackend::CompIR_VecPack(IRInst inst) {
break;

case IROp::Vec2Unpack16To31:
case IROp::Vec4Unpack8To32:
// Viewed as 16-bit: ABxx -> 0A0B, then shift a zero into the sign place.
if (Overlap(inst.dest, 2, inst.src1, 1)) {
regs_.MapVec2(inst.dest, MIPSMap::DIRTY);
} else {
regs_.Map(inst);
}
if (inst.src1 == inst.dest + 1) {
fp_.USHLL2(16, regs_.FQ(inst.dest), regs_.FD(inst.src1), 15);
} else {
fp_.USHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.src1), 15);
}
break;

case IROp::Vec2Unpack16To32:
CompIR_Generic(inst);
// Just Vec2Unpack16To31, without the shift.
if (Overlap(inst.dest, 2, inst.src1, 1)) {
regs_.MapVec2(inst.dest, MIPSMap::DIRTY);
} else {
regs_.Map(inst);
}
if (inst.src1 == inst.dest + 1) {
fp_.SHLL2(16, regs_.FQ(inst.dest), regs_.FD(inst.src1));
} else {
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.src1));
}
break;

case IROp::Vec4Unpack8To32:
// Viewed as 8-bit: ABCD -> 000A000B000C000D.
if (Overlap(inst.dest, 4, inst.src1, 1)) {
regs_.MapVec4(inst.dest, MIPSMap::DIRTY);
if (inst.dest == inst.src1 + 2) {
fp_.SHLL2(8, regs_.FQ(inst.dest), regs_.FD(inst.src1 & ~3));
} else if (inst.dest != inst.src1) {
fp_.DUP(32, regs_.FQ(inst.dest), regs_.FQ(inst.src1), inst.src1 & 3);
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.dest));
} else {
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.src1));
}
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.dest));
} else {
regs_.Map(inst);
// Two steps: ABCD -> 0A0B0C0D, then to 000A000B000C000D.
fp_.SHLL(8, regs_.FQ(inst.dest), regs_.FD(inst.src1));
fp_.SHLL(16, regs_.FQ(inst.dest), regs_.FD(inst.dest));
}
break;

default:
Expand Down

0 comments on commit c523273

Please sign in to comment.