Skip to content

Commit

Permalink
Merge pull request #1705 from Sonicadvance1/AArch64_emitter_fixes
Browse files Browse the repository at this point in the history
[AArch64] Improvements to the AArch64 emitter.
  • Loading branch information
Sonicadvance1 committed Dec 21, 2014
2 parents 59e1a8a + d3c2e8f commit 8ea6c62
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 31 deletions.
190 changes: 170 additions & 20 deletions Source/Core/Common/Arm64Emitter.cpp
Expand Up @@ -4,7 +4,8 @@

#include <limits>

#include "Arm64Emitter.h"
#include "Common/Arm64Emitter.h"
#include "Common/MathUtil.h"

namespace Arm64Gen
{
Expand Down Expand Up @@ -402,24 +403,21 @@ void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2,
Write32((opc << 30) | (bVec << 26) | (op << 22) | (imm << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
}

void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
bool b64Bit = Is64Bit(Rt);
bool bVec = IsVector(Rt);

if (b64Bit)
imm >>= 3;
else
imm >>= 2;
u32 offset = imm & 0x1FF;

_assert_msg_(DYNA_REC, !(imm & ~0x1FF), "%s: offset too large %d", __FUNCTION__, imm);
_assert_msg_(DYNA_REC, imm < -256 || imm > 255, "%s: offset too large %d", __FUNCTION__, imm);

Rt = DecodeReg(Rt);
Rn = DecodeReg(Rn);
Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 12) | (op2 << 10) | (Rn << 5) | Rt);
Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | Rt);
}

void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
bool b64Bit = Is64Bit(Rt);
bool bVec = IsVector(Rt);
Expand All @@ -429,6 +427,7 @@ void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn,
else
imm >>= 2;

_assert_msg_(DYNA_REC, imm < 0, "%s(INDEX_UNSIGNED): offset must be positive", __FUNCTION__);
_assert_msg_(DYNA_REC, !(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __FUNCTION__, imm);

Rt = DecodeReg(Rt);
Expand Down Expand Up @@ -527,6 +526,14 @@ void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64R
((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
}

void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm)
{
Rd = DecodeReg(Rd);

Write32((op << 31) | ((imm & 0x3) << 29) | (0b10000 << 24) | \
((imm & 0x1FFFFC) << 3) | Rd);
}

// FixupBranch branching
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
{
Expand Down Expand Up @@ -1272,71 +1279,71 @@ void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)

// Load/Store register (immediate post-indexed)
// XXX: Most of these support vectors
void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm);
else
EncodeLoadStoreIndexedInst(0x0E0,
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm);
else
EncodeLoadStoreIndexedInst(0x0E1,
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm);
else
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E2 : 0x0E3,
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm);
else
EncodeLoadStoreIndexedInst(0x1E0,
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm);
else
EncodeLoadStoreIndexedInst(0x1E1,
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm);
else
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E2 : 0x1E3,
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm);
else
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E0 : 0x2E0,
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm);
else
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E1 : 0x2E1,
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
}
void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm)
void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
if (type == INDEX_UNSIGNED)
EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm);
Expand Down Expand Up @@ -1391,11 +1398,21 @@ void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType exten
EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm, extend);
}

// Address of label/page PC-relative
void ARM64XEmitter::ADR(ARM64Reg Rd, s32 imm)
{
EncodeAddressInst(0, Rd, imm);
}
void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)
{
EncodeAddressInst(1, Rd, imm >> 12);
}

// Wrapper around MOVZ+MOVK
void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
{
unsigned parts = Is64Bit(Rd) ? 4 : 2;
bool upload_part[4] = {};
BitSet32 upload_part(0);
bool need_movz = false;

if (!Is64Bit(Rd))
Expand Down Expand Up @@ -1425,12 +1442,42 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
for (unsigned i = 0; i < parts; ++i)
{
if ((imm >> (i * 16)) & 0xFFFF)
upload_part[i] = true;
upload_part[i] = 1;
else
need_movz = true;
}
}

u64 aligned_pc = (u64)GetCodePtr() & ~0xFFF;
s64 aligned_offset = (s64)imm - (s64)aligned_pc;
if (upload_part.Count() > 1 && std::abs(aligned_offset) < 0xFFFFFFFF)
{
// Immediate we are loading is within 4GB of our aligned range
// Most likely a address that we can load in one or two instructions
if (!(std::abs(aligned_offset) & 0xFFF))
{
// Aligned ADR
ADRP(Rd, (s32)aligned_offset);
return;
}
else
{
// If the address is within 1MB of PC we can load it in a single instruction still
s64 offset = (s64)imm - (s64)GetCodePtr();
if (offset >= -0xFFFFF && offset <= 0xFFFFF)
{
ADR(Rd, (s32)offset);
return;
}
else
{
ADRP(Rd, (s32)(aligned_offset & ~0xFFF));
ADD(Rd, Rd, imm & 0xFFF);
return;
}
}
}

for (unsigned i = 0; i < parts; ++i)
{
if (need_movz && upload_part[i])
Expand All @@ -1444,7 +1491,110 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);
}
}
}

void ARM64XEmitter::ABI_PushRegisters(BitSet32 registers)
{
int num_regs = registers.Count();

if (num_regs % 2)
{
bool first = true;

// Stack is required to be quad-word aligned.
u32 stack_size = ROUND_UP(num_regs * 8, 16);
u32 current_offset = 0;
std::vector<ARM64Reg> reg_pair;

for (auto it : registers)
{
if (first)
{
STR(INDEX_PRE, (ARM64Reg)(X0 + it), SP, -stack_size);
first = false;
current_offset += 16;
}
else
{
reg_pair.push_back((ARM64Reg)(X0 + it));
if (reg_pair.size() == 2)
{
STP(INDEX_UNSIGNED, reg_pair[0], reg_pair[1], SP, current_offset);
reg_pair.clear();
current_offset += 16;
}
}
}
}
else
{
std::vector<ARM64Reg> reg_pair;

for (auto it : registers)
{
reg_pair.push_back((ARM64Reg)(X0 + it));
if (reg_pair.size() == 2)
{
STP(INDEX_PRE, reg_pair[0], reg_pair[1], SP, -16);
reg_pair.clear();
}
}
}
}

void ARM64XEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
{
int num_regs = registers.Count();

if (num_regs % 2)
{
bool first = true;

std::vector<ARM64Reg> reg_pair;

for (auto it : registers)
{
if (ignore_mask[it])
it = WSP;

if (first)
{
LDR(INDEX_POST, (ARM64Reg)(X0 + it), SP, 16);
first = false;
}
else
{
reg_pair.push_back((ARM64Reg)(X0 + it));
if (reg_pair.size() == 2)
{
LDP(INDEX_POST, reg_pair[0], reg_pair[1], SP, 16);
reg_pair.clear();
}
}
}
}
else
{
std::vector<ARM64Reg> reg_pair;

for (int i = 31; i >= 0; --i)
{
if (!registers[i])
continue;

int reg = i;

if (ignore_mask[reg])
reg = WSP;

reg_pair.push_back((ARM64Reg)(X0 + reg));
if (reg_pair.size() == 2)
{
LDP(INDEX_POST, reg_pair[1], reg_pair[0], SP, 16);
reg_pair.clear();
}
}
}
}

}
Expand Down
32 changes: 21 additions & 11 deletions Source/Core/Common/Arm64Emitter.h
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include "Common/ArmCommon.h"
#include "Common/BitSet.h"
#include "Common/CodeBlock.h"
#include "Common/Common.h"

Expand Down Expand Up @@ -292,14 +293,15 @@ class ARM64XEmitter
void EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm);
void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt);
void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend);
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);

protected:
inline void Write32(u32 value)
Expand Down Expand Up @@ -524,15 +526,15 @@ class ARM64XEmitter
void LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);

// Load/Store register (immediate indexed)
void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, u32 imm);
void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

// Load/Store register (register offset)
void STRB(ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm, ExtendType extend = EXTEND_LSL);
Expand All @@ -551,8 +553,16 @@ class ARM64XEmitter
void LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);

// Address of label/page PC-relative
void ADR(ARM64Reg Rd, s32 imm);
void ADRP(ARM64Reg Rd, s32 imm);

// Wrapper around MOVZ+MOVK
void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);

// ABI related
void ABI_PushRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
};

class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
Expand Down

0 comments on commit 8ea6c62

Please sign in to comment.