Skip to content

Commit

Permalink
AArch64: Vector load and store support
Browse files Browse the repository at this point in the history
This commit adds the following support for vector operations:
- Quad-word vector load and store operations, and the relevant binary
encodings.
- Handling of vector registers in register dependencies.

Signed-off-by: Md. Alvee Noor <mnoor@unb.ca>
  • Loading branch information
alvee-unb committed Mar 29, 2021
1 parent fe78cd7 commit c842228
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 17 deletions.
23 changes: 21 additions & 2 deletions compiler/aarch64/codegen/OMRMachine.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2018, 2020 IBM Corp. and others
* Copyright (c) 2018, 2021 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -125,6 +125,7 @@ TR::RealRegister *OMR::ARM64::Machine::findBestFreeRegister(TR::Instruction *cur
last = TR::RealRegister::LastAssignableGPR;
break;
case TR_FPR:
case TR_VRF:
first = maskI = TR::RealRegister::FirstFPR;
last = TR::RealRegister::LastFPR;
break;
Expand Down Expand Up @@ -242,6 +243,7 @@ TR::RealRegister *OMR::ARM64::Machine::freeBestRegister(TR::Instruction *current
last = TR::RealRegister::LastGPR;
break;
case TR_FPR:
case TR_VRF:
first = maskI = TR::RealRegister::FirstFPR;
last = TR::RealRegister::LastFPR;
break;
Expand Down Expand Up @@ -345,6 +347,7 @@ TR::RealRegister *OMR::ARM64::Machine::freeBestRegister(TR::Instruction *current
}
break;
case TR_FPR:
case TR_VRF:
if (!comp->getOption(TR_DisableOOL) &&
(cg->isOutOfLineColdPath() || cg->isOutOfLineHotPath()) &&
registerToSpill->getBackingStorage())
Expand Down Expand Up @@ -427,6 +430,9 @@ TR::RealRegister *OMR::ARM64::Machine::freeBestRegister(TR::Instruction *current
case TR_FPR:
loadOp = TR::InstOpCode::vldrimmd;
break;
case TR_VRF:
loadOp = TR::InstOpCode::vldrimmq;
break;
default:
TR_ASSERT(false, "Unsupported RegisterKind.");
break;
Expand Down Expand Up @@ -499,6 +505,10 @@ TR::RealRegister *OMR::ARM64::Machine::reverseSpillState(TR::Instruction *curren
dataSize = 8;
storeOp = TR::InstOpCode::vstrimmd;
break;
case TR_VRF:
dataSize = 16;
storeOp = TR::InstOpCode::vstrimmq;
break;
default:
TR_ASSERT(false, "Unsupported RegisterKind.");
break;
Expand All @@ -516,6 +526,9 @@ TR::RealRegister *OMR::ARM64::Machine::reverseSpillState(TR::Instruction *curren
case TR_FPR:
dataSize = 8;
break;
case TR_VRF:
dataSize = 16;
break;
default:
TR_ASSERT(false, "Unsupported RegisterKind.");
break;
Expand Down Expand Up @@ -612,6 +625,9 @@ TR::RealRegister *OMR::ARM64::Machine::reverseSpillState(TR::Instruction *curren
case TR_FPR:
storeOp = TR::InstOpCode::vstrimmd;
break;
case TR_VRF:
storeOp = TR::InstOpCode::vstrimmq;
break;
default:
TR_ASSERT(false, "Unsupported RegisterKind.");
break;
Expand Down Expand Up @@ -689,6 +705,9 @@ static void registerCopy(TR::Instruction *precedingInstruction,
case TR_FPR:
generateTrg1Src1Instruction(cg, TR::InstOpCode::fmovd, node, targetReg, sourceReg, precedingInstruction);
break;
case TR_VRF:
generateTrg1Src2Instruction(cg, TR::InstOpCode::vorr2d, node, targetReg, sourceReg, sourceReg, precedingInstruction);
break;
default:
TR_ASSERT(false, "Unsupported RegisterKind.");
break;
Expand Down Expand Up @@ -780,7 +799,7 @@ void OMR::ARM64::Machine::coerceRegisterAssignment(TR::Instruction *currentInstr
TR::RealRegister *spareReg = NULL;
TR::Register *currentTargetVirtual = targetRegister->getAssignedRegister();

bool needTemp = (rk == TR_FPR); // xor is unavailable for register exchange
bool needTemp = (rk == TR_FPR || rk == TR_VRF); // xor is unavailable for register exchange

if (targetRegister->getState() == TR::RealRegister::Blocked)
{
Expand Down
40 changes: 36 additions & 4 deletions compiler/aarch64/codegen/OMRMemoryReference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -864,8 +864,24 @@ uint8_t *OMR::ARM64::MemoryReference::generateBinaryEncoding(TR::Instruction *cu
}
else if (isImm12OffsetInstruction(enc))
{
uint32_t size = (enc >> 30) & 3; /* b=0, h=1, w=2, x=3 */
uint32_t shifted = displacement >> size;
uint32_t size = (enc >> 30) & 3; /* b=0, h=1, w=2, x=3, q=0 */
uint32_t opc = (enc >> 22) & 3; /* 8bit, 16bit, 32bit, 64bit: 0 or 1; 128bit: 2 or 3 */
uint32_t bitsToShift;

if (opc == 2 || opc == 3)
{
bitsToShift = 4;
}
else if (opc == 0 || opc == 1)
{
bitsToShift = size;
}
else
{
TR_ASSERT_FATAL(false, "Instruction format is unknown.");
}

uint32_t shifted = displacement >> bitsToShift;

if (size > 0)
{
Expand Down Expand Up @@ -1027,8 +1043,24 @@ uint32_t OMR::ARM64::MemoryReference::estimateBinaryLength(TR::InstOpCode op)
}
else if (isImm12OffsetInstruction(enc))
{
uint32_t size = (enc >> 30) & 3; /* b=0, h=1, w=2, x=3 */
uint32_t shifted = displacement >> size;
uint32_t size = (enc >> 30) & 3; /* b=0, h=1, w=2, x=3, q=0 */
uint32_t opc = (enc >> 22) & 3; /* 8bit, 16bit, 32bit, 64bit: 0 or 1; 128bit: 2 or 3 */
uint32_t bitsToShift;

if (opc == 2 || opc == 3)
{
bitsToShift = 4;
}
else if (opc == 0 || opc == 1)
{
bitsToShift = size;
}
else
{
TR_ASSERT_FATAL(false, "Instruction format is unknown.");
}

uint32_t shifted = displacement >> bitsToShift;

if (size > 0)
{
Expand Down
6 changes: 3 additions & 3 deletions compiler/aarch64/codegen/OMRRealRegister.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2018, 2020 IBM Corp. and others
* Copyright (c) 2018, 2021 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -32,7 +32,7 @@ OMR::ARM64::RealRegister::regMaskToRealRegister(TR_RegisterMask mask, TR_Registe

if (rk == TR_GPR)
rr = FirstGPR;
else if (rk == TR_FPR)
else if ((rk == TR_FPR) || (rk == TR_VRF))
rr = FirstFPR;

return cg->machine()->getRealRegister(RegNum(rr+bitPos));
Expand All @@ -43,7 +43,7 @@ OMR::ARM64::RealRegister::getAvailableRegistersMask(TR_RegisterKinds rk)
{
if (rk == TR_GPR)
return AvailableGPRMask;
else if (rk == TR_FPR)
else if ((rk == TR_FPR) || (rk == TR_VRF))
return AvailableFPRMask;
else
return 0;
Expand Down
12 changes: 10 additions & 2 deletions compiler/aarch64/codegen/OMRRegisterDependency.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2018, 2020 IBM Corp. and others
* Copyright (c) 2018, 2021 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -89,7 +89,7 @@ OMR::ARM64::RegisterDependencyConditions::RegisterDependencyConditions(
{
TR_RegisterKinds kind = reg->getKind();

TR_ASSERT_FATAL((kind == TR_GPR) || (kind == TR_FPR), "Invalid register kind.");
TR_ASSERT_FATAL((kind == TR_GPR) || (kind == TR_FPR) || (kind == TR_VRF), "Invalid register kind.");

if (kind == TR_GPR)
{
Expand All @@ -103,6 +103,11 @@ OMR::ARM64::RegisterDependencyConditions::RegisterDependencyConditions(
}
iCursor = generateMovInstruction(cg, node, copyReg, reg, true, iCursor);
}
else if (kind == TR_VRF)
{
copyReg = cg->allocateRegister(TR_VRF);
iCursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::vorr2d, node, copyReg, reg, reg, iCursor);
}
else
{
bool isSinglePrecision = reg->isSinglePrecision();
Expand Down Expand Up @@ -370,6 +375,9 @@ void TR_ARM64RegisterDependencyGroup::assignRegisters(
case TR_FPR:
opCode = TR::InstOpCode::vldrimmd;
break;
case TR_VRF:
opCode = TR::InstOpCode::vldrimmq;
break;
default:
TR_ASSERT(0, "\nRegister kind not supported in OOL spill\n");
break;
Expand Down
18 changes: 18 additions & 0 deletions compiler/aarch64/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,10 @@ TR::Register *commonLoadEvaluator(TR::Node *node, TR::InstOpCode::Mnemonic op, T
{
tempReg = cg->allocateRegister(TR_FPR);
}
else if (op == TR::InstOpCode::vldrimmq)
{
tempReg = cg->allocateRegister(TR_VRF);
}
else
{
tempReg = cg->allocateRegister();
Expand Down Expand Up @@ -510,6 +514,13 @@ OMR::ARM64::TreeEvaluator::cloadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
return commonLoadEvaluator(node, TR::InstOpCode::ldrhimm, cg);
}

// Evaluator for vector loads; the same routine also handles vloadi.
// The node is lowered through the shared load path using the quad-word
// (vldrimmq) load opcode, and the register produced by that path is returned.
TR::Register *
OMR::ARM64::TreeEvaluator::vloadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   const TR::InstOpCode::Mnemonic quadWordLoadOp = TR::InstOpCode::vldrimmq;
   TR::Register *loadedReg = commonLoadEvaluator(node, quadWordLoadOp, cg);
   return loadedReg;
   }

TR::Register *
OMR::ARM64::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
Expand Down Expand Up @@ -608,6 +619,13 @@ OMR::ARM64::TreeEvaluator::astoreEvaluator(TR::Node *node, TR::CodeGenerator *cg
return commonStoreEvaluator(node, op, cg);
}

// Evaluator for vector stores; the same routine also handles vstorei.
// The node is lowered through the shared store path using the quad-word
// (vstrimmq) store opcode, and that path's result is returned unchanged.
TR::Register *
OMR::ARM64::TreeEvaluator::vstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   const TR::InstOpCode::Mnemonic quadWordStoreOp = TR::InstOpCode::vstrimmq;
   TR::Register *storeResult = commonStoreEvaluator(node, quadWordStoreOp, cg);
   return storeResult;
   }

TR::Register *
OMR::ARM64::TreeEvaluator::monentEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
Expand Down
12 changes: 6 additions & 6 deletions compiler/aarch64/codegen/OMRTreeEvaluatorTable.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2018, 2020 IBM Corp. and others
* Copyright (c) 2018, 2021 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -468,7 +468,7 @@
#define _vdecEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vnegEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vcomEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vaddEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vaddEvaluator TR::TreeEvaluator::vaddEvaluator
#define _vsubEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vmulEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vdivEvaluator TR::TreeEvaluator::unImpOpEvaluator
Expand All @@ -489,10 +489,10 @@
#define _vucmpleEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vcmpgeEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vucmpgeEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vloadEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vloadiEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vstoreEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vstoreiEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vloadEvaluator TR::TreeEvaluator::vloadEvaluator
#define _vloadiEvaluator TR::TreeEvaluator::vloadEvaluator
#define _vstoreEvaluator TR::TreeEvaluator::vstoreEvaluator
#define _vstoreiEvaluator TR::TreeEvaluator::vstoreEvaluator
#define _vrandEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vreturnEvaluator TR::TreeEvaluator::unImpOpEvaluator
#define _vcallEvaluator TR::TreeEvaluator::unImpOpEvaluator
Expand Down

0 comments on commit c842228

Please sign in to comment.