Skip to content

Commit

Permalink
Implement sbyteswap and lbyteswap on Power
Browse files Browse the repository at this point in the history
The sbyteswap and lbyteswap opcodes are now supported in the Power
codegen using code taken from OpenJ9's handling of the recognized
Short.reverseBytes and Long.reverseBytes methods.

Signed-off-by: Ben Thomas <ben@benthomas.ca>
  • Loading branch information
aviansie-ben committed Sep 14, 2020
1 parent 7d68a30 commit 8e2a3a7
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 2 deletions.
1 change: 1 addition & 0 deletions compiler/p/codegen/OMRCodeGenerator.cpp
Expand Up @@ -209,6 +209,7 @@ OMR::Power::CodeGenerator::CodeGenerator() :
self()->setSupportsPrimitiveArrayCopy();
self()->setSupportsReferenceArrayCopy();
self()->setSupportsSelect();
self()->setSupportsByteswap();

// disabled for now
//
Expand Down
183 changes: 183 additions & 0 deletions compiler/p/codegen/OMRTreeEvaluator.cpp
Expand Up @@ -5824,6 +5824,78 @@ TR::Register *OMR::Power::TreeEvaluator::retrieveTOCRegister(TR::Node *node, TR:
return cg->machine()->getRealRegister(TR::RealRegister::gr2);
}

/**
 * \brief Evaluates an sbyteswap node: reverses the two low-order bytes of the
 *        child's value and sign-extends the resulting halfword.
 *
 * Two strategies are used:
 *  - If the child (possibly underneath a chain of single-reference integer
 *    conversions) is a not-yet-evaluated, single-reference memory reference of
 *    a 2, 4 or 8 byte integer type, the value is loaded with lhbrx directly
 *    from memory and the intermediate conversions are never evaluated.
 *  - Otherwise the child is evaluated into a register and swapped there, using
 *    brh on POWER10 and newer or a rlwinm/rlwinm/or sequence on older CPUs.
 *
 * \param node the sbyteswap node; asserted to have exactly one child
 * \param cg   the code generator used to allocate registers and emit instructions
 * \return the register holding the result, which is also set on \p node
 */
TR::Register *OMR::Power::TreeEvaluator::sbyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Wrong number of children in sbyteswapEvaluator");

   TR::Node *child = node->getFirstChild();
   TR::Register *resultReg = cg->allocateRegister();

   // Look through conversions towards a possible load. Only conversions that
   // are referenced once and whose result type is a 2, 4 or 8 byte integer
   // are skipped.
   TR::Node *loadCandidate = child;
   TR::DataType candidateType = loadCandidate->getType();

   for (;;)
      {
      const bool canLookThrough =
         loadCandidate->getOpCode().isConversion() &&
         loadCandidate->getReferenceCount() == 1 &&
         (candidateType.isInt16() || candidateType.isInt32() || candidateType.isInt64());

      if (!canLookThrough)
         break;

      loadCandidate = loadCandidate->getFirstChild();
      candidateType = loadCandidate->getType();
      }

   const bool swapWhileLoading =
      !loadCandidate->getRegister() &&
      loadCandidate->getOpCode().isMemoryReference() &&
      loadCandidate->getReferenceCount() == 1 &&
      (candidateType.isInt16() || candidateType.isInt32() || candidateType.isInt64());

   if (!swapWhileLoading)
      {
      // Register path: evaluate the child and swap the low halfword in place.
      TR::Register *valueReg = cg->evaluate(child);

      if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10))
         {
         generateTrg1Src1Instruction(cg, TR::InstOpCode::brh, node, resultReg, valueReg);
         }
      else
         {
         TR::Register *scratchReg = cg->allocateRegister();

         // Rotate left by 24 and keep bits 0-7: source byte 1 lands in byte 0.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, resultReg, valueReg, 24, 0x00000000ff);
         // Rotate left by 8 and keep bits 8-15: source byte 0 lands in byte 1.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchReg, valueReg, 8, 0x000000ff00);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultReg, resultReg, scratchReg);

         cg->stopUsingRegister(scratchReg);
         }
      cg->decReferenceCount(child);
      }
   else
      {
      // Memory path: a byte-reversed halfword load of the low-order two bytes.
      TR::MemoryReference *loadMR = TR::MemoryReference::createWithRootLoadOrStore(cg, loadCandidate, 2);
      // NOTE(review): endianness is decided by a build-time macro here, not by
      // a query on the compilation target - confirm this is intended.
#if !defined(__LITTLE_ENDIAN__)
      // Big endian: the low-order halfword sits at the end of a wider value.
      if (candidateType.isInt32())
         loadMR->addToOffset(node,2,cg);
      else if (candidateType.isInt64())
         loadMR->addToOffset(node,6,cg);
#endif
      loadMR->forceIndexedForm(loadCandidate, cg);
      generateTrg1MemInstruction(cg, TR::InstOpCode::lhbrx, node, resultReg, loadMR);
      loadMR->decNodeReferenceCounts(cg);

      // The skipped conversions are never evaluated, so release the reference
      // each of them holds here.
      for (TR::Node *conversion = child;
           conversion->getOpCode().isConversion();
           conversion = conversion->getFirstChild())
         {
         cg->decReferenceCount(conversion);
         }
      }

   // Both paths finish by sign-extending the swapped halfword.
   generateTrg1Src1Instruction(cg, TR::InstOpCode::extsh, node, resultReg, resultReg);

   node->setRegister(resultReg);
   return resultReg;
   }

TR::Register * OMR::Power::TreeEvaluator::ibyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg)
{
TR_ASSERT(node->getNumChildren() == 1, "Wrong number of children in ibyteswapEvaluator");
Expand Down Expand Up @@ -5868,3 +5940,114 @@ TR::Register * OMR::Power::TreeEvaluator::ibyteswapEvaluator(TR::Node *node, TR:
node->setRegister(tgtRegister);
return tgtRegister;
}

/**
 * \brief Evaluates an lbyteswap node: reverses the order of the eight bytes of
 *        the child's value.
 *
 * On 32-bit targets the value lives in a register pair; each source word is
 * byte-reversed into the opposite half of the result pair (brw on POWER10 and
 * newer, otherwise four rlwinm extractions OR-ed together per word).
 *
 * On 64-bit targets a not-yet-evaluated, single-reference memory reference
 * child is loaded with ldbrx when the CPU is at least POWER7. Otherwise the
 * child is evaluated and swapped in registers: brd on POWER10 and newer, or
 * two 32-bit byte reversals that are recombined with rldimi.
 *
 * \param node the lbyteswap node; asserted to have exactly one child
 * \param cg   the code generator used to allocate registers and emit instructions
 * \return the register (a register pair on 32-bit targets) holding the result,
 *         which is also set on \p node
 */
TR::Register *OMR::Power::TreeEvaluator::lbyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_ASSERT(node->getNumChildren() == 1, "Wrong number of children in lbyteswapEvaluator");

   TR::Node *child = node->getFirstChild();

   if (!comp->target().is64Bit())
      {
      // 32-bit target: operate on the two halves of a register pair.
      TR::RegisterPair *resultPair = cg->allocateRegisterPair(cg->allocateRegister(), cg->allocateRegister());
      TR::Register *valuePair = cg->evaluate(child);

      TR::Register *resultLo = resultPair->getLowOrder();
      TR::Register *resultHi = resultPair->getHighOrder();
      TR::Register *valueLo = valuePair->getLowOrder();
      TR::Register *valueHi = valuePair->getHighOrder();

      if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10))
         {
         // The reversed high word becomes the low word and vice versa.
         generateTrg1Src1Instruction(cg, TR::InstOpCode::brw, node, resultLo, valueHi);
         generateTrg1Src1Instruction(cg, TR::InstOpCode::brw, node, resultHi, valueLo);
         }
      else
         {
         TR::Register *scratchA = cg->allocateRegister();
         TR::Register *scratchB = cg->allocateRegister();

         // Source byte 3 -> result byte 0.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, resultLo, valueHi, 8, 0x00000000ff);
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, resultHi, valueLo, 8, 0x00000000ff);

         // Source byte 1 -> result byte 2.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchA, valueHi, 8, 0x0000ff0000);
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchB, valueLo, 8, 0x0000ff0000);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultLo, resultLo, scratchA);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultHi, resultHi, scratchB);

         // Source byte 2 -> result byte 1.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchA, valueHi, 24, 0x000000ff00);
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchB, valueLo, 24, 0x000000ff00);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultLo, resultLo, scratchA);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultHi, resultHi, scratchB);

         // Source byte 0 -> result byte 3.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchA, valueHi, 24, 0x00ff000000);
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchB, valueLo, 24, 0x00ff000000);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultLo, resultLo, scratchA);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultHi, resultHi, scratchB);

         cg->stopUsingRegister(scratchB);
         cg->stopUsingRegister(scratchA);
         }
      cg->decReferenceCount(child);

      node->setRegister(resultPair);
      return resultPair;
      }

   // 64-bit target: the whole value fits in one register.
   TR::Register *resultReg = cg->allocateRegister();

   const bool swapWhileLoading =
      comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P7) &&
      !child->getRegister() &&
      child->getOpCode().isMemoryReference() &&
      child->getReferenceCount() == 1;

   if (swapWhileLoading)
      {
      // Byte-reversed doubleword load straight from memory.
      TR::MemoryReference *loadMR = TR::MemoryReference::createWithRootLoadOrStore(cg, child, 8);
      loadMR->forceIndexedForm(child, cg);
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldbrx, node, resultReg, loadMR);
      loadMR->decNodeReferenceCounts(cg);
      }
   else
      {
      TR::Register *valueReg = cg->evaluate(child);

      if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10))
         {
         generateTrg1Src1Instruction(cg, TR::InstOpCode::brd, node, resultReg, valueReg);
         }
      else
         {
         TR::Register *upperWordReg = cg->allocateRegister();
         TR::Register *swappedLowerReg = cg->allocateRegister();
         TR::Register *scratchA = cg->allocateRegister();
         TR::Register *scratchB = cg->allocateRegister();

         // Bring the upper 32 bits of the source down into the low word.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rldicl, node, upperWordReg, valueReg, 32, 0x00ffffffff);

         // resultReg collects the reversed upper word, swappedLowerReg the
         // reversed lower word. Source byte 3 -> result byte 0.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, resultReg, upperWordReg, 8, 0x00000000ff);
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, swappedLowerReg, valueReg, 8, 0x00000000ff);

         // Source byte 1 -> result byte 2.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchA, upperWordReg, 8, 0x0000ff0000);
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchB, valueReg, 8, 0x0000ff0000);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultReg, resultReg, scratchA);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, swappedLowerReg, swappedLowerReg, scratchB);

         // Source byte 2 -> result byte 1.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchA, upperWordReg, 24, 0x000000ff00);
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchB, valueReg, 24, 0x000000ff00);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultReg, resultReg, scratchA);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, swappedLowerReg, swappedLowerReg, scratchB);

         // Source byte 0 -> result byte 3.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchA, upperWordReg, 24, CONSTANT64(0x00ff000000));
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, scratchB, valueReg, 24, CONSTANT64(0x00ff000000));
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, resultReg, resultReg, scratchA);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, swappedLowerReg, swappedLowerReg, scratchB);

         // Insert the reversed lower word into the upper half of the result.
         generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rldimi, node, resultReg, swappedLowerReg, 32, CONSTANT64(0xffffffff00000000));

         cg->stopUsingRegister(scratchB);
         cg->stopUsingRegister(scratchA);
         cg->stopUsingRegister(swappedLowerReg);
         cg->stopUsingRegister(upperWordReg);
         }
      cg->decReferenceCount(child);
      }

   node->setRegister(resultReg);
   return resultReg;
   }
2 changes: 2 additions & 0 deletions compiler/p/codegen/OMRTreeEvaluator.hpp
Expand Up @@ -520,7 +520,9 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator
static TR::Register *xfRegLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *xdRegLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg);

static TR::Register *sbyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *ibyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *lbyteswapEvaluator(TR::Node *node, TR::CodeGenerator *cg);

static TR::Register *evaluateNULLCHKWithPossibleResolve(TR::Node *node, bool needsResolve, TR::CodeGenerator *cg);
static TR::Instruction *generateNullTestInstructions(
Expand Down
4 changes: 2 additions & 2 deletions compiler/p/codegen/OMRTreeEvaluatorTable.hpp
Expand Up @@ -736,9 +736,9 @@
TR::TreeEvaluator::longNumberOfLeadingZeros, // TR::lnolz (J9)
TR::TreeEvaluator::longNumberOfTrailingZeros, // TR::lnotz (J9)
TR::TreeEvaluator::longBitCount, // TR::lpopcnt (J9)
TR::TreeEvaluator::unImpOpEvaluator, // TR::sbyteswap
TR::TreeEvaluator::sbyteswapEvaluator, // TR::sbyteswap
TR::TreeEvaluator::ibyteswapEvaluator, // TR::ibyteswap
TR::TreeEvaluator::unImpOpEvaluator, // TR::lbyteswap
TR::TreeEvaluator::lbyteswapEvaluator, // TR::lbyteswap
TR::TreeEvaluator::unImpOpEvaluator, // TR::bbitpermute
TR::TreeEvaluator::unImpOpEvaluator, // TR::sbitpermute
TR::TreeEvaluator::unImpOpEvaluator, // TR::ibitpermute
Expand Down

0 comments on commit 8e2a3a7

Please sign in to comment.