Skip to content

Commit

Permalink
Add more performant in-reg byte-reverse series of instr for P8 & P9
Browse files Browse the repository at this point in the history
As rlwimi became cheaper in P8 & P9, using it for in-register
byte-reverse instruction sequence results in less total instructions
and better performance

Signed-off-by: Abdulrahman Alattas <rmnattas@gmail.com>
  • Loading branch information
rmnattas committed Jan 5, 2021
1 parent d255f37 commit eb756f9
Showing 1 changed file with 39 additions and 0 deletions.
39 changes: 39 additions & 0 deletions compiler/p/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5795,6 +5795,11 @@ TR::Register *OMR::Power::TreeEvaluator::sbyteswapEvaluator(TR::Node *node, TR::
{
generateTrg1Src1Instruction(cg, TR::InstOpCode::brh, node, tgtRegister, srcRegister);
}
else if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P8))
{
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister, srcRegister, 24, 0x00000000ff);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister, srcRegister, 8, 0x000000ff00);
}
else
{
TR::Register *tmpRegister = cg->allocateRegister();
Expand Down Expand Up @@ -5839,6 +5844,12 @@ TR::Register * OMR::Power::TreeEvaluator::ibyteswapEvaluator(TR::Node *node, TR:
{
generateTrg1Src1Instruction(cg, TR::InstOpCode::brw, node, tgtRegister, srcRegister);
}
else if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P8))
{
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtRegister, srcRegister, 24, 0x00ffffff00);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister, srcRegister, 8, 0x0000ff0000);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister, srcRegister, 8, 0x00000000ff);
}
else
{
TR::Register *tmp1Register = cg->allocateRegister();
Expand Down Expand Up @@ -5890,6 +5901,25 @@ TR::Register *OMR::Power::TreeEvaluator::lbyteswapEvaluator(TR::Node *node, TR::
{
generateTrg1Src1Instruction(cg, TR::InstOpCode::brd, node, tgtRegister, srcLRegister);
}
else if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P8))
{
TR::Register *srcHRegister = cg->allocateRegister();
TR::Register *tmpRegister = cg->allocateRegister();

generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rldicl, node, srcHRegister, srcLRegister, 32, 0x00ffffffff);

generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmpRegister, srcLRegister, 24, 0x00ffffff00);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtRegister, srcHRegister, 24, 0x00ffffff00);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tmpRegister, srcLRegister, 8, 0x0000ff0000);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister, srcHRegister, 8, 0x0000ff0000);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tmpRegister, srcLRegister, 8, 0x00000000ff);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister, srcHRegister, 8, 0x00000000ff);

generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rldimi, node, tgtRegister, tmpRegister, 32, 0xffffffff00000000);

cg->stopUsingRegister(srcHRegister);
cg->stopUsingRegister(tmpRegister);
}
else
{
TR::Register *srcHRegister = cg->allocateRegister();
Expand Down Expand Up @@ -5941,6 +5971,15 @@ TR::Register *OMR::Power::TreeEvaluator::lbyteswapEvaluator(TR::Node *node, TR::
generateTrg1Src1Instruction(cg, TR::InstOpCode::brw, node, tgtRegister->getLowOrder(), srcRegister->getHighOrder());
generateTrg1Src1Instruction(cg, TR::InstOpCode::brw, node, tgtRegister->getHighOrder(), srcRegister->getLowOrder());
}
else if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P8))
{
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtRegister->getLowOrder(), srcRegister->getHighOrder(), 24, 0x00ffffff00);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtRegister->getHighOrder(), srcRegister->getLowOrder(), 24, 0x00ffffff00);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister->getLowOrder(), srcRegister->getHighOrder(), 8, 0x0000ff0000);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister->getHighOrder(), srcRegister->getLowOrder(), 8, 0x0000ff0000);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister->getLowOrder(), srcRegister->getHighOrder(), 8, 0x00000000ff);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwimi, node, tgtRegister->getHighOrder(), srcRegister->getLowOrder(), 8, 0x00000000ff);
}
else
{
TR::Register *tmp1Register = cg->allocateRegister();
Expand Down

0 comments on commit eb756f9

Please sign in to comment.