Skip to content

Commit

Permalink
Merge pull request #11114 from aviansie-ben/recognize-reversebytes
Browse files Browse the repository at this point in the history
  • Loading branch information
fjeremic committed Nov 20, 2020
2 parents ef72ef7 + 354cc23 commit ac22731
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 248 deletions.
23 changes: 23 additions & 0 deletions runtime/compiler/optimizer/J9RecognizedCallTransformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ void J9::RecognizedCallTransformer::processIntrinsicFunction(TR::TreeTop* treeto
TR::Node::recreate(node, opcode);
}

/** \brief
 *     Rewrites a recognized unary call in place so that \p node becomes
 *     resultConvertOpcode(opcode(argConvertOpcode(arg))), where arg is the call's first child.
 *
 *  \param treetop
 *     Not referenced by this routine.
 *
 *  \param node
 *     The call node to rewrite; it is recreated with \p resultConvertOpcode.
 *
 *  \param argConvertOpcode
 *     Opcode of the conversion node created over the original argument.
 *
 *  \param opcode
 *     Opcode of the intrinsic node created over the converted argument.
 *
 *  \param resultConvertOpcode
 *     Opcode \p node is recreated with; its first child becomes the intrinsic node.
 */
void J9::RecognizedCallTransformer::processConvertingUnaryIntrinsicFunction(TR::TreeTop* treetop, TR::Node* node, TR::ILOpCodes argConvertOpcode, TR::ILOpCodes opcode, TR::ILOpCodes resultConvertOpcode)
   {
   TR::Node* originalArg = node->getFirstChild();

   // Build the inner part of the new tree: opcode(argConvertOpcode(originalArg))
   TR::Node* convertedArg = TR::Node::create(argConvertOpcode, 1, originalArg);
   TR::Node* intrinsicResult = TR::Node::create(opcode, 1, convertedArg);

   // Turn the call itself into the outer conversion
   TR::Node::recreate(node, resultConvertOpcode);

   // node no longer refers to the original argument directly (convertedArg does),
   // so release node's reference before installing the new child
   originalArg->decReferenceCount();
   node->setAndIncChild(0, intrinsicResult);
   }

void J9::RecognizedCallTransformer::process_java_lang_Class_IsAssignableFrom(TR::TreeTop* treetop, TR::Node* node)
{
auto toClass = node->getChild(0);
Expand Down Expand Up @@ -387,6 +397,10 @@ bool J9::RecognizedCallTransformer::isInlineable(TR::TreeTop* treetop)
case TR::java_lang_StrictMath_sqrt:
case TR::java_lang_Math_sqrt:
return comp()->target().cpu.getSupportsHardwareSQRT();
case TR::java_lang_Short_reverseBytes:
case TR::java_lang_Integer_reverseBytes:
case TR::java_lang_Long_reverseBytes:
return comp()->cg()->supportsByteswap();
default:
return false;
}
Expand Down Expand Up @@ -467,6 +481,15 @@ void J9::RecognizedCallTransformer::transform(TR::TreeTop* treetop)
case TR::java_lang_Math_sqrt:
process_java_lang_StrictMath_and_Math_sqrt(treetop, node);
break;
case TR::java_lang_Short_reverseBytes:
processConvertingUnaryIntrinsicFunction(treetop, node, TR::i2s, TR::sbyteswap, TR::s2i);
break;
case TR::java_lang_Integer_reverseBytes:
processIntrinsicFunction(treetop, node, TR::ibyteswap);
break;
case TR::java_lang_Long_reverseBytes:
processIntrinsicFunction(treetop, node, TR::lbyteswap);
break;
default:
break;
}
Expand Down
4 changes: 3 additions & 1 deletion runtime/compiler/optimizer/J9RecognizedCallTransformer.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2017, 2019 IBM Corp. and others
* Copyright (c) 2017, 2020 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -42,6 +42,8 @@ class RecognizedCallTransformer : public OMR::RecognizedCallTransformer

private:
void processIntrinsicFunction(TR::TreeTop* treetop, TR::Node* node, TR::ILOpCodes opcode);
/** \brief
 *     Transforms a recognized unary call node in place into
 *     resultConvertOpcode(opcode(argConvertOpcode(arg))), where arg is the first child of the call.
 *
 *  \param treetop
 *     Presumably the tree top anchoring \p node (TODO confirm against callers).
 *
 *  \param node
 *     The call node to transform.
 *
 *  \param argConvertOpcode
 *     Conversion opcode applied to the call's argument before the intrinsic.
 *
 *  \param opcode
 *     Intrinsic opcode applied to the converted argument.
 *
 *  \param resultConvertOpcode
 *     Conversion opcode that \p node itself is recreated with, taking the intrinsic as its child.
 */
void processConvertingUnaryIntrinsicFunction(TR::TreeTop* treetop, TR::Node* node, TR::ILOpCodes argConvertOpcode, TR::ILOpCodes opcode, TR::ILOpCodes resultConvertOpcode);

/** \brief
* Transforms java/lang/Class.IsAssignableFrom(Ljava/lang/Class;)Z into a JIT helper call TR_checkAssignable with equivalent
* semantics.
Expand Down
5 changes: 1 addition & 4 deletions runtime/compiler/p/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,10 +359,7 @@ bool J9::Power::CodeGenerator::suppressInliningOfRecognizedMethod(TR::Recognized
return true;
}

if (method == TR::java_lang_Short_reverseBytes ||
method == TR::java_lang_Integer_reverseBytes ||
method == TR::java_lang_Long_reverseBytes ||
method == TR::java_lang_Math_fma_D ||
if (method == TR::java_lang_Math_fma_D ||
method == TR::java_lang_Math_fma_F ||
method == TR::java_lang_StrictMath_fma_D ||
method == TR::java_lang_StrictMath_fma_F)
Expand Down
206 changes: 0 additions & 206 deletions runtime/compiler/p/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9993,200 +9993,6 @@ static TR::Register *inlineAtomicOperation(TR::Node *node, TR::CodeGenerator *cg
return resultReg;
}

/** \brief
 *     Generates the inline instruction sequence for a recognized java/lang/Short.reverseBytes call.
 *
 *  \param node
 *     The call node; it is asserted to have exactly one child.
 *
 *  \param cg
 *     The code generator used to allocate registers and emit instructions.
 *
 *  \return
 *     The register holding the result, which is also set as the register of \p node. The last
 *     instruction emitted is an extsh on that register.
 */
static TR::Register *inlineShortReverseBytes(TR::Node *node, TR::CodeGenerator *cg)
{
TR_ASSERT(node->getNumChildren() == 1, "Wrong number of children in inlineShortReverseBytes");

TR::Node *firstChild = node->getFirstChild();
TR::Register *tgtRegister = cg->allocateRegister();

TR::Node *firstNonConversionOpCodeNode = node->getFirstChild();
TR::DataType nodeType = firstNonConversionOpCodeNode->getType();

// Walk down through the chain of conversion nodes under the call until a non-conversion
// opcode is reached. The walk stops early at any conversion node whose reference count
// is not 1 or whose type is not a 16-, 32- or 64-bit integer.
while (firstNonConversionOpCodeNode->getOpCode().isConversion() &&
firstNonConversionOpCodeNode->getReferenceCount() == 1 &&
(nodeType.isInt16() || nodeType.isInt32() || nodeType.isInt64()))
{
firstNonConversionOpCodeNode = firstNonConversionOpCodeNode->getFirstChild();
nodeType = firstNonConversionOpCodeNode->getType();
}

// Load path: the value comes from a not-yet-evaluated, singly-referenced memory reference,
// so the byte reversal is folded into the load itself using lhbrx.
if (!firstNonConversionOpCodeNode->getRegister() &&
firstNonConversionOpCodeNode->getOpCode().isMemoryReference() &&
firstNonConversionOpCodeNode->getReferenceCount() == 1 &&
(nodeType.isInt16() || nodeType.isInt32() || nodeType.isInt64()))
{
TR::MemoryReference *tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, firstNonConversionOpCodeNode, 2);
#ifndef __LITTLE_ENDIAN__
// Big-endian targets only: bump the offset by 2 (32-bit value) or 6 (64-bit value),
// presumably so the 2-byte load addresses the low-order halfword of the wider value.
if (nodeType.isInt32())
tempMR->addToOffset(node,2,cg);
else if (nodeType.isInt64())
tempMR->addToOffset(node,6,cg);
#endif
// lhbrx is emitted on the memory reference after forcing it into indexed form
tempMR->forceIndexedForm(firstNonConversionOpCodeNode, cg);
generateTrg1MemInstruction(cg, TR::InstOpCode::lhbrx, node, tgtRegister, tempMR);
tempMR->decNodeReferenceCounts(cg);

// The intermediate conversion nodes were skipped rather than evaluated, so their
// reference counts are decremented here, starting again from the call's first child.
firstNonConversionOpCodeNode = node->getFirstChild();
while (firstNonConversionOpCodeNode->getOpCode().isConversion())
{
cg->decReferenceCount(firstNonConversionOpCodeNode);
firstNonConversionOpCodeNode = firstNonConversionOpCodeNode->getFirstChild();
}
}
else
{
// Register path: evaluate the child normally and swap the bytes in a register
TR::Register *srcRegister = cg->evaluate(firstChild);

if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10))
{
// P10 and later: a single brh instruction performs the swap
generateTrg1Src1Instruction(cg, TR::InstOpCode::brh, node, tgtRegister, srcRegister);
}
else
{
TR::Register *tmpRegister = cg->allocateRegister();

// Rotate by 24 and keep bits 0x00ff: source bits 8-15 land in the low byte.
// Rotate by 8 and keep bits 0xff00: source bits 0-7 land in bits 8-15.
// OR-ing the two pieces yields the byte-swapped halfword.
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtRegister, srcRegister, 24, 0x00000000ff);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmpRegister, srcRegister, 8, 0x000000ff00);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister, tgtRegister, tmpRegister);

cg->stopUsingRegister(tmpRegister);
}
cg->decReferenceCount(firstChild);
}

// Both paths finish by sign-extending the swapped halfword in place
generateTrg1Src1Instruction(cg, TR::InstOpCode::extsh, node, tgtRegister, tgtRegister);

node->setRegister(tgtRegister);
return tgtRegister;
}


/** \brief
 *     Evaluates a recognized java/lang/Integer.reverseBytes call by turning the call node into
 *     an ibyteswap node and delegating to the ibyteswap evaluator.
 *
 *  \param node
 *     The call node; it is asserted to have exactly one child and is recreated as TR::ibyteswap.
 *
 *  \param cg
 *     The code generator passed through to the evaluator.
 *
 *  \return
 *     The register returned by OMR::Power::TreeEvaluator::ibyteswapEvaluator.
 */
static TR::Register *inlineIntegerReverseBytes(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Wrong number of children in inlineIntegerReverseBytes");

   // reverseBytes and ibyteswap provide the same functionality, so reuse the existing evaluator
   TR::Node::recreate(node, TR::ibyteswap);
   TR::Register *swappedRegister = OMR::Power::TreeEvaluator::ibyteswapEvaluator(node, cg);
   return swappedRegister;
   }


/** \brief
 *     Generates the inline instruction sequence for a recognized java/lang/Long.reverseBytes call.
 *
 *     On 64-bit targets the result lives in a single register; on 32-bit targets it lives in a
 *     register pair.
 *
 *  \param node
 *     The call node; it is asserted to have exactly one child.
 *
 *  \param cg
 *     The code generator used to allocate registers and emit instructions.
 *
 *  \return
 *     The register (or register pair on 32-bit targets) holding the result, which is also set as
 *     the register of \p node.
 */
static TR::Register *inlineLongReverseBytes(TR::Node *node, TR::CodeGenerator *cg)
{
TR::Compilation *comp = cg->comp();
TR_ASSERT(node->getNumChildren() == 1, "Wrong number of children in inlineLongReverseBytes");

if (comp->target().is64Bit())
{
TR::Node *firstChild = node->getFirstChild();
TR::Register *tgtRegister = cg->allocateRegister();

// Load path (P7 and later only): the child is a not-yet-evaluated, singly-referenced
// memory reference, so the byte reversal is folded into the load using ldbrx.
if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P7) &&
!firstChild->getRegister() &&
firstChild->getOpCode().isMemoryReference() &&
firstChild->getReferenceCount() == 1)
{
TR::MemoryReference *tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, firstChild, 8);
tempMR->forceIndexedForm(firstChild, cg);
generateTrg1MemInstruction(cg, TR::InstOpCode::ldbrx, node, tgtRegister, tempMR);
tempMR->decNodeReferenceCounts(cg);
}
else
{
// Register path: evaluate the child and swap the bytes in registers
TR::Register *srcLRegister = cg->evaluate(firstChild);

if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10))
{
// P10 and later: a single brd instruction performs the swap
generateTrg1Src1Instruction(cg, TR::InstOpCode::brd, node, tgtRegister, srcLRegister);
}
else
{
TR::Register *srcHRegister = cg->allocateRegister();
TR::Register *tgtHRegister = cg->allocateRegister();
TR::Register *tmp1Register = cg->allocateRegister();
TR::Register *tmp2Register = cg->allocateRegister();

// Bring the upper 32 bits of the source down into the low word of srcHRegister
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rldicl, node, srcHRegister, srcLRegister, 32, 0x00ffffffff);

// Each rlwinm/OR group below moves one byte of a 32-bit word into its mirrored position.
// The words are exchanged as well: srcHRegister feeds tgtRegister and srcLRegister feeds
// tgtHRegister.
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtRegister, srcHRegister, 8, 0x00000000ff);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtHRegister, srcLRegister, 8, 0x00000000ff);

generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp1Register, srcHRegister, 8, 0x0000ff0000);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp2Register, srcLRegister, 8, 0x0000ff0000);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister, tgtRegister, tmp1Register);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtHRegister, tgtHRegister, tmp2Register);

generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp1Register, srcHRegister, 24, 0x000000ff00);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp2Register, srcLRegister, 24, 0x000000ff00);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister, tgtRegister, tmp1Register);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtHRegister, tgtHRegister, tmp2Register);

generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp1Register, srcHRegister, 24, CONSTANT64(0x00ff000000));
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp2Register, srcLRegister, 24, CONSTANT64(0x00ff000000));
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister, tgtRegister, tmp1Register);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtHRegister, tgtHRegister, tmp2Register);

// Insert the swapped low word (tgtHRegister) into the upper 32 bits of the result
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rldimi, node, tgtRegister, tgtHRegister, 32, CONSTANT64(0xffffffff00000000));

cg->stopUsingRegister(tmp2Register);
cg->stopUsingRegister(tmp1Register);
cg->stopUsingRegister(tgtHRegister);
cg->stopUsingRegister(srcHRegister);
}
cg->decReferenceCount(firstChild);
}

node->setRegister(tgtRegister);
return tgtRegister;
}
else //32-Bit Target
{
// The 64-bit value occupies a register pair: each 32-bit half is byte-swapped and the
// halves are exchanged (source high order -> target low order and vice versa).
TR::Node *firstChild = node->getFirstChild();
TR::RegisterPair *tgtRegister = cg->allocateRegisterPair(cg->allocateRegister(), cg->allocateRegister());
TR::Register *srcRegister = cg->evaluate(firstChild);

if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P10))
{
// P10 and later: one brw per half
generateTrg1Src1Instruction(cg, TR::InstOpCode::brw, node, tgtRegister->getLowOrder(), srcRegister->getHighOrder());
generateTrg1Src1Instruction(cg, TR::InstOpCode::brw, node, tgtRegister->getHighOrder(), srcRegister->getLowOrder());
}
else
{
TR::Register *tmp1Register = cg->allocateRegister();
TR::Register *tmp2Register = cg->allocateRegister();

// Same rlwinm/OR byte-placement scheme as the 64-bit fallback, applied to each half
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtRegister->getLowOrder(), srcRegister->getHighOrder(), 8, 0x00000000ff);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tgtRegister->getHighOrder(), srcRegister->getLowOrder(), 8, 0x00000000ff);

generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp1Register, srcRegister->getHighOrder(), 8, 0x0000ff0000);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp2Register, srcRegister->getLowOrder(), 8, 0x0000ff0000);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister->getLowOrder(), tgtRegister->getLowOrder(), tmp1Register);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister->getHighOrder(), tgtRegister->getHighOrder(), tmp2Register);

generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp1Register, srcRegister->getHighOrder(), 24, 0x000000ff00);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp2Register, srcRegister->getLowOrder(), 24, 0x000000ff00);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister->getLowOrder(), tgtRegister->getLowOrder(), tmp1Register);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister->getHighOrder(), tgtRegister->getHighOrder(), tmp2Register);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp1Register, srcRegister->getHighOrder(), 24, 0x00ff000000);
generateTrg1Src1Imm2Instruction(cg, TR::InstOpCode::rlwinm, node, tmp2Register, srcRegister->getLowOrder(), 24, 0x00ff000000);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister->getLowOrder(), tgtRegister->getLowOrder(), tmp1Register);
generateTrg1Src2Instruction(cg, TR::InstOpCode::OR, node, tgtRegister->getHighOrder(), tgtRegister->getHighOrder(), tmp2Register);

cg->stopUsingRegister(tmp2Register);
cg->stopUsingRegister(tmp1Register);
}
cg->decReferenceCount(firstChild);

node->setRegister(tgtRegister);
return tgtRegister;
}
}

static TR::Register *compressStringEvaluator(TR::Node *node, TR::CodeGenerator *cg, bool japaneseMethod)
{
TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->comp()->fe());
Expand Down Expand Up @@ -12101,18 +11907,6 @@ J9::Power::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
resultReg = inlineFPTrg1Src3(node, TR::InstOpCode::fmadds, cg);
return true;

case TR::java_lang_Short_reverseBytes:
resultReg = inlineShortReverseBytes(node, cg);
return true;

case TR::java_lang_Integer_reverseBytes:
resultReg = inlineIntegerReverseBytes(node, cg);
return true;

case TR::java_lang_Long_reverseBytes:
resultReg = inlineLongReverseBytes(node, cg);
return true;

case TR::java_lang_String_hashCodeImplDecompressed:
if (!TR::Compiler->om.canGenerateArraylets() && comp->target().cpu.isAtLeast(OMR_PROCESSOR_PPC_P8) && comp->target().cpu.supportsFeature(OMR_FEATURE_PPC_HAS_VSX) && !comp->compileRelocatableCode()
#ifdef J9VM_OPT_JITSERVER
Expand Down
13 changes: 0 additions & 13 deletions runtime/compiler/x/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9951,16 +9951,6 @@ bool J9::X86::TreeEvaluator::VMinlineCallEvaluator(
}
break;

case TR::java_lang_Long_reverseBytes:
case TR::java_lang_Integer_reverseBytes:
case TR::java_lang_Short_reverseBytes:
{
if(comp->getOption(TR_EnableJCLInline)
&& performTransformation(comp, "O^O Enable JCL Integer/Long methods inline for: %s\n", cg->getDebug()->getName(node)) )
return TR::TreeEvaluator::sbyteswapEvaluator(node, cg) != NULL;
break;
}

case TR::java_util_concurrent_atomic_Fences_reachabilityFence:
{
cg->decReferenceCount(node->getChild(0));
Expand Down Expand Up @@ -11637,9 +11627,6 @@ J9::X86::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *c

case TR::java_lang_Math_sqrt:
case TR::java_lang_StrictMath_sqrt:
case TR::java_lang_Long_reverseBytes:
case TR::java_lang_Integer_reverseBytes:
case TR::java_lang_Short_reverseBytes:
case TR::java_lang_System_nanoTime:
case TR::java_util_concurrent_atomic_Fences_orderAccesses:
case TR::java_util_concurrent_atomic_Fences_orderReads:
Expand Down
25 changes: 1 addition & 24 deletions runtime/compiler/z/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3791,10 +3791,7 @@ J9::Z::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod me
return true;
}

if (method == TR::java_lang_Long_reverseBytes ||
method == TR::java_lang_Integer_reverseBytes ||
method == TR::java_lang_Short_reverseBytes ||
method == TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet ||
if (method == TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet ||
method == TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd ||
method == TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement ||
method == TR::java_util_concurrent_atomic_AtomicInteger_getAndDecrement ||
Expand Down Expand Up @@ -3845,10 +3842,6 @@ extern TR::Register *inlineNumberOfLeadingZeros(TR::Node *node, TR::CodeGenerato
extern TR::Register *inlineNumberOfTrailingZeros(TR::Node *node, TR::CodeGenerator *cg, int32_t subfconst);
extern TR::Register *inlineTrailingZerosQuadWordAtATime(TR::Node *node, TR::CodeGenerator *cg);

extern TR::Register *inlineLongReverseBytes(TR::Node *node, TR::CodeGenerator *cg);
extern TR::Register *inlineIntegerReverseBytes(TR::Node *node, TR::CodeGenerator *cg);
extern TR::Register *inlineShortReverseBytes(TR::Node *node, TR::CodeGenerator *cg);

extern TR::Register *inlineBigDecimalConstructor(TR::Node *node, TR::CodeGenerator *cg, bool isLong, bool exp);
extern TR::Register *inlineBigDecimalBinaryOp(TR::Node * node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, bool scaled);
extern TR::Register *inlineBigDecimalScaledDivide(TR::Node * node, TR::CodeGenerator *cg);
Expand Down Expand Up @@ -4184,22 +4177,6 @@ J9::Z::CodeGenerator::inlineDirectCall(
}
}


switch (methodSymbol->getRecognizedMethod())
{
case TR::java_lang_Long_reverseBytes:
resultReg = inlineLongReverseBytes(node, cg);
return true;
case TR::java_lang_Integer_reverseBytes:
resultReg = inlineIntegerReverseBytes(node, cg);
return true;
case TR::java_lang_Short_reverseBytes:
resultReg = inlineShortReverseBytes(node, cg);
return true;
default:
break;
}

if (!comp->compileRelocatableCode() && !comp->getOption(TR_DisableDFP) &&
comp->target().cpu.supportsFeature(OMR_FEATURE_S390_DFP))
{
Expand Down

0 comments on commit ac22731

Please sign in to comment.