Skip to content

Commit

Permalink
Implement Z codegen support for several atomic symbols
Browse files Browse the repository at this point in the history
Implement inlined intrinsic support for the following symbols as
defined in #2958:

- atomicAdd32BitSymbol
- atomicAdd64BitSymbol
- atomicFetchAndAdd32BitSymbol
- atomicFetchAndAdd64BitSymbol
- atomicSwap32BitSymbol
- atomicSwap64BitSymbol

Signed-off-by: Filip Jeremic <fjeremic@ca.ibm.com>
  • Loading branch information
fjeremic committed Sep 26, 2018
1 parent 40babba commit 1bdb8dd
Show file tree
Hide file tree
Showing 2 changed files with 239 additions and 1 deletion.
165 changes: 164 additions & 1 deletion compiler/z/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11243,7 +11243,55 @@ OMR::Z::TreeEvaluator::directCallEvaluator(TR::Node * node, TR::CodeGenerator *

if (!cg->inlineDirectCall(node, resultReg))
{
resultReg = TR::TreeEvaluator::performCall(node, false, cg);
TR::SymbolReference* symRef = node->getSymbolReference();

if (symRef != NULL && symRef->getSymbol()->castToMethodSymbol()->isInlinedByCG())
{
TR::Compilation* comp = cg->comp();

if (comp->getSymRefTab()->isNonHelper(symRef, TR::SymbolReferenceTable::atomicAdd32BitSymbol))
{
TR_ASSERT_FATAL(node->getChild(1)->getDataType().isInt32(), "Value child of atomicAdd32BitSymbol call should have 32-bit data type");

resultReg = intrinsicAtomicAdd(node, cg);
}
else if (comp->getSymRefTab()->isNonHelper(symRef, TR::SymbolReferenceTable::atomicAdd64BitSymbol))
{
TR_ASSERT_FATAL(node->getChild(1)->getDataType().isInt64(), "Value child of atomicAdd64BitSymbol call should have 64-bit data type");

resultReg = intrinsicAtomicAdd(node, cg);
}
else if (comp->getSymRefTab()->isNonHelper(symRef, TR::SymbolReferenceTable::atomicFetchAndAdd32BitSymbol))
{
TR_ASSERT_FATAL(node->getChild(1)->getDataType().isInt32(), "Value child of atomicFetchAndAdd32BitSymbol call should have 32-bit data type");

resultReg = intrinsicAtomicFetchAndAdd(node, cg);
}
else if (comp->getSymRefTab()->isNonHelper(symRef, TR::SymbolReferenceTable::atomicFetchAndAdd64BitSymbol))
{
TR_ASSERT_FATAL(node->getChild(1)->getDataType().isInt64(), "Value child of atomicFetchAndAdd64BitSymbol call should have 64-bit data type");

resultReg = intrinsicAtomicFetchAndAdd(node, cg);
}
else if (comp->getSymRefTab()->isNonHelper(symRef, TR::SymbolReferenceTable::atomicSwap32BitSymbol))
{
TR_ASSERT_FATAL(node->getChild(1)->getDataType().isInt32(), "Value child of atomicAtomicSwap32BitSymbol call should have 32-bit data type");

resultReg = intrinsicAtomicSwap(node, cg);
}
else if (comp->getSymRefTab()->isNonHelper(symRef, TR::SymbolReferenceTable::atomicSwap64BitSymbol))
{
TR_ASSERT_FATAL(node->getChild(1)->getDataType().isInt64(), "Value child of atomicAtomicSwap64BitSymbol call should have 64-bit data type");

resultReg = intrinsicAtomicSwap(node, cg);
}
}

if (resultReg == NULL)
{
resultReg = TR::TreeEvaluator::performCall(node, false, cg);
}

// on 64bit, lcall returns 64bit registers
if (cg->supportsHighWordFacility() && !cg->comp()->getOption(TR_DisableHighWordRA) && TR::Compiler->target.is64Bit())
{
Expand Down Expand Up @@ -19171,6 +19219,121 @@ OMR::Z::TreeEvaluator::genLoadForObjectHeadersMasked(TR::CodeGenerator *cg, TR::
return iCursor;
}

TR::Register*
OMR::Z::TreeEvaluator::intrinsicAtomicAdd(TR::Node* node, TR::CodeGenerator* cg)
{
TR_ASSERT_FATAL(cg->getS390ProcessorInfo()->supportsArch(TR_S390ProcessorInfo::TR_z196) &&
(TR::Compiler->target.is64Bit() || cg->use64BitRegsOn32Bit()), "Atomic add intrinsics only supported z196 on 64-bit targets");

TR::Node* addressNode = node->getChild(0);
TR::Node* valueNode = node->getChild(1);

TR::Register* addressReg = cg->evaluate(addressNode);
TR::Register* valueReg = cg->evaluate(valueNode);

// Used to hold the return value of LAA instruction but will be discarded
TR::Register* tempReg = cg->allocateRegister();

TR::MemoryReference* addressMemRef = generateS390MemoryReference(addressReg, 0, cg);

auto mnemonic =
valueNode->getDataType().isInt32() ?
TR::InstOpCode::LAA :
TR::InstOpCode::LAAG;

generateRSInstruction(cg, mnemonic, node, tempReg, valueReg, addressMemRef);

node->setRegister(valueReg);
cg->stopUsingRegister(tempReg);
cg->decReferenceCount(addressNode);
cg->decReferenceCount(valueNode);

return valueReg;
}

TR::Register*
OMR::Z::TreeEvaluator::intrinsicAtomicFetchAndAdd(TR::Node* node, TR::CodeGenerator* cg)
{
TR_ASSERT_FATAL(cg->getS390ProcessorInfo()->supportsArch(TR_S390ProcessorInfo::TR_z196) &&
(TR::Compiler->target.is64Bit() || cg->use64BitRegsOn32Bit()), "Atomic add intrinsics only supported z196 on 64-bit targets");

TR::Node* addressNode = node->getChild(0);
TR::Node* valueNode = node->getChild(1);

TR::Register* addressReg = cg->evaluate(addressNode);
TR::Register* valueReg = cg->gprClobberEvaluate(valueNode);

TR::MemoryReference* addressMemRef = generateS390MemoryReference(addressReg, 0, cg);

auto mnemonic =
valueNode->getDataType().isInt32() ?
TR::InstOpCode::LAA :
TR::InstOpCode::LAAG;

generateRSInstruction(cg, mnemonic, node, valueReg, valueReg, addressMemRef);

node->setRegister(valueReg);
cg->decReferenceCount(addressNode);
cg->decReferenceCount(valueNode);

return valueReg;
}

TR::Register*
OMR::Z::TreeEvaluator::intrinsicAtomicSwap(TR::Node* node, TR::CodeGenerator* cg)
{
TR::Node* addressNode = node->getChild(0);
TR::Node* valueNode = node->getChild(1);

TR::Register* addressReg = cg->evaluate(addressNode);
TR::Register* valueReg = cg->evaluate(valueNode);
TR::Register* returnReg = cg->allocateRegister();

TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 3, cg);

dependencies->addPostCondition(addressReg, TR::RealRegister::AssignAny);
dependencies->addPostCondition(valueReg, TR::RealRegister::AssignAny);
dependencies->addPostCondition(returnReg, TR::RealRegister::AssignAny);

TR::LabelSymbol* loopLabel = generateLabelSymbol(cg);
TR::LabelSymbol* doneLabel = generateLabelSymbol(cg);

loopLabel->setStartInternalControlFlow();
doneLabel->setEndInternalControlFlow();

TR::MemoryReference* addressMemRef = generateS390MemoryReference(addressReg, 0, cg);

auto mnemonic =
valueNode->getDataType().isInt32() ?
TR::InstOpCode::L :
TR::InstOpCode::LG;

// Load the original value
generateRXInstruction(cg, mnemonic, node, returnReg, addressMemRef);

generateS390LabelInstruction(cg, TR::InstOpCode::LABEL, node, loopLabel);

addressMemRef = generateS390MemoryReference(*addressMemRef, 0, cg);

mnemonic =
valueNode->getDataType().isInt32() ?
TR::InstOpCode::CS :
TR::InstOpCode::CSG;

// Compare and swap against the original value
generateRSInstruction(cg, mnemonic, node, returnReg, valueReg, addressMemRef);

// Branch if the compare and swap failed and try again
generateS390BranchInstruction(cg, TR::InstOpCode::BRC,TR::InstOpCode::COND_CC1, node, loopLabel);

generateS390LabelInstruction(cg, TR::InstOpCode::LABEL, node, doneLabel, dependencies);

node->setRegister(returnReg);
cg->decReferenceCount(addressNode);
cg->decReferenceCount(valueNode);

return returnReg;
}

///// SIMD Evaluators End /////////////

Expand Down
75 changes: 75 additions & 0 deletions compiler/z/codegen/OMRTreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,81 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator

private:

/** \brief
* Inlines an intrinsic for calls to the following two symbols:
*
* - atomicAdd32BitSymbol
* - atomicAdd64BitSymbol
*
* which are represented by a call node of the form for 32-bit (64-bit similar):
*
* \code
* icall <atomicAdd32BitSymbol>
* <address>
* <value>
* \endcode
*
* \param node
* The respective (i|l)call node.
*
* \param cg
* The code generator used to generate the instructions.
*
* \return
* A register holding the <value> node.
*/
static TR::Register* intrinsicAtomicAdd(TR::Node* node, TR::CodeGenerator* cg);

/** \brief
* Inlines an intrinsic for calls to the following two symbols:
*
* - atomicFetchAndAdd32BitSymbol
* - atomicFetchAndAdd64BitSymbol
*
* which are represented by a call node of the form for 32-bit (64-bit similar):
*
* \code
* icall <atomicFetchAndAdd32BitSymbol>
* <address>
* <value>
* \endcode
*
* \param node
* The respective (i|l)call node.
*
* \param cg
* The code generator used to generate the instructions.
*
* \return
* A register holding the original value in memory (before the addition) at the <address> location.
*/
static TR::Register* intrinsicAtomicFetchAndAdd(TR::Node* node, TR::CodeGenerator* cg);

/** \brief
* Inlines an intrinsic for calls to the following two symbols:
*
* - atomicSwap32BitSymbol
* - atomicSwap64BitSymbol
*
* which are represented by a call node of the form for 32-bit (64-bit similar):
*
* \code
* icall <atomicSwap32BitSymbol>
* <address>
* <value>
* \endcode
*
* \param node
* The respective (i|l)call node.
*
* \param cg
* The code generator used to generate the instructions.
*
* \return
* A register holding the original value in memory (before the swap) at the <address> location.
*/
static TR::Register* intrinsicAtomicSwap(TR::Node* node, TR::CodeGenerator* cg);

static void commonButestEvaluator(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register* ifFoldingHelper(TR::Node *node, TR::CodeGenerator *cg, bool &handledBIF);
static bool isCandidateForBIFEvaluation(TR::Node * node);
Expand Down

0 comments on commit 1bdb8dd

Please sign in to comment.