Skip to content

Commit

Permalink
Avoid generating store of uninitialized auto when reducing TRT2
Browse files Browse the repository at this point in the history
In the case of a single non-constant delimiter, it's possible for the
pattern to match when the original load and comparison look as follows:

    n56n      istore  elem
    n55n        b2i
    n54n          bloadi <array-shadow>
                    ...
    n60n      istore tmpDelim
    n59n        b2i
    n58n          i2b
    n57n            iload delim
    n65n      ificmpeq --> block_EXIT
    n55n        ==>b2i
    n59n        ==>b2i

With such a match, the booltable node of the pattern graph corresponds
to the ificmpeq node of the target graph. The transformer function for
TRT2 (CISCTransform2FindBytes) finds this target node and from there it
uses the second child as the delimiter, but instead of the b2i that one
might expect, that second child is a variable (tmpDelim) that has been
matched up with the store node (n60n in this example). This situation
results in the transformation generating a load of tmpDelim while at the
same time removing the definition (n60n) that provides the value that is
expected at that load.

This problem should be a pretty rare occurrence. If the tmpDelim store
is dead, it should usually have been eliminated. If OTOH it isn't dead,
then the pattern doesn't match. As such, it would probably be reasonable
to detect this case and simply refuse to transform. However, it's not
necessarily straightforward to detect the problem. I believe that
tableCISCNode->getHeadOfTrNodeInfo()->_node being a store indicates that
the problem is occurring, but it's not obvious that we couldn't see the
same fundamental problem with a load node, or some other node that has a
load as a descendant. With this uncertainty, reliably detecting the
problem case requires walking a subtree and looking for auto loads that
aren't loop-invariant.

Since detecting the problem is already that complex, and since even in
the presence of a store the delimiter might be loop-invariant anyway,
this commit goes slightly further and chases down single definitions to
construct an expression that does not load from autos that are defined
in the loop (if possible). In the example above, that means that we
still remove the tmpDelim store, but now the arraytranslateAndTest node
uses b2i (i2b (iload delim)) instead of iload tmpDelim.
  • Loading branch information
jdmpapin committed Jun 5, 2023
1 parent d41eba9 commit d33313e
Showing 1 changed file with 210 additions and 1 deletion.
211 changes: 210 additions & 1 deletion runtime/compiler/optimizer/IdiomTransformations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <map>
#include "codegen/CodeGenerator.hpp"
#include "env/FrontEnd.hpp"
#include "compile/Compilation.hpp"
Expand Down Expand Up @@ -54,6 +55,7 @@
#include "optimizer/IdiomRecognitionUtils.hpp"
#include "optimizer/Optimization_inlines.hpp"
#include "optimizer/Optimizer.hpp"
#include "optimizer/Structure.hpp"
#include "optimizer/UseDefInfo.hpp"
#include "ras/Debug.hpp"

Expand Down Expand Up @@ -1016,6 +1018,199 @@ areArraysInvariant(TR::Compilation *comp, TR::Node *inputNode, TR::Node *outputN
return true;
}

namespace { // file-local

class AutoLoopInvarianceInfo
{
TR::Compilation * const _comp;
TR::Region &_region;
TR_UseDefInfo * const _useDefInfo;
TR_BitVector _storedAutos; // autos defined in the loop
TR::NodeChecklist _autoStores; // stores to autos in the loop
TR::NodeChecklist _autoLoads; // loads of autos in the loop
TR::NodeChecklist _defsOnStack; // to check for cyclic definition chasing

// Memoize successful results of invariantExprImpl() to avoid exponential walks.
// This is reset for every top-level call to invariantExpr().
typedef TR::typed_allocator<std::pair<TR::Node * const, TR::Node*>, TR::Region&> MemoMapAlloc;
std::map<TR::Node*, TR::Node*, std::less<TR::Node*>, MemoMapAlloc> _invariantExprMemo;

public:
AutoLoopInvarianceInfo(
TR::Compilation *comp, TR_UseDefInfo *ud, TR_RegionStructure *loop);

TR::Node *invariantExpr(TR::Node *node);

private:
void findAutoStoresAndLoads(TR_RegionStructure *region, TR::NodeChecklist &visited);
void findAutoLoads(TR::Node *node, TR::NodeChecklist &visited);
TR::Node *invariantExprImpl(TR::Node *node);
TR::Node *invariantExprFromDef(TR::Node *defNode);
};

AutoLoopInvarianceInfo::AutoLoopInvarianceInfo(
TR::Compilation *comp, TR_UseDefInfo *ud, TR_RegionStructure *loop)
: _comp(comp)
, _region(comp->trMemory()->currentStackRegion())
, _useDefInfo(ud)
, _storedAutos(comp->getSymRefCount(), _region)
, _autoStores(comp)
, _autoLoads(comp)
, _defsOnStack(comp)
, _invariantExprMemo(std::less<TR::Node*>(), _region)
{
TR::NodeChecklist visited(comp);
findAutoStoresAndLoads(loop, visited);
}

void AutoLoopInvarianceInfo::findAutoStoresAndLoads(
TR_RegionStructure *region, TR::NodeChecklist &visited)
{
TR_RegionStructure::Cursor it(*region);
for (auto *subNode = it.getFirst(); subNode != NULL; subNode = it.getNext())
{
TR_Structure *structure = subNode->getStructure();
TR_RegionStructure *childRegion = structure->asRegion();
if (childRegion != NULL)
{
findAutoStoresAndLoads(childRegion, visited);
continue;
}

TR::Block *block = structure->asBlock()->getBlock();
TR::TreeTop *entry = block->getEntry();
TR::TreeTop *exit = block->getExit();
for (TR::TreeTop *tt = entry; tt != exit; tt = tt->getNextTreeTop())
{
TR::Node *node = tt->getNode();
findAutoLoads(node, visited);
if (node->getOpCode().isStoreDirect() && node->getSymbol()->isAutoOrParm())
{
_storedAutos.set(node->getSymbolReference()->getReferenceNumber());
_autoStores.add(node);
}
}
}
}

void AutoLoopInvarianceInfo::findAutoLoads(TR::Node *node, TR::NodeChecklist &visited)
{
if (visited.contains(node))
return;

visited.add(node);
if (node->getOpCode().isLoadVarDirect() && node->getSymbol()->isAutoOrParm())
_autoLoads.add(node);

int32_t numChildren = node->getNumChildren();
for (int32_t i = 0; i < numChildren; i++)
findAutoLoads(node->getChild(i), visited);
}

TR::Node *AutoLoopInvarianceInfo::invariantExpr(TR::Node *node)
{
_invariantExprMemo.clear();
if (node->getOpCode().isStore())
return invariantExprFromDef(node);
else
return invariantExprImpl(node);
}

TR::Node *AutoLoopInvarianceInfo::invariantExprImpl(TR::Node *node)
{

if (node->getOpCode().isLoadVarDirect() && node->getSymbol()->isAutoOrParm())
{
TR_ASSERT_FATAL_WITH_NODE(
node, _autoLoads.contains(node), "expected auto load to be in the loop");

if (_storedAutos.isSet(node->getSymbolReference()->getReferenceNumber()))
{
// Because the auto is defined within the loop, and because node is
// within the loop, at least one definition from inside the loop
// reaches node. But node is still invariant if there is exactly one
// such definition and if its RHS is invariant.

uint16_t useIndex = node->getUseDefIndex();
if (useIndex == 0 || !_useDefInfo->isUseIndex(useIndex))
return NULL;

TR_UseDefInfo::BitVector defs(_comp->allocator());
if (!_useDefInfo->getUseDef(defs, useIndex) || defs.PopulationCount() != 1)
return NULL;

TR_UseDefInfo::BitVector::Cursor cursor(defs);
cursor.SetToFirstOne();
TR_ASSERT_FATAL_WITH_NODE(
node, cursor.Valid(), "single def missing from cursor");

int32_t defIndex = cursor;
TR_ASSERT_FATAL_WITH_NODE(
node,
defIndex >= _useDefInfo->getFirstRealDefIndex(),
"despite in-loop definition, param reaches this use");

return invariantExprFromDef(_useDefInfo->getNode(defIndex));
}
}
else if (node->getOpCode().hasSymbolReference()
&& node->getOpCodeValue() != TR::loadaddr)
{
return NULL; // not handled - treat as non-invariant
}

static const int32_t maxChildren = 3;
int32_t numChildren = node->getNumChildren();
if (numChildren > maxChildren)
return NULL; // not handled - treat as non-invariant

auto memoInsertResult = _invariantExprMemo.insert(
std::make_pair(node, (TR::Node*)NULL));

auto memoEntry = memoInsertResult.first;
if (!memoInsertResult.second) // already present in the map
return memoEntry->second;

TR::Node *children[maxChildren] = {};
for (int32_t i = 0; i < numChildren; i++)
{
children[i] = invariantExprImpl(node->getChild(i));
if (children[i] == NULL)
return NULL;
}

bool duplicateChildren = false;
TR::Node *result = node->duplicateTree(duplicateChildren);
for (int32_t i = 0; i < numChildren; i++)
{
node->getChild(i)->decReferenceCount(); // undo duplicateTree() increment
result->setAndIncChild(i, children[i]);
}

memoEntry->second = result;
return result;
}

TR::Node *AutoLoopInvarianceInfo::invariantExprFromDef(TR::Node *defNode)
{
TR_ASSERT_FATAL_WITH_NODE(
defNode,
_autoStores.contains(defNode),
"expected an auto store in the loop");

TR_ASSERT_FATAL_WITH_NODE(
defNode,
!_defsOnStack.contains(defNode),
"circular single-definition dependency");

_defsOnStack.add(defNode);
TR::Node *result = invariantExprImpl(defNode->getChild(0));
_defsOnStack.remove(defNode);
return result;
}


} // anonymous namespace

// used for a TRTO reduction in java/io/DataOutputStream.writeUTF(String)
//
Expand Down Expand Up @@ -1571,8 +1766,22 @@ CISCTransform2FindBytes(TR_CISCTransformer *trans)

if (count == -1) // single delimiter which is not constant value
{
AutoLoopInvarianceInfo inv(
trans->comp(), trans->optimizer()->getUseDefInfo(), trans->getCurrentLoop());

TR_CISCNode *tableCISCNode = tBoolTable->getChild(1);
tableNode = createLoad(tableCISCNode->getHeadOfTrNodeInfo()->_node);
TR::Node *origComparand = tableCISCNode->getHeadOfTrNodeInfo()->_node;
tableNode = inv.invariantExpr(origComparand);
if (tableNode == NULL)
{
traceMsg(
comp,
"Abandoning reduction: failed to create loop-invariant expression for n%un [%p]\n",
origComparand->getGlobalIndex(),
origComparand);
return false;
}

if (disptrace) traceMsg(comp, "Single non-constant delimiter found. Setting %p as tableNode.\n", comp->getDebug()->getName(tableCISCNode->getHeadOfTrNodeInfo()->_node));
}
else if (count == 1) // single delimiter
Expand Down

0 comments on commit d33313e

Please sign in to comment.