From 440457a89e39b85527965b0ea7cc9612859b9f7d Mon Sep 17 00:00:00 2001 From: Marius Pirvu Date: Thu, 18 Jan 2024 10:02:23 -0500 Subject: [PATCH] Revert "GRA changes to reduce JIT overhead at warm opt levels" --- compiler/control/OMROptions.cpp | 3 - compiler/control/OMROptions.hpp | 3 - .../optimizer/GlobalRegisterAllocator.cpp | 917 +++++++++++++++--- .../optimizer/GlobalRegisterAllocator.hpp | 7 +- compiler/optimizer/OMRRegisterCandidate.cpp | 55 +- 5 files changed, 834 insertions(+), 151 deletions(-) diff --git a/compiler/control/OMROptions.cpp b/compiler/control/OMROptions.cpp index 3f5c36be91b..17fdc69aa1d 100644 --- a/compiler/control/OMROptions.cpp +++ b/compiler/control/OMROptions.cpp @@ -827,8 +827,6 @@ TR::OptionTable OMR::Options::_jitOptions[] = { {"GCRresetCount=", "R\tthe value to which the counter is reset to after being tripped by guarded counting recompilations (positive value)", TR::Options::setCount, offsetof(OMR::Options,_GCRResetCount), 0, "F%d"}, {"generateCompleteInlineRanges", "O\tgenerate meta data ranges for each change in inliner depth", SET_OPTION_BIT(TR_GenerateCompleteInlineRanges), "F"}, - {"graFreqThresholdAtWarm=", "O\tgra threshold for block frequency for opt level less of equal to warm", - TR::Options::set32BitNumeric, offsetof(OMR::Options, _graFreqThresholdAtWarm), 500, "F%d"}, {"help", " \tdisplay this help information", TR::Options::helpOption, 0, 0, "F", NOT_IN_SUBSET}, {"help=", " {regex}\tdisplay help for options whose names match {regex}", TR::Options::helpOption, 1, 0, "F", NOT_IN_SUBSET}, {"highCodeCacheOccupancyBCount=", "R\tthe initial invocation count used during high code cache occupancy for methods with loops", @@ -2678,7 +2676,6 @@ OMR::Options::jitPreProcess() _alwaysWorthInliningThreshold = 15; _maxLimitedGRACandidates = TR_MAX_LIMITED_GRA_CANDIDATES; _maxLimitedGRARegs = TR_MAX_LIMITED_GRA_REGS; - _graFreqThresholdAtWarm = 500; _counterBucketGranularity = 2; _minCounterFidelity = INT_MIN; 
_lastIpaOptTransformationIndex = INT_MAX; diff --git a/compiler/control/OMROptions.hpp b/compiler/control/OMROptions.hpp index f386f557f62..b58bb505479 100644 --- a/compiler/control/OMROptions.hpp +++ b/compiler/control/OMROptions.hpp @@ -1468,7 +1468,6 @@ class OMR_EXTENSIBLE Options _insertGCRTrees = false; _maxLimitedGRACandidates = 0; _maxLimitedGRARegs = 0; - _graFreqThresholdAtWarm = 0; _enableGPU = 0; _isAOTCompile = false; _jProfilingMethodRecompThreshold = 0; @@ -1823,7 +1822,6 @@ class OMR_EXTENSIBLE Options int32_t getAlwaysWorthInliningThreshold() const { return _alwaysWorthInliningThreshold; } int32_t getMaxLimitedGRACandidates() { return _maxLimitedGRACandidates; } int32_t getMaxLimitedGRARegs() { return _maxLimitedGRARegs; } - int32_t getGRAFreqThresholdAtWarm() { return _graFreqThresholdAtWarm; } int32_t getNumLimitedGRARegsWithheld(); int32_t getProfilingCompNodecountThreshold() { return _profilingCompNodecountThreshold; } @@ -2483,7 +2481,6 @@ class OMR_EXTENSIBLE Options int32_t _maxLimitedGRACandidates; int32_t _maxLimitedGRARegs; - int32_t _graFreqThresholdAtWarm; int32_t _enableGPU; diff --git a/compiler/optimizer/GlobalRegisterAllocator.cpp b/compiler/optimizer/GlobalRegisterAllocator.cpp index 233a940d1db..27a5d3ec64a 100644 --- a/compiler/optimizer/GlobalRegisterAllocator.cpp +++ b/compiler/optimizer/GlobalRegisterAllocator.cpp @@ -79,7 +79,7 @@ #define GRA_COMPLEXITY_LIMIT 1000000000 -static bool isHot(TR::Compilation *comp) { return comp->getMethodHotness() >= hot || comp->getOption(TR_NotCompileTimeSensitive); } +static bool isHot(TR::Compilation *comp) { return comp->getMethodHotness() >= hot; } #define HAVE_DIFFERENT_MSB_TO_LSB_OFFSETS(r1,r2) \ ((((r1)->getHostByteOffset() + (r1)->getSize()) - ((r2)->getHostByteOffset() + (r2)->getSize())) != 0) @@ -310,6 +310,7 @@ TR_GlobalRegisterAllocator::perform() if (comp()->isGPUCompilation()) return 1; + walkTreesAndCollectSymbolDataTypes(); comp()->getOptimizer()->setResetExitsGRA(0); @@ 
-354,9 +355,13 @@ TR_GlobalRegisterAllocator::perform() for (a = locals.getFirst(); a != NULL; a = locals.getNext()) ++numLocals; - ListIterator parms(&comp()->getMethodSymbol()->getParameterList()); - for (TR::ParameterSymbol *p = parms.getFirst(); p != NULL; p = parms.getNext()) - ++numLocals; + if (comp()->getOption(TR_EnableAggressiveLiveness)) + { + TR::ParameterSymbol *p; + ListIterator parms(&comp()->getMethodSymbol()->getParameterList()); + for (p = parms.getFirst(); p != NULL; p = parms.getNext()) + ++numLocals; + } const uint64_t MAX_BITVECTOR_MEMORY_USAGE = 1000000000; uint64_t bitvectorMemoryUsage = numLocals * comp()->getFlowGraph()->getNextNodeNumber(); @@ -366,7 +371,8 @@ TR_GlobalRegisterAllocator::perform() { // Perform liveness analysis // - TR_Liveness liveLocals(comp(), optimizer(), comp()->getFlowGraph()->getStructure(), false, NULL, false, true); + TR_Liveness liveLocals(comp(), optimizer(), comp()->getFlowGraph()->getStructure(), + false, NULL, false, comp()->getOption(TR_EnableAggressiveLiveness)); liveLocals.perform(comp()->getFlowGraph()->getStructure()); @@ -416,12 +422,10 @@ TR_GlobalRegisterAllocator::perform() } candidates->getReferencedAutoSymRefs(comp()->trMemory()->currentStackRegion()); - - static const char *skipit = feGetEnv("TR_SkipOfferAllGRA"); - if (NULL == skipit) - { + if (!comp()->mayHaveLoops() || cg()->considerAllAutosAsTacticalGlobalRegisterCandidates()) offerAllAutosAndRegisterParmAsCandidates(cfgBlocks, numberOfBlocks); - } + else + offerAllFPAutosAndParmsAsCandidates(cfgBlocks, numberOfBlocks); _registerCandidates = new (trStackMemory()) SymRefCandidateMap((SymRefCandidateMapComparator()), SymRefCandidateMapAllocator(trMemory()->currentStackRegion())); @@ -432,6 +436,10 @@ TR_GlobalRegisterAllocator::perform() (*_registerCandidates)[rc->getSymbolReference()->getReferenceNumber()] = rc; } + findIfThenRegisterCandidates(); + + findLoopAutoRegisterCandidates(); + if (comp()->getOptions()->realTimeGC() && 
comp()->compilationShouldBeInterrupted(GRA_AFTER_FIND_LOOP_AUTO_CONTEXT)) { @@ -2642,6 +2650,280 @@ TR_GlobalRegisterAllocator::getGlobalRegister(TR::Symbol * symbol, TR_ArrayphaseTimer()); + + TR_ScratchList registerCandidates(trMemory()); + TR::CFG * cfg = comp()->getFlowGraph(); + + TR::ResolvedMethodSymbol *methodSymbol = comp()->getJittedMethodSymbol(); + ListIterator paramIterator(&(methodSymbol->getParameterList())); + ListIterator autoIterator(&(methodSymbol->getAutomaticList())); + TR::ParameterSymbol *paramCursor = paramIterator.getFirst(); + TR::AutomaticSymbol *autoCursor = autoIterator.getFirst(); + + // This first part isn't really looking for If-Then-Else candidates. + // It is visiting all parameters and locals and ensuring that the candidate has a BlockInfo entry if it + // is live in any block. + if(!debug("oldIfThen")) + { + LexicalTimer t("newFindIfThen", comp()->phaseTimer()); + + // First create a bit vector of auto and parm symbols + // Guess at size of bit vector to use by getting first block's LiveLocals bitvector and check its size + TR_BitVector *guess = toBlock(cfg->getFirstNode())->getLiveLocals(); + int32_t guessSize = 1024; + if(guess && guess->numChunks()*BITS_IN_CHUNK > guessSize) + guessSize = guess->numChunks()*BITS_IN_CHUNK; + + TR_BitVector autoAndParmLiveLocalIndex(guessSize, trMemory(), stackAlloc, growable); + TR_Array registerCandidateByIndex(trMemory(), guessSize, false, stackAlloc); + autoAndParmLiveLocalIndex.empty(); + int32_t i; + while (paramCursor != NULL) + { + if (paramCursor->isReferencedParameter()) + { + TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->find(paramCursor); + if (!rc) + { + paramCursor = paramIterator.getNext(); + continue; + } + i = paramCursor->getLiveLocalIndex(); + autoAndParmLiveLocalIndex.set(i); + registerCandidateByIndex[i] = rc; + } + paramCursor = paramIterator.getNext(); + } + while (autoCursor != NULL) + { + TR::RegisterCandidate *rc = 
comp()->getGlobalRegisterCandidates()->find(autoCursor); + if (!rc) + { + autoCursor = autoIterator.getNext(); + continue; + } + i = autoCursor->getLiveLocalIndex(); + autoAndParmLiveLocalIndex.set(i); + registerCandidateByIndex[i] = rc; + autoCursor = autoIterator.getNext(); + } + + // Now visit all blocks and intersect each blocks LiveLocals with autoAndParmLiveLocalIndex. + // For each intersected bit ensure BlockInfo exists for the candidate and initialize it + // to zero NumberOfLoadsAndStores if it does not exist + guessSize = autoAndParmLiveLocalIndex.numChunks()*BITS_IN_CHUNK; + TR_BitVector intersection(guessSize, trMemory(), stackAlloc, growable); + for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) + { + TR_BitVector * liveLocals = toBlock(block)->getLiveLocals(); + if (cg()->getLiveLocals() && liveLocals) + { + if (block != comp()->getStartBlock() && + block != cfg->getStart() && + block != cfg->getEnd()) + { + intersection = autoAndParmLiveLocalIndex; + intersection &= *liveLocals; + TR_BitVectorIterator bvi(intersection); + while (bvi.hasMoreElements()) + { + int32_t autoOrParm = bvi.getNextElement(); + TR::RegisterCandidate *rc=registerCandidateByIndex[autoOrParm]; + if(!rc->getBlocks().find(block->getNumber())) + rc->getBlocks().setNumberOfLoadsAndStores(toBlock(block)->getNumber(), 0); + } + } + } + } + } + else + { + LexicalTimer t("oldFindIfThen", comp()->phaseTimer()); + while (paramCursor != NULL) + { + if (paramCursor->isReferencedParameter()) + { + TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->find(paramCursor); + if (!rc) + { + paramCursor = paramIterator.getNext(); + continue; + } + + for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) + { + TR_BitVector * liveLocals = toBlock(block)->getLiveLocals(); + if (cg()->getLiveLocals() && + liveLocals && !rc->getBlocks().find(block->getNumber())) + { + if (block != comp()->getStartBlock() && + block != cfg->getStart() 
&& + block != cfg->getEnd() && + liveLocals->get(paramCursor->getLiveLocalIndex())) + { + rc->getBlocks().setNumberOfLoadsAndStores(toBlock(block)->getNumber(), 0); + } + } + } + } + + paramCursor = paramIterator.getNext(); + } + + while (autoCursor != NULL) + { + //if (paramCursor->isReferencedParameter()) + { + TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->find(autoCursor); + if (!rc) + { + autoCursor = autoIterator.getNext(); + continue; + } + + for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) + { + TR_BitVector * liveLocals = toBlock(block)->getLiveLocals(); + if (cg()->getLiveLocals() && + liveLocals && !rc->getBlocks().find(block->getNumber())) + { + if (block != comp()->getStartBlock() && + block != cfg->getStart() && + block != cfg->getEnd() && + liveLocals->get(autoCursor->getLiveLocalIndex())) + { + rc->getBlocks().setNumberOfLoadsAndStores(toBlock(block)->getNumber(), 0); + } + } + } + } + + autoCursor = autoIterator.getNext(); + } + } + + for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) + { + TR::CFGEdgeList& edges = block->getSuccessors(); + TR::Block *currBlock = toBlock(block); + if ((edges.size() == 2) && currBlock->getExit()) + { + TR::Block * block1 = toBlock(edges.front()->getTo()); + TR::Block * block2 = toBlock((*(++edges.begin()))->getTo()); + + TR::Block *mergeBlock1 = NULL, *mergeBlock2 = NULL; + if (block1->getSuccessors().size() == 1) + mergeBlock1 = toBlock(block1->getSuccessors().front()->getTo()); + + if (mergeBlock1) + { + if (block2->hasSuccessor(mergeBlock1)) + mergeBlock2 = mergeBlock1; + } + else + { + if (block2->getSuccessors().size() == 1) + mergeBlock2 = toBlock(block2->getSuccessors().front()->getTo()); + + if (mergeBlock2) + { + if (block1->hasSuccessor(mergeBlock2)) + mergeBlock1 = mergeBlock2; + } + } + + + //if (block1->getSuccessors().size() == 1) && block2->getSuccessors().size() == 1)) + { + //TR::Block * mergeBlock1 = 
toBlock(block1->getSuccessors().front()->getTo()); + //TR::Block * mergeBlock2 = toBlock(block2->getSuccessors().front()->getTo()); + if (mergeBlock1 && + mergeBlock1 == mergeBlock2 && + mergeBlock1 != cfg->getEnd()) + { + for (TR::TreeTop * tt1 = block1->getEntry(); tt1 != block1->getExit(); tt1 = tt1->getNextTreeTop()) + { + TR::Node * storeNode1 = tt1->getNode()->getStoreNode(); + if (storeNode1 && storeNode1->getOpCode().isStoreDirect()) + { + TR::RegisterCandidate * rc = 0; + TR::SymbolReference * symRef = tt1->getNode()->getSymbolReference(); + if ((symRef->getSymbol()->isAutoOrParm() ) && + isSymRefAvailable(symRef)) + for (TR::TreeTop * tt2 = block2->getEntry(); tt2 != block2->getExit(); tt2 = tt2->getNextTreeTop()) + { + TR::Node * storeNode2 = tt2->getNode()->getStoreNode(); + if (storeNode2 && storeNode2->getOpCode().isStoreDirect() && storeNode2->getSymbolReference() == symRef) + { + rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); + break; + } + } + if (rc) + { + int32_t weight = 3; + if (mergeBlock1->findFirstReference(symRef->getSymbol(), comp()->incVisitCount())) + { + if (mergeBlock1->getStructureOf()) + optimizer()->getStaticFrequency(mergeBlock1, &weight); + + rc->addBlock(mergeBlock1, weight); + } + if (toBlock(block)->findFirstReference(symRef->getSymbol(), comp()->incVisitCount())) + { + weight = 1; + if (toBlock(block)->getStructureOf()) + optimizer()->getStaticFrequency(toBlock(block), &weight); + + rc->addBlock(block1, weight); + rc->addBlock(block2, weight); + } + } + } + } + } + } + + + TR::Node *virtualGuard = currBlock->getLastRealTreeTop()->getNode(); + if (virtualGuard->isTheVirtualGuardForAGuardedInlinedCall()) + { + TR::Block *branchBlock = virtualGuard->getBranchDestination()->getNode()->getBlock(); + TR::TreeTop *tt1 = currBlock->getEntry(); + for (;tt1 != currBlock->getExit(); tt1 = tt1->getNextTreeTop()) + { + TR::Node * storeNode1 = tt1->getNode()->getStoreNode(); + if (storeNode1 && 
storeNode1->getOpCode().isStoreDirect()) + { + TR::RegisterCandidate * rc = 0; + TR::SymbolReference * symRef = tt1->getNode()->getSymbolReference(); + if ((symRef->getSymbol()->isAutoOrParm() ) && + isSymRefAvailable(symRef)) + { + rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); + } + + if (rc) + { + int32_t weight = 1; + if (branchBlock->findFirstReference(symRef->getSymbol(), comp()->incVisitCount())) + { + if (branchBlock->getStructureOf()) + optimizer()->getStaticFrequency(branchBlock, &weight); + //printf("Adding symRef %d in block_%d\n", symRef->getReferenceNumber(), branchBlock->getNumber()); + rc->addBlock(branchBlock, weight); + } + } + } + } + } + } + } + } void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Block **cfgBlocks, int32_t numberOfNodes, bool onlySelectedCandidates) { @@ -2658,10 +2940,8 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl TR::Symbol *sym; TR::RegisterCandidates *candidates = comp()->getGlobalRegisterCandidates(); - int32_t freqThreshold = isHot(comp()) ? 
0 : comp()->getOptions()->getGRAFreqThresholdAtWarm(); // Interested blocks consist of all blocks except for entry, exit and exception handlers - // TR_BitVector interestedBlocks(numberOfNodes, comp()->trMemory()->currentStackRegion()); TR_BitVector tmp(numberOfNodes, comp()->trMemory()->currentStackRegion()); for (node = cfg->getFirstNode(); node != NULL; node = node->getNext()) @@ -2672,73 +2952,42 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl interestedBlocks.set(block->getNumber()); } - // First create a bit vector of auto and parm symbols - // Guess at size of bit vector to use by getting first block's LiveLocals bitvector and check its size - TR_BitVector *guess = toBlock(cfg->getFirstNode())->getLiveLocals(); - int32_t guessSize = 1024; - if (guess && guess->numChunks()*BITS_IN_CHUNK > guessSize) - guessSize = guess->numChunks()*BITS_IN_CHUNK; - - TR::ParameterSymbol *paramCursor = paramIterator.getFirst(); - TR_BitVector autoAndParmLiveLocalIndex(guessSize, trMemory(), stackAlloc, growable); - TR_Array registerCandidateByIndex(trMemory(), guessSize, false, stackAlloc); - autoAndParmLiveLocalIndex.empty(); - int32_t i; - while (paramCursor != NULL) + // + // Offer parameters first + // + for (TR::ParameterSymbol *paramCursor = paramIterator.getFirst(); paramCursor != NULL; paramCursor = paramIterator.getNext()) { - TR::SymbolReference *symRef = methodSymbol->getParmSymRef(paramCursor->getSlot()); - TR::RegisterCandidate *rc = NULL; - if (paramCursor->isReferencedParameter() && isTypeAvailable(symRef)) + symRef = methodSymbol->getParmSymRef(paramCursor->getSlot()); + + if ((paramCursor->isReferencedParameter() && isTypeAvailable(symRef)) && + !onlySelectedCandidates) { - rc = comp()->getGlobalRegisterCandidates()->find(paramCursor); - if (!rc) - { - // Check there is an interested block that references the symref - tmp.empty(); - tmp |= 
*comp()->getGlobalRegisterCandidates()->getBlocksReferencingSymRef(symRef->getReferenceNumber()); - tmp &= interestedBlocks; - if (!tmp.isEmpty()) - { - rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); - } - else - { - paramCursor = paramIterator.getNext(); - continue; - } - } + if (!isSymRefAvailable(symRef)) + continue; - // All live interested blocks will be candidates - rc->getBlocks().getCandidateBlocks() |= tmp; + int32_t symRefNumber = symRef->getReferenceNumber(); + + // Check there is an interested block that references the symref + tmp.empty(); + tmp |= *candidates->getBlocksReferencingSymRef(symRefNumber); + tmp &= interestedBlocks; + if (tmp.isEmpty()) + continue; + + TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); + + // All interested blocks will be candidates + rc->getBlocks().getCandidateBlocks() |= interestedBlocks; // Increment the number of loads and stores for all candidate blocks // that also reference the symref TR_BitVectorIterator bvi(tmp); while (bvi.hasMoreElements()) - { - int32_t nextBlockNum = bvi.getNextElement(); - TR::Block *nextBlock = cfgBlocks[nextBlockNum]; - if (isHot(comp()) || (nextBlock->getFrequency() > freqThreshold)) - { - int32_t executionFrequency = 1; - if (nextBlock->getStructureOf()) - optimizer()->getStaticFrequency(nextBlock, &executionFrequency); - rc->getBlocks().incNumberOfLoadsAndStores(nextBlockNum, executionFrequency); - } - } + rc->getBlocks().incNumberOfLoadsAndStores(bvi.getNextElement(), 1); - static const char *doit = feGetEnv("TR_AddAllBlocksForLinkageRegs"); - if (doit != NULL) - { - if (paramCursor->getLinkageRegisterIndex() >= 0) - rc->addAllBlocks(); - } + if (paramCursor->getLinkageRegisterIndex() >= 0) + rc->addAllBlocks(); } - - i = paramCursor->getLiveLocalIndex(); - autoAndParmLiveLocalIndex.set(i); - registerCandidateByIndex[i] = rc; - paramCursor = paramIterator.getNext(); } // @@ -2750,9 +2999,8 @@ void 
TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl if (symRef && isSymRefAvailable(symRef)) { sym = symRef->getSymbol(); - if (sym && sym->isAuto()) + if (sym) { - TR::AutomaticSymbol *autoCursor = sym->getAutoSymbol(); if (candidates->aliasesPreventAllocation(comp(),symRef)) { if (comp()->getOptions()->trace(OMR::tacticalGlobalRegisterAllocator)) @@ -2760,9 +3008,11 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl continue; } - if (methodSymbol->getAutomaticList().find(sym->castToAutoSymbol()) && - !onlySelectedCandidates) + if ((sym->isAuto() && + methodSymbol->getAutomaticList().find(sym->castToAutoSymbol()) && + !onlySelectedCandidates)) { + int32_t symRefNumber = symRef->getReferenceNumber(); // Check there is an interested block that references the symref @@ -2776,64 +3026,107 @@ void TR_GlobalRegisterAllocator::offerAllAutosAndRegisterParmAsCandidates(TR::Bl if (sym->isMethodMetaData() && rc && rc->initialBlocksWeightComputed()) continue; - // All live interested blocks will be candidates - rc->getBlocks().getCandidateBlocks() |= tmp; + // All interested blocks will be candidates + rc->getBlocks().getCandidateBlocks() |= interestedBlocks; // Increment the number of loads and stores for all candidate blocks // that also reference the symref TR_BitVectorIterator bvi(tmp); while (bvi.hasMoreElements()) - { - int32_t nextBlockNum = bvi.getNextElement(); - TR::Block *nextBlock = cfgBlocks[nextBlockNum]; - if (isHot(comp()) || (nextBlock->getFrequency() > freqThreshold)) - { - int32_t executionFrequency = 1; - if (nextBlock->getStructureOf()) - optimizer()->getStaticFrequency(nextBlock, &executionFrequency); - rc->getBlocks().incNumberOfLoadsAndStores(nextBlockNum, executionFrequency); - } - } + rc->getBlocks().incNumberOfLoadsAndStores(bvi.getNextElement(), 1); rc->setInitialBlocksWeightComputed(true); - - i = autoCursor->getLiveLocalIndex(); - autoAndParmLiveLocalIndex.set(i); - 
registerCandidateByIndex[i] = rc; } } } } + } - // Now visit all blocks and intersect each blocks LiveLocals with autoAndParmLiveLocalIndex. - // For each intersected bit ensure BlockInfo exists for the candidate and initialize it - // to zero NumberOfLoadsAndStores if it does not exist - guessSize = autoAndParmLiveLocalIndex.numChunks()*BITS_IN_CHUNK; - TR_BitVector intersection(guessSize, trMemory(), stackAlloc, growable); - for (TR::CFGNode * block = cfg->getFirstNode(); block; block = block->getNext()) + +void TR_GlobalRegisterAllocator::offerAllFPAutosAndParmsAsCandidates(TR::Block * *cfgBlocks, int32_t numberOfNodes) + { + LexicalTimer t("TR_GlobalRegisterAllocator::offerAllFPAutosAndParmsAsCandidates", comp()->phaseTimer()); + + TR::CFG *cfg = comp()->getFlowGraph(); + TR::CFGNode *node; + TR::Block *block, *startBlock=toBlock(cfg->getStart()), *endBlock=toBlock(cfg->getEnd()); + int32_t symRefCount = comp()->getSymRefCount(); + TR::SymbolReferenceTable *symRefTab = comp()->getSymRefTab(); + TR::SymbolReference *symRef; + TR::Symbol *sym; + TR::ResolvedMethodSymbol *methodSymbol = comp()->getJittedMethodSymbol(); + TR::RegisterCandidates *candidates = comp()->getGlobalRegisterCandidates(); + + // Interested blocks consist of all blocks except for entry, exit and exception handlers + TR_BitVector interestedBlocks(numberOfNodes, comp()->trMemory()->currentStackRegion()); + TR_BitVector tmp(numberOfNodes, comp()->trMemory()->currentStackRegion()); + for (node = cfg->getFirstNode(); node != NULL; node = node->getNext()) + { + block = toBlock(node); + if (block == startBlock || block == endBlock || (!block->getExceptionPredecessors().empty()) || !cfgBlocks[block->getNumber()]) + continue; + + interestedBlocks.set(block->getNumber()); + } + + // + // Offer all FP autos now + // + for (int32_t symRefNumber = symRefTab->getIndexOfFirstSymRef(); symRefNumber < symRefCount; symRefNumber++) { - TR_BitVector * liveLocals = toBlock(block)->getLiveLocals(); - int32_t 
frequency = toBlock(block)->getFrequency(); - if ((isHot(comp()) || (frequency > freqThreshold)) && (cg()->getLiveLocals() && liveLocals)) + symRef = symRefTab->getSymRef(symRefNumber); + if (symRef) { - if (block != cfg->getStart() && block != cfg->getEnd()) + sym = symRef->getSymbol(); + if (sym) { - intersection = autoAndParmLiveLocalIndex; - intersection &= *liveLocals; - TR_BitVectorIterator bvi(intersection); - while (bvi.hasMoreElements()) + + if ((sym->getDataType() == TR::Float + || sym->getDataType() == TR::Double + ) && + isTypeAvailable(symRef) && + ((sym->isAuto() && methodSymbol->getAutomaticList().find(sym->castToAutoSymbol())) || + (sym->isParm() && methodSymbol->getParameterList().find(sym->castToParmSymbol()) && sym->isReferencedParameter()))) { - int32_t autoOrParm = bvi.getNextElement(); - TR::RegisterCandidate *rc=registerCandidateByIndex[autoOrParm]; - if(!rc->getBlocks().find(block->getNumber())) - rc->getBlocks().setNumberOfLoadsAndStores(toBlock(block)->getNumber(), 0); + + int32_t symRefNumber = symRef->getReferenceNumber(); + + // Check there is an interested block that references the symref + tmp.empty(); + tmp |= *candidates->getBlocksReferencingSymRef(symRefNumber); + tmp &= interestedBlocks; + if (tmp.isEmpty()) + continue; + + TR::RegisterCandidate *rc = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); + + // All interested blocks will be candidates + rc->getBlocks().getCandidateBlocks() |= interestedBlocks; + + // Increment the number of loads and stores for all candidate blocks + // that also reference the symref + TR_BitVectorIterator bvi(tmp); + while (bvi.hasMoreElements()) + rc->getBlocks().incNumberOfLoadsAndStores(bvi.getNextElement(), 1); } } } } } +void +TR_GlobalRegisterAllocator::findLoopAutoRegisterCandidates() + { + LexicalTimer t("TR_GlobalRegisterAllocator::findLoopAutoRegisterCandidates", comp()->phaseTimer()); + TR::StackMemoryRegion stackMemoryRegion(*trMemory()); + + TR::CFG * cfg = 
comp()->getFlowGraph(); + vcount_t visitCount = comp()->incVisitCount(); + TR_Structure *rootStructure = comp()->getFlowGraph()->getStructure(); + SymRefCandidateMap * registerCandidates = new (trStackMemory()) SymRefCandidateMap((SymRefCandidateMapComparator()), SymRefCandidateMapAllocator(trMemory()->currentStackRegion())); + findLoopsAndCorrespondingAutos(NULL, visitCount, *registerCandidates); + } TR_GlobalRegisterAllocator::BlockInfo & TR_GlobalRegisterAllocator::blockInfo(int32_t i) @@ -2843,6 +3136,189 @@ TR_GlobalRegisterAllocator::blockInfo(int32_t i) return _blockInfo[i]; } +void +TR_GlobalRegisterAllocator::findLoopsAndCorrespondingAutos(TR_StructureSubGraphNode *structureNode, vcount_t visitCount, SymRefCandidateMap &registerCandidates) + { + TR_Structure *structure; + if (structureNode) + structure = structureNode->getStructure(); + else + structure = comp()->getFlowGraph()->getStructure(); + + if (structure->asRegion()) + { + TR_RegionStructure *regionStructure = structure->asRegion(); + TR_StructureSubGraphNode *subNode; + TR_Structure *subStruct = NULL; + TR_RegionStructure::Cursor si(*regionStructure); + for (subNode = si.getCurrent(); subNode != NULL; subNode = si.getNext()) + { + subStruct = subNode->getStructure(); + findLoopsAndCorrespondingAutos(subNode, visitCount, registerCandidates); + } + + if (!regionStructure->isAcyclic() && structureNode) + { + TR_ScratchList blocksInLoop(trMemory()); + regionStructure->getBlocks(&blocksInLoop); + + TR_BitVector assignedAutosInCurrentLoop(_origSymRefCount, trMemory(), stackAlloc); + + TR_BitVector *symsThatShouldNotBeAssignedInCurrentLoop = NULL; + TR_BitVector *symsThatShouldBeAssignedInCurrentLoop = NULL; + + bool excludeInvariantsEnabled = comp()->cg()->excludeInvariantsFromGRAEnabled() && + (!comp()->getOption(TR_DisableRXusage)); + + if (excludeInvariantsEnabled) + { + symsThatShouldNotBeAssignedInCurrentLoop = new (trStackMemory()) TR_BitVector(_origSymRefCount, trMemory(), stackAlloc); + 
symsThatShouldNotBeAssignedInCurrentLoop->setAll(_origSymRefCount); + symsThatShouldBeAssignedInCurrentLoop = new (trStackMemory()) TR_BitVector(_origSymRefCount, trMemory(), stackAlloc); + } + + ListIterator blocksIt(&blocksInLoop); + TR::Block *nextBlock; + bool hasCatchBlock = false; + for (nextBlock = blocksIt.getFirst(); nextBlock; nextBlock=blocksIt.getNext()) + { + if (nextBlock->isCatchBlock()) + hasCatchBlock = true; + } + + TR_BitVector *oldCandidatesSignExtendedInThisLoop = NULL; + if (_candidatesSignExtendedInThisLoop) + { + oldCandidatesSignExtendedInThisLoop = new (trStackMemory()) TR_BitVector(_origSymRefCount, trMemory(), stackAlloc); + *oldCandidatesSignExtendedInThisLoop = *_candidatesSignExtendedInThisLoop; + _candidatesSignExtendedInThisLoop->empty(); + } + + for (nextBlock = blocksIt.getFirst(); nextBlock; nextBlock=blocksIt.getNext()) + { + if (nextBlock->getVisitCount() != visitCount) + { + static uint32_t numIter = 0; + if (((++numIter) & 0x3f)==0 && comp()->compilationShouldBeInterrupted(GRA_FIND_LOOPS_AND_CORRESPONDING_AUTOS_BLOCK_CONTEXT)) + { + comp()->failCompilation("interrupted in GRA-findLoopsAndCorrspondingAuto-block"); + } + nextBlock->setVisitCount(visitCount); + int32_t executionFrequency = 1; + if (nextBlock->getStructureOf()) + optimizer()->getStaticFrequency(nextBlock, &executionFrequency); + + TR::TreeTop *currentTree = nextBlock->getEntry(); + TR::TreeTop *exitTree = nextBlock->getExit(); + while (currentTree != exitTree) + { + TR::Node *currentNode = currentTree->getNode(); + TR::Node *arrayAccess = NULL; + markAutosUsedIn(currentNode, NULL, NULL, &arrayAccess, nextBlock, &blocksInLoop, visitCount, executionFrequency, registerCandidates, &assignedAutosInCurrentLoop, symsThatShouldNotBeAssignedInCurrentLoop, hasCatchBlock); + currentTree = currentTree->getNextRealTreeTop(); + } + } + } + + if (oldCandidatesSignExtendedInThisLoop) + *_candidatesSignExtendedInThisLoop = *oldCandidatesSignExtendedInThisLoop; + + for (auto succ 
= structureNode->getSuccessors().begin(); succ != structureNode->getSuccessors().end(); ++succ) + { + TR_Structure *exitStructure = (*succ)->getTo()->asStructureSubGraphNode()->getStructure(); + TR::Block *exitBlock = NULL; + if (exitStructure) + exitBlock = exitStructure->getEntryBlock(); + + // Turning off this code below. Reason : extending the live range till the + // exit blocks may lead to slowdowns if the loop (whose exit we are extending + // the live range to) was actually run for only a few iterations and + // the outer loop was in fact long running. Making the candidate live on entry + // to an exit out of the inner loop may cause it to be loaded up into the register + // on some hot path in the outer loop and this extra load could degrade performance. + // This scenario actually occurred in db shell sort, where the innermost loop is + // String compareTo code which does not run too long, but the outer loops + // are very hot. + // + if (0 && exitBlock && + (exitBlock != comp()->getFlowGraph()->getEnd())) + { + TR_BitVectorIterator bvi(assignedAutosInCurrentLoop); + while (bvi.hasMoreElements()) + { + int32_t nextCandidate = bvi.getNextElement(); + //dumpOptDetails(comp(), "For loop %d exit block_%d candidate %d\n", structureNode->getNumber(), exitBlock->getNumber(), nextCandidate); + TR::RegisterCandidate *rc = registerCandidates[nextCandidate]; + rc->addBlock(exitBlock, 0); + rc->addLoopExitBlock(exitBlock); + } + } + } + + if (symsThatShouldNotBeAssignedInCurrentLoop) + { + *symsThatShouldNotBeAssignedInCurrentLoop &= assignedAutosInCurrentLoop; + + TR_BitVectorIterator bvi(*symsThatShouldNotBeAssignedInCurrentLoop); + while (bvi.hasMoreElements()) + { + int32_t nextCandidate = bvi.getNextElement(); + if (!symsThatShouldBeAssignedInCurrentLoop->get(nextCandidate)) + { + *_temp2 = *symsThatShouldBeAssignedInCurrentLoop; + TR::RegisterCandidate *rc = registerCandidates[nextCandidate]; + TR::SymbolReference *symRef = rc->getSymbolReference(); + 
ListIterator pairs(&_pairedSymbols); + bool doNotRemoveThisCandidate = false; + TR_PairedSymbols *p; + for (p = pairs.getFirst(); p; p = pairs.getNext()) + { + TR::SymbolReference *otherSymRef = NULL; + if (p->_symRef1 == symRef) + otherSymRef = p->_symRef2; + else if (p->_symRef2 == symRef) + otherSymRef = p->_symRef1; + + if (otherSymRef && symsThatShouldNotBeAssignedInCurrentLoop->get(otherSymRef->getReferenceNumber())) + { + TR::RegisterCandidate *otherCandidate = registerCandidates[otherSymRef->getReferenceNumber()]; + if (otherCandidate->countNumberOfLoadsAndStoresInBlocks(&blocksInLoop) < rc->countNumberOfLoadsAndStoresInBlocks(&blocksInLoop)) + symsThatShouldBeAssignedInCurrentLoop->set(otherSymRef->getReferenceNumber()); + else + doNotRemoveThisCandidate = true; + } + } + + if (!doNotRemoveThisCandidate) + { + //printf("Removing candidate in %s\n", _compilation->getCurrentMethod()->signature()); + blocksIt.reset(); + for (nextBlock = blocksIt.getCurrent(); nextBlock; nextBlock=blocksIt.getNext()) + rc->removeBlock(nextBlock); + + for (auto succ = structureNode->getSuccessors().begin(); succ != structureNode->getSuccessors().end(); ++succ) + { + TR_Structure *exitStructure = (*succ)->getTo()->asStructureSubGraphNode()->getStructure(); + TR::Block *exitBlock = NULL; + if (exitStructure) + exitBlock = exitStructure->getEntryBlock(); + if (exitBlock && + (exitBlock != comp()->getFlowGraph()->getEnd())) + { + rc->removeBlock(exitBlock); + rc->removeLoopExitBlock(exitBlock); + } + } + } + else + *symsThatShouldBeAssignedInCurrentLoop = *_temp2; + } + } + } + } + } + } + bool TR_GlobalRegisterAllocator::isDependentStore(TR::Node *node, const TR_UseDefInfo::BitVector &defs, TR::SymbolReference *symRef, bool *seenLoad) { @@ -2877,6 +3353,231 @@ bool TR_GlobalRegisterAllocator::isDependentStore(TR::Node *node, const TR_UseDe } + +void +TR_GlobalRegisterAllocator::markAutosUsedIn( + TR::Node *node, + TR::Node *parent, + TR::Node *grandParent, + TR::Node 
**currentArrayAccess, + TR::Block *block, + List<TR::Block> *blocksInLoop, + vcount_t visitCount, + int32_t executionFrequency, + SymRefCandidateMap &registerCandidates, + TR_BitVector *assignedAutosInCurrentLoop, + TR_BitVector *symsThatShouldNotBeAssignedInCurrentLoop, + bool hasCatchBlock) + { + bool enableSignExtGRA = false; // enable for other platforms later + + static char *doit = feGetEnv("TR_SIGNEXTGRA"); + if (NULL != doit) + enableSignExtGRA = true; + + if (comp()->target().cpu.isZ()) + { + enableSignExtGRA = true; + static char *doit2 = feGetEnv("TR_NSIGNEXTGRA"); + if (NULL != doit2) + enableSignExtGRA = false; + } + + TR::Node *origNode = NULL; + TR::Node *prevArrayAccess = NULL; + + if (node->getVisitCount() == visitCount) + return; + + node->setVisitCount(visitCount); + + if (node->getOpCode().isLoadVarDirect() && node->getSymbolReference()->getSymbol()->isAuto()) + { + TR_UseDefInfo *info = optimizer()->getUseDefInfo(); + if (comp()->target().is64Bit() && info && + (parent->getOpCodeValue() == TR::i2l) && node->isNonNegative() && enableSignExtGRA) + { + node->setSkipSignExtension(true); + _candidatesNeedingSignExtension->set(node->getSymbolReference()->getReferenceNumber()); + + TR_BlockStructure *blockStructure = block->getStructureOf(); + //printf("Skip sign extension at node %p in %s\n", node, comp->getCurrentMethod()->signature()); + int32_t useIndex = node->getUseDefIndex(); + TR_UseDefInfo::BitVector defs(comp()->allocator()); + if (info->getUseDef(defs, useIndex)) + { + TR_UseDefInfo::BitVector::Cursor cursor(defs); + for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne()) + { + int32_t defIndex = info->getFirstDefIndex() + (int32_t) cursor; + if (defIndex < info->getFirstRealDefIndex()) // def is unseen--can't mark this node as skippable + { + node->setSkipSignExtension(false); + //printf("Parm def, so cannot skip sign extension at node %p in %s\n", + // node, comp->getCurrentMethod()->signature()); + break; + } + TR::Node *defNode = 
info->getNode(defIndex); + if (defNode->getOpCode().isStore()) + { + TR::Symbol *sym = defNode->getSymbolReference()->getSymbol(); + // TR_ASSERT((sym == node->getSymbolReference()->getSymbol()), "Symbols must match between a use and a def\n"); + // Comment out the above assume because: + // - The assume came from JAVA where there could not be aliasing with autos + // - If there is an alias causing a different sym then we will not have a candidate anyways so it does not matter + // - avoid extra computation if symbols do not match by adding extra condition below. + if (sym == node->getSymbolReference()->getSymbol() && + (sym->isAuto() )) + { + bool seenLoad = false; + bool dependentStore = isDependentStore(defNode, defs, node->getSymbolReference(), &seenLoad); + if (dependentStore && + defNode->getFirstChild()->isNonNegative() && + seenLoad) + { + //printf("Skip sign extension at def node %p in %s\n", defNode, comp->getCurrentMethod()->signature()); + TR::TreeTop *defTree = info->getTreeTop(defIndex); + TR::Block *defBlock = defTree->getEnclosingBlock(); + TR_BlockStructure *defBlockStructure = defBlock->getStructureOf(); + if (comp()->getFlowGraph()->getStructure() && + blockStructure && defBlockStructure && + (blockStructure->getContainingLoop() == defBlockStructure->getContainingLoop())) + continue; + } + + if (!defNode->getFirstChild()->isNonNegative()) + { + node->setSkipSignExtension(false); + break; + } + else + defNode->setNeedsSignExtension(true); + } + } + } + } + } + } + + if (node->getOpCode().isLoadVarDirect() || node->getOpCode().isStoreDirect()) + { + TR::CFG *cfg = comp()->getFlowGraph(); + + TR::SymbolReference *symRef = node->getSymbolReference(); + if (symRef->getSymbol()->isAutoOrParm() && isSymRefAvailable(symRef, blocksInLoop)) + { + TR::RegisterCandidate *rc = registerCandidates[symRef->getReferenceNumber()]; + if (!rc) + { + registerCandidates[symRef->getReferenceNumber()] = comp()->getGlobalRegisterCandidates()->findOrCreate(symRef); + rc = 
registerCandidates[symRef->getReferenceNumber()]; + } + + if (!assignedAutosInCurrentLoop->get(symRef->getReferenceNumber())) + { + assignedAutosInCurrentLoop->set(symRef->getReferenceNumber()); + ListIterator<TR::Block> blocksIt(blocksInLoop); + TR::Block *nextBlock; + for (nextBlock = blocksIt.getCurrent(); nextBlock; nextBlock=blocksIt.getNext()) + { + if (!rc->hasBlock(nextBlock)) + { + if (nextBlock != cfg->getStart()) + rc->addBlock(nextBlock, 0); + } + } + } + + //dumpOptDetails(comp(), "Adding %d numLoadsStores to candidate %d in block_%d\n", executionFrequency, rc->getSymbolReference()->getReferenceNumber(), block->getNumber()); + + if (block != cfg->getStart()) + { + // We should not consider the cost of the copies for live-range splitting, + // because this cost is meaningful only when one of operand (source or destination) receives a register. + // Note: Although we need to re-calculate the cost when one of operand receives a register, + // we do not do so currently. + // + if ((node->getOpCode().isStoreDirect() && isSplittingCopy(node)) || + (node->getOpCode().isLoadVarDirect() && parent && parent->getOpCode().isStoreDirect() && isSplittingCopy(parent))) + { + rc->addBlock(block, 0); + } + else + { + if (comp()->target().cpu.isZ() && + rc->getSymbolReference()->getSymbol()->getDataType() == TR::Address && + parent && + (((parent->getOpCode().isStoreIndirect() || + parent->getOpCode().isLoadIndirect()) && + parent->getFirstChild() == node) || + (parent->getOpCode().isAdd() && + grandParent && + (grandParent->getOpCode().isStoreIndirect() || + grandParent->getOpCode().isLoadIndirect())))) + { + rc->addBlock(block, executionFrequency*10); + if (trace()) + dumpOptDetails(comp(), "Increased weight of candidate #%d in block_%d to reduce AGI\n", rc->getSymbolReference()->getReferenceNumber(), block->getNumber()); + } + else + { + rc->addBlock(block, executionFrequency); + } + } + } + + if (((node->getReferenceCount() > 1) || + node->getOpCode().isStoreDirect()) && + 
symsThatShouldNotBeAssignedInCurrentLoop) + symsThatShouldNotBeAssignedInCurrentLoop->reset(symRef->getReferenceNumber()); + + if (symsThatShouldNotBeAssignedInCurrentLoop && + node->getOpCode().isLoadVarDirect()) + { + if (parent->getNumChildren() == 2) + { + if (parent->getSecondChild()->getOpCode().isLoadConst()) + symsThatShouldNotBeAssignedInCurrentLoop->reset(symRef->getReferenceNumber()); + else + { + TR::Node *otherChild; + if (parent->getFirstChild() == node) + otherChild = parent->getSecondChild(); + else + otherChild = parent->getFirstChild(); + + if (otherChild->getOpCode().isLoadVarDirect() && + (otherChild->getSymbolReference()->getSymbol()->isAutoOrParm()) + ) + findOrCreatePairedSymbols(symRef, otherChild->getSymbolReference()); + } + } + } + } + } + + int32_t childNum; + for (childNum=0;childNum<node->getNumChildren();childNum++) + { + TR::Node *oldArrayAccess = *currentArrayAccess; + markAutosUsedIn(node->getChild(childNum), node, parent, currentArrayAccess, block, blocksInLoop, visitCount, executionFrequency, registerCandidates, assignedAutosInCurrentLoop, symsThatShouldNotBeAssignedInCurrentLoop, hasCatchBlock); + *currentArrayAccess = oldArrayAccess; + } + + TR::ILOpCode &opCode = node->getOpCode(); + TR::ILOpCodes opCodeValue = opCode.getOpCodeValue(); + + if (symsThatShouldNotBeAssignedInCurrentLoop && + assignedAutosInCurrentLoop && + ((opCode.hasSymbolReference() && + opCode.isIndirect()) || + (opCode.isArrayLength()) || + !(opCode.isAdd() || opCode.isSub() || opCode.isMul() || + opCode.isBooleanCompare() || opCode.isNullCheck() || opCode.isBndCheck()))) + findSymsUsedInIndirectAccesses(node, symsThatShouldNotBeAssignedInCurrentLoop, assignedAutosInCurrentLoop, true); + } + + void TR_GlobalRegisterAllocator::signExtendAllDefNodes(TR::Node *defNode, List<TR::Node> *defNodes) { LexicalTimer t("TR_GlobalRegisterAllocator::signExtendAllDefNodes", comp()->phaseTimer()); diff --git a/compiler/optimizer/GlobalRegisterAllocator.hpp 
b/compiler/optimizer/GlobalRegisterAllocator.hpp index 4c5c1cfaff7..d0e98fb2ace 100644 --- a/compiler/optimizer/GlobalRegisterAllocator.hpp +++ b/compiler/optimizer/GlobalRegisterAllocator.hpp @@ -169,12 +169,16 @@ class TR_GlobalRegisterAllocator : public TR::Optimization private: + void findIfThenRegisterCandidates(); + void findLoopsAndCorrespondingAutos(TR_StructureSubGraphNode *, vcount_t, SymRefCandidateMap &); void findLoopsAndAutosNoStructureInfo(vcount_t visitCount, TR::RegisterCandidate **registerCandidates); void initializeControlFlowInfo(); + virtual void markAutosUsedIn(TR::Node *, TR::Node *, TR::Node *, TR::Node **, TR::Block *, List<TR::Block> *, vcount_t, int32_t, SymRefCandidateMap &, TR_BitVector *, TR_BitVector *, bool); void signExtendAllDefNodes(TR::Node *, List<TR::Node> *); void findSymsUsedInIndirectAccesses(TR::Node *, TR_BitVector *, TR_BitVector *, bool); void offerAllAutosAndRegisterParmAsCandidates(TR::Block **, int32_t, bool onlySelectedCandidates = false); + void offerAllFPAutosAndParmsAsCandidates(TR::Block **, int32_t); bool allocateForSymRef(TR::SymbolReference *symRef); bool allocateForType(TR::DataType dt); @@ -219,7 +223,8 @@ class TR_GlobalRegisterAllocator : public TR::Optimization void appendStoreToBlock(TR::SymbolReference *storeSymRef, TR::SymbolReference *loadSymRef, TR::Block *block, TR::Node *node); */ protected: - TR::Block * createNewSuccessorBlock(TR::Block *, TR::Block *, TR::TreeTop *, TR::Node *, TR::RegisterCandidate * rc); + void findLoopAutoRegisterCandidates(); + TR::Block * createNewSuccessorBlock(TR::Block *, TR::Block *, TR::TreeTop *, TR::Node *, TR::RegisterCandidate * rc); void appendGotoBlock(TR::Block *gotoBlock, TR::Block *curBlock); void transformBlock(TR::TreeTop *); bool isTypeAvailable(TR::SymbolReference *symref); diff --git a/compiler/optimizer/OMRRegisterCandidate.cpp b/compiler/optimizer/OMRRegisterCandidate.cpp index 4743027fedc..4507c92e68d 100644 --- a/compiler/optimizer/OMRRegisterCandidate.cpp +++ 
b/compiler/optimizer/OMRRegisterCandidate.cpp @@ -106,49 +106,32 @@ void OMR::GlobalSet::collectBlocks() TR_BitVectorIterator bvi(references); TR::NodeChecklist visited(_comp); - TR::TreeTop *startTree = _comp->getStartTree(); - TR::TreeTop *endTree = NULL; - TR::TreeTop *treeTop; - TR::TreeTop *exitTreeTop = startTree->getExtendedBlockExitTreeTop(); - // Process each block in treetop order - // - for (treeTop = startTree; (treeTop != endTree); treeTop = exitTreeTop->getNextTreeTop()) + for (TR::CFGNode *node = _comp->getFlowGraph()->getFirstNode(); node; node = node->getNext()) { - TR::Block *block = NULL; - exitTreeTop = treeTop->getExtendedBlockExitTreeTop(); + TR::Block *block = toBlock(node); + if (!block) + continue; // Collect all autos/parms used in this block + references.empty(); visited.remove(visited); - for (TR::TreeTop * tt = treeTop; tt && tt != exitTreeTop; tt = tt->getNextTreeTop()) - { - if (tt->getNode()->getOpCodeValue() == TR::BBStart) - { - references.empty(); - block = tt->getNode()->getBlock(); - } - + for (TR::TreeTop * tt = block->getEntry(); tt && tt != block->getExit(); tt = tt->getNextTreeTop()) collectReferencedAutoSymRefs(tt->getNode(), references, visited); - if (tt->getNextTreeTop()->getNode()->getOpCodeValue() == TR::BBEnd) + // Set this block as referencing the collected autos/params + // Also set any blocks that extend this one + bvi.setBitVector(references); + while (bvi.hasMoreElements()) + { + uint32_t symRefNum = bvi.getNextElement(); + auto lookup = _blocksPerAuto.find(symRefNum); + if (lookup != _blocksPerAuto.end()) + lookup->second->set(block->getNumber()); + else { - // Set this block as referencing the collected autos/params - // - bvi.setBitVector(references); - while (bvi.hasMoreElements()) - { - uint32_t symRefNum = bvi.getNextElement(); - auto lookup = _blocksPerAuto.find(symRefNum); - if (lookup != _blocksPerAuto.end()) - { - lookup->second->set(block->getNumber()); - } - else - { - TR_BitVector *blocks = new 
(_region) TR_BitVector(_region); - blocks->set(block->getNumber()); - _blocksPerAuto[symRefNum] = blocks; - } - } + TR_BitVector *blocks = new (_region) TR_BitVector(_region); + blocks->set(block->getNumber()); + _blocksPerAuto[symRefNum] = blocks; } } }