From 46e59bffd9da107b8e078adc3c2c0a75711c541c Mon Sep 17 00:00:00 2001 From: Filip Jeremic Date: Wed, 24 Feb 2021 12:49:08 -0500 Subject: [PATCH] Deprecate PrefetchInsertion optimization The optimization has O(n^2) complexity and is currently disabled for forward array traversals, and recently we've had to disable it under all array traversals for concurrent scavenge. As read barriers become more prominent, the introduction of the dataAddr pointer in the array header will force us to further limit this optimization. This optimization was originally introduced in 2010 for the z196 z/Architecture processor to accelerate SPEC workloads. It showed minimal improvement (1.5%) according to historical data dug up prior to open sourcing the project. That hardware is hardly used today since it is so old. In recent iterations of the hardware, software prefetching has been discouraged due to advancements in hardware prefetching. Similar observations were seen when we removed prefetching from our zero memory routines. Moreover, the phantom words past the end of the GC heap were removed several years ago, forcing us to disable this optimization for forward array traversals. As mentioned previously, we have been encountering bugs in this optimization recently due to changes in the JVM and the fact that this optimization introduces loads on the heap which previously did not exist. In the PR we detail performance numbers for several workloads where we believed this optimization may have provided a benefit. Across all platforms measured, we no longer see a benefit (and sometimes see a degradation) from running this optimization. Here we deprecate the optimization as we still have access to the original source code in the history if we ever need to resurrect this effort. 
Signed-off-by: Filip Jeremic --- compiler/control/OMROptions.cpp | 15 +- compiler/optimizer/CMakeLists.txt | 3 +- compiler/optimizer/OMROptimizationGroups.enum | 3 +- compiler/optimizer/OMROptimizationManager.cpp | 5 +- compiler/optimizer/OMROptimizations.enum | 3 +- compiler/optimizer/OMROptimizer.cpp | 15 +- compiler/optimizer/OptimizationStrategies.hpp | 3 +- compiler/optimizer/PrefetchInsertion.cpp | 518 ------------------ compiler/optimizer/PrefetchInsertion.hpp | 70 --- fvtest/compilertest/build/files/common.mk | 1 - jitbuilder/build/files/common.mk | 1 - 11 files changed, 7 insertions(+), 630 deletions(-) delete mode 100644 compiler/optimizer/PrefetchInsertion.cpp delete mode 100644 compiler/optimizer/PrefetchInsertion.hpp diff --git a/compiler/control/OMROptions.cpp b/compiler/control/OMROptions.cpp index 552fd659743..78fb3752afb 100644 --- a/compiler/control/OMROptions.cpp +++ b/compiler/control/OMROptions.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2020 IBM Corp. and others + * Copyright (c) 2000, 2021 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -480,7 +480,6 @@ TR::OptionTable OMR::Options::_jitOptions[] = { {"disablePRBE", "O\tdisable partial redundancy branch elimination", SET_OPTION_BIT(TR_DisablePRBE), "F"}, {"disablePRE", "O\tdisable partial redundancy elimination", TR::Options::disableOptimization, partialRedundancyElimination, 0, "P"}, {"disablePreexistenceDuringGracePeriod","O\tdisable preexistence during CLP grace period", SET_OPTION_BIT(TR_DisablePrexistenceDuringGracePeriod), "F"}, - {"disablePrefetchInsertion", "O\tdisable prefetch insertion", TR::Options::disableOptimization, prefetchInsertion, 0, "P"}, {"disableProfiledInlining", "O\tdisable inlining based on profiled this values", SET_OPTION_BIT(TR_DisableProfiledInlining), "F"}, {"disableProfiledMethodInlining", "O\tdisable inlining based on profiled methods", SET_OPTION_BIT(TR_DisableProfiledMethodInlining), "F"}, {"disableProfiledNodeVersioning", "O\tdisable profiled node versioning", TR::Options::disableOptimization, profiledNodeVersioning, 0, "P"}, @@ -1183,7 +1182,6 @@ TR::OptionTable OMR::Options::_jitOptions[] = { #endif {"traceOSRLiveRangeAnalysis", "L\ttrace OSR live range analysis", TR::Options::traceOptimization, osrLiveRangeAnalysis, 0, "P"}, {"tracePRE", "L\ttrace partial redundancy elimination", TR::Options::traceOptimization, partialRedundancyElimination, 0, "P"}, - {"tracePrefetchInsertion", "L\ttrace prefetch insertion", TR::Options::traceOptimization, prefetchInsertion, 0, "P"}, {"tracePREForSubNodeReplacement", "L\ttrace partial redundancy elimination focussed on optimal subnode replacement", SET_OPTION_BIT(TR_TracePREForOptimalSubNodeReplacement), "P" }, {"traceProfiledNodeVersioning", "L\ttrace profiled node versioning", TR::Options::traceOptimization, profiledNodeVersioning, 0, "P"}, #ifdef J9_PROJECT_SPECIFIC @@ -2626,17 +2624,6 @@ OMR::Options::jitPreProcess() 
self()->setOption(TR_EnableAnnotations); - TR::Compilation* comp = TR::comp(); - if (comp && TR::Compiler->om.canGenerateArraylets()) - { - _disabledOptimizations[prefetchInsertion] = true; - } - -#if defined(TR_HOST_ARM64) - // Prefetch is not supported on ARM64 yet - _disabledOptimizations[prefetchInsertion] = true; -#endif - self()->setOption(TR_DisableThunkTupleJ2I); // JSR292:TODO: Figure out how to do this without confusing startPCIfAlreadyCompiled self()->setOption(TR_DisableSeparateInitFromAlloc); diff --git a/compiler/optimizer/CMakeLists.txt b/compiler/optimizer/CMakeLists.txt index 17debb58b16..2b168414c76 100644 --- a/compiler/optimizer/CMakeLists.txt +++ b/compiler/optimizer/CMakeLists.txt @@ -1,5 +1,5 @@ ############################################################################### -# Copyright (c) 2017, 2019 IBM Corp. and others +# Copyright (c) 2017, 2021 IBM Corp. and others # # This program and the accompanying materials are made available under # the terms of the Eclipse Public License 2.0 which accompanies this @@ -74,7 +74,6 @@ compiler_library(optimizer ${CMAKE_CURRENT_LIST_DIR}/OSRDefAnalysis.cpp ${CMAKE_CURRENT_LIST_DIR}/PartialRedundancy.cpp ${CMAKE_CURRENT_LIST_DIR}/PreExistence.cpp - ${CMAKE_CURRENT_LIST_DIR}/PrefetchInsertion.cpp ${CMAKE_CURRENT_LIST_DIR}/Reachability.cpp ${CMAKE_CURRENT_LIST_DIR}/ReachingDefinitions.cpp ${CMAKE_CURRENT_LIST_DIR}/OMRRecognizedCallTransformer.cpp diff --git a/compiler/optimizer/OMROptimizationGroups.enum b/compiler/optimizer/OMROptimizationGroups.enum index ea64aac202f..741fc998a09 100644 --- a/compiler/optimizer/OMROptimizationGroups.enum +++ b/compiler/optimizer/OMROptimizationGroups.enum @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2016 IBM Corp. and others + * Copyright (c) 2000, 2021 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -57,7 +57,6 @@ OPTIMIZATION(lateLocalGroup) OPTIMIZATION(eachLocalAnalysisPassGroup) OPTIMIZATION(stripMiningGroup) - OPTIMIZATION(prefetchInsertionGroup) OPTIMIZATION(sequentialLoadAndStoreColdGroup) OPTIMIZATION(sequentialLoadAndStoreWarmGroup) OPTIMIZATION(methodHandleInvokeInliningGroup) diff --git a/compiler/optimizer/OMROptimizationManager.cpp b/compiler/optimizer/OMROptimizationManager.cpp index 453eb73961e..cabf3494428 100644 --- a/compiler/optimizer/OMROptimizationManager.cpp +++ b/compiler/optimizer/OMROptimizationManager.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2019 IBM Corp. and others + * Copyright (c) 2000, 2021 IBM Corp. and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -189,9 +189,6 @@ OMR::OptimizationManager::OptimizationManager(TR::Optimizer *o, OptimizationFact case OMR::stripMining: _flags.set(requiresStructure | checkStructure | dumpStructure); break; - case OMR::prefetchInsertion: - _flags.set(requiresStructure | checkStructure | dumpStructure); - break; case OMR::osrDefAnalysis: if (self()->comp()->getOption(TR_DisableOSRSharedSlots)) _flags.set(doesNotRequireAliasSets | doesNotRequireTreeDumps | supportsIlGenOptLevel); diff --git a/compiler/optimizer/OMROptimizations.enum b/compiler/optimizer/OMROptimizations.enum index 865e73b1aeb..8933066455c 100644 --- a/compiler/optimizer/OMROptimizations.enum +++ b/compiler/optimizer/OMROptimizations.enum @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2019 IBM Corp. and others + * Copyright (c) 2000, 2021 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -99,7 +99,6 @@ OPTIMIZATION(profiledNodeVersioning) OPTIMIZATION(allocationSinking) OPTIMIZATION(stripMining) - OPTIMIZATION(prefetchInsertion) OPTIMIZATION(samplingJProfiling) OPTIMIZATION(trivialDeadTreeRemoval) OPTIMIZATION(osrDefAnalysis) diff --git a/compiler/optimizer/OMROptimizer.cpp b/compiler/optimizer/OMROptimizer.cpp index 201b74c4b09..4f8bf551b67 100644 --- a/compiler/optimizer/OMROptimizer.cpp +++ b/compiler/optimizer/OMROptimizer.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2019 IBM Corp. and others + * Copyright (c) 2000, 2021 IBM Corp. and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -104,7 +104,6 @@ #include "optimizer/SinkStores.hpp" #include "optimizer/PartialRedundancy.hpp" #include "optimizer/OSRDefAnalysis.hpp" -#include "optimizer/PrefetchInsertion.hpp" #include "optimizer/StripMiner.hpp" #include "optimizer/FieldPrivatizer.hpp" #include "optimizer/ReorderIndexExpr.hpp" @@ -444,13 +443,6 @@ const OptimizationStrategy stripMiningOpts[] = { endGroup } }; -const OptimizationStrategy prefetchInsertionOpts[] = - { - { inductionVariableAnalysis }, - { prefetchInsertion }, - { endGroup } - }; - const OptimizationStrategy blockManipulationOpts[] = { // { generalLoopUnroller, IfLoops }, //Unroll Loops @@ -605,7 +597,6 @@ static const OptimizationStrategy omrHotStrategyOpts[] = { OMR::globalCopyPropagation, }, { OMR::loopCanonicalizationGroup, }, // canonicalize loops (improve fall throughs) { OMR::expressionsSimplification, }, - { OMR::prefetchInsertionGroup, }, // created IL should not be moved { OMR::partialRedundancyEliminationGroup }, { OMR::globalDeadStoreElimination, }, { OMR::inductionVariableAnalysis, }, @@ 
-849,8 +840,6 @@ OMR::Optimizer::Optimizer(TR::Compilation *comp, TR::ResolvedMethodSymbol *metho new (comp->allocator()) TR::OptimizationManager(self(), TR_OSRExceptionEdgeRemoval::create, OMR::osrExceptionEdgeRemoval); _opts[OMR::regDepCopyRemoval] = new (comp->allocator()) TR::OptimizationManager(self(), TR::RegDepCopyRemoval::create, OMR::regDepCopyRemoval); - _opts[OMR::prefetchInsertion] = - new (comp->allocator()) TR::OptimizationManager(self(), TR_PrefetchInsertion::create, OMR::prefetchInsertion); _opts[OMR::stripMining] = new (comp->allocator()) TR::OptimizationManager(self(), TR_StripMiner::create, OMR::stripMining); _opts[OMR::fieldPrivatization] = @@ -904,8 +893,6 @@ OMR::Optimizer::Optimizer(TR::Compilation *comp, TR::ResolvedMethodSymbol *metho new (comp->allocator()) TR::OptimizationManager(self(), NULL, OMR::veryExpensiveGlobalValuePropagationGroup, veryExpensiveGlobalValuePropagationOpts); _opts[OMR::loopSpecializerGroup] = new (comp->allocator()) TR::OptimizationManager(self(), NULL, OMR::loopSpecializerGroup, loopSpecializerOpts); - _opts[OMR::prefetchInsertionGroup] = - new (comp->allocator()) TR::OptimizationManager(self(), NULL, OMR::prefetchInsertionGroup, prefetchInsertionOpts); _opts[OMR::lateLocalGroup] = new (comp->allocator()) TR::OptimizationManager(self(), NULL, OMR::lateLocalGroup, lateLocalOpts); _opts[OMR::eachLocalAnalysisPassGroup] = diff --git a/compiler/optimizer/OptimizationStrategies.hpp b/compiler/optimizer/OptimizationStrategies.hpp index aed4b86f241..ccbfec197ea 100644 --- a/compiler/optimizer/OptimizationStrategies.hpp +++ b/compiler/optimizer/OptimizationStrategies.hpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2000, 2019 IBM Corp. and others + * Copyright (c) 2000, 2021 IBM Corp. 
and others * * This program and the accompanying materials are made available under * the terms of the Eclipse Public License 2.0 which accompanies this @@ -55,7 +55,6 @@ extern const OptimizationStrategy loopCanonicalizationOpts[]; extern const OptimizationStrategy blockManipulationOpts[]; extern const OptimizationStrategy eachLocalAnalysisPassOpts[]; extern const OptimizationStrategy stripMiningOpts[]; -extern const OptimizationStrategy prefetchInsertionOpts[]; extern const OptimizationStrategy methodHandleInvokeInliningOpts[]; //arrays of optimizations diff --git a/compiler/optimizer/PrefetchInsertion.cpp b/compiler/optimizer/PrefetchInsertion.cpp deleted file mode 100644 index 6d5480084c0..00000000000 --- a/compiler/optimizer/PrefetchInsertion.cpp +++ /dev/null @@ -1,518 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2021 IBM Corp. and others - * - * This program and the accompanying materials are made available under - * the terms of the Eclipse Public License 2.0 which accompanies this - * distribution and is available at http://eclipse.org/legal/epl-2.0 - * or the Apache License, Version 2.0 which accompanies this distribution - * and is available at https://www.apache.org/licenses/LICENSE-2.0. - * - * This Source Code may also be made available under the following Secondary - * Licenses when the conditions for such availability set forth in the - * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, - * version 2 with the GNU Classpath Exception [1] and GNU General Public - * License, version 2 with the OpenJDK Assembly Exception [2]. 
- * - * [1] https://www.gnu.org/software/classpath/license.html - * [2] http://openjdk.java.net/legal/assembly-exception.html - * - * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception - *******************************************************************************/ - -#include "optimizer/PrefetchInsertion.hpp" - -#include -#include -#include "codegen/CodeGenerator.hpp" -#include "env/FrontEnd.hpp" -#include "compile/Compilation.hpp" -#include "compile/SymbolReferenceTable.hpp" -#include "env/CompilerEnv.hpp" -#include "env/ObjectModel.hpp" -#include "env/StackMemoryRegion.hpp" -#include "env/TRMemory.hpp" -#include "env/jittypes.h" -#include "il/Block.hpp" -#include "il/DataTypes.hpp" -#include "il/ILOpCodes.hpp" -#include "il/ILOps.hpp" -#include "il/Node.hpp" -#include "il/Node_inlines.hpp" -#include "il/Symbol.hpp" -#include "il/TreeTop.hpp" -#include "il/TreeTop_inlines.hpp" -#include "infra/Cfg.hpp" -#include "infra/List.hpp" -#include "optimizer/InductionVariable.hpp" -#include "optimizer/LoopCanonicalizer.hpp" -#include "optimizer/Optimization_inlines.hpp" -#include "optimizer/OptimizationManager.hpp" -#include "optimizer/Structure.hpp" - -namespace TR { class SymbolReference; } - -#define OPT_DETAILS "O^O PREFETCH INSERTION: " - -TR_PrefetchInsertion::TR_PrefetchInsertion(TR::OptimizationManager *manager) - : TR_LoopTransformer(manager), - _arrayAccessInfos(manager->trMemory()) - {} - -int32_t TR_PrefetchInsertion::perform() - { - if (comp()->requiresSpineChecks()) - { - // see 172692 - if (trace()) - traceMsg(comp(), "Spine checks required for array element accesses -- returning from prefetch insertion.\n"); - return 0; - } - - if (!comp()->mayHaveLoops()) - { - if (trace()) - traceMsg(comp(), "Method does not have loops -- returning from prefetch insertion.\n"); - return 0; - } - - // Disallow prefetch insertion when read barriers are necessary because this optimization 
may insert loads - // of array elements past the 0th element for a backwards traversal. That is, if the primary induction - // variable is traversing the array backwards, on the last iteration of the loop we will be prefetching - // the (i - 1)th element of the array. Since the prefetch needs to load such an element, we must ensure - // the read barrier will not trigger on such a value which may "look like" an object. For example today, - // the (i - 1)th element is really the last word of the array header, which is the `dataAddr` pointer - // which looks like an object, but it is not. Thus the read barrier may incorrectly trigger on such a value. - // - // In theory the same issue can happen on a forward traversal since there may be padding bytes past the - // end of an array. For this reason we go with the safest route and just disable the entire prefetch - // insertion optimization if read barriers are necessary. - if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none) - { - if (trace()) - traceMsg(comp(), "Skipping prefetch insertion because read barriers are required"); - - return 0; - } - - _cfg = comp()->getFlowGraph(); - _rootStructure = _cfg->getStructure(); - - _arrayAccessInfos.init(); - - // From here, down, stack memory allocations will die when the function returns - TR::StackMemoryRegion stackMemoryRegion(*trMemory()); - - if (trace()) - { - traceMsg(comp(), "Starting Prefetch Insertion\n"); - comp()->dumpMethodTrees("Before prefetch insertion"); - } - - // Collect and analyze information about loops - collectLoops(_rootStructure); - dumpOptDetails(comp(), "Loop analysis completed...\n"); - - if (!_arrayAccessInfos.isEmpty()) - insertPrefetchInstructions(); - else - dumpOptDetails(comp(), "Prefetch insertion completed: no qualifying loops found\n"); - - return 0; - } - - -void TR_PrefetchInsertion::collectLoops(TR_Structure *str) - { - TR_RegionStructure *region = str->asRegion(); - - if (region == NULL) - return; - - if 
(region->isNaturalLoop()) - { - if (trace()) - traceMsg(comp(), "\n", region->getNumber(), region); - - // Check if there is an induction variable - // - if (region->getPrimaryInductionVariable() != NULL || !region->getBasicInductionVariables().isEmpty()) - { - if (!region->getEntryBlock()->isCold()) - { - examineLoop(region); - return; - } - else - { - if (trace()) - traceMsg(comp(), "\tReject loop %d ==> cold loop\n", region->getNumber()); - return; - } - } - else - { - if (trace()) - traceMsg(comp(), "\tReject loop %d ==> no basic induction variable\n", region->getNumber()); - } - } - - TR_RegionStructure::Cursor it(*region); - for (TR_StructureSubGraphNode *node = it.getCurrent(); node; node = it.getNext()) - collectLoops(node->getStructure()); - - } - - -static bool identicalSubTrees(TR::Node *node1, TR::Node *node2) - { - if (node1 == node2) - return true; - - if (node1->getOpCodeValue() != node2->getOpCodeValue()) - return false; - - if (node1->getOpCodeValue() == TR::iconst && - node1->getInt() != node2->getInt()) - return false; - - if (node1->getOpCodeValue() == TR::lconst && - node1->getLongInt() != node2->getLongInt()) - return false; - - if (node1->getOpCode().isLoadVar() && - node1->getSymbolReference() != node2->getSymbolReference()) - return false; - - if (node1->getNumChildren() != node2->getNumChildren()) - return false; - - - for (int32_t i = 0; i < node1->getNumChildren(); i++) - if (!identicalSubTrees(node1->getChild(i), node2->getChild(i))) - return false; - - return true; - } - - -void TR_PrefetchInsertion::insertPrefetchInstructions() - { - ListIterator it(&_arrayAccessInfos); - for (ArrayAccessInfo *aai = it.getFirst(); aai; aai = it.getNext()) - { - TR::Block * block = aai->_treeTop->getEnclosingBlock(); - TR_Structure * loop = block->getStructureOf()->getContainingLoop(); - bool skip = false; - ListIterator it1(&_arrayAccessInfos); - for (ArrayAccessInfo *aai1 = it1.getFirst(); !skip && aai1 != aai; aai1 = it1.getNext()) - { - TR::Block * 
block1 = aai1->_treeTop->getEnclosingBlock(); - if (block == block1 && identicalSubTrees(aai->_addressNode, aai1->_addressNode)) - skip = true; - } - for (ArrayAccessInfo *aai1 = it1.getNext(); !skip && aai1; aai1 = it1.getNext()) - { - TR::Block * block1 = aai1->_treeTop->getEnclosingBlock(); - TR_Structure * loop1 = block1->getStructureOf()->getContainingLoop(); - if (loop == loop1 && block != block1 && identicalSubTrees(aai->_addressNode, aai1->_addressNode)) - skip = true; - } - if (!skip && performTransformation(comp(), "%sInserting prefetch for array access %p in block_%d\n", - OPT_DETAILS, aai->_aaNode, aai->_treeTop->getEnclosingBlock()->getNumber())) - { - // Enable tree simplification on this block to normalize any address computation so we can produce an optimal - // instruction sequence during instruction selection - requestOpt(OMR::treeSimplification, true, aai->_treeTop->getEnclosingBlock()); - - TR::Node *prefetchNode, *addressNode, *deltaNode, *offsetNode, *sizeNode, *typeNode; - - // First child -- base address - deltaNode = createDeltaNode(aai->_addressNode->getSecondChild()->getFirstChild(), aai->_bivNode, - aai->_biv->getDeltaOnBackEdge()); - addressNode = TR::Node::createWithSymRef(TR::aloadi, 1, 1, - TR::Node::create(aai->_addressNode->getOpCodeValue(), 2, aai->_addressNode, deltaNode), - aai->_aaNode->getSymbolReference()); - - addressNode->getFirstChild()->setIsInternalPointer(true); - - // Second child -- offset - offsetNode = TR::Node::iconst(addressNode, 0); - - // Third child -- size - sizeNode = TR::Node::iconst(addressNode, 1); - - // Fourth child -- type - // Better default prefetch type for all platforms is Load, - // except for 390, where it is Store when a store is found in the loop. - // - - bool foundStore = false; -#if defined(TR_TARGET_S390) - //Quick search to identify it this is a store prefetch (if possible) - //Walk all blocks in the loop and search for storei whose first child includes a[i]. 
- TR_ScratchList blocksInLoop(trMemory()); - aai->_treeTop->getEnclosingBlock()->getStructureOf()->getContainingLoop()->getBlocks(&blocksInLoop); - ListIterator bIt(&blocksInLoop); - for (TR::Block *blk = bIt.getFirst(); blk && !foundStore; blk = bIt.getNext()) - { - TR::TreeTop *currentTree = blk->startOfExtendedBlock()->getEntry(); - TR::TreeTop *exitTree = currentTree->getExtendedBlockExitTreeTop(); - - while ((currentTree != exitTree) && !foundStore) - { - TR::Node *node = currentTree->getNode(); - if (node->getOpCode().isStoreIndirect()) - { - // loopup the a[i]; - if ((node->getFirstChild() == aai->_aaNode) || - (node->getSymbol()->isArrayShadowSymbol() && node->getFirstChild()->getOpCode().isArrayRef() && - node->getFirstChild()->getFirstChild()== aai->_aaNode)) - foundStore = true; - } - currentTree = currentTree->getNextTreeTop(); - } - } -#endif - - static char * disablePrefetchStore = feGetEnv("TR_DISABLEPrefetchStore"); - typeNode = TR::Node::iconst(addressNode, (comp()->target().cpu.isZ() && foundStore && !disablePrefetchStore) ? 
- (int32_t)PrefetchStore : - (int32_t)PrefetchLoad); - - prefetchNode = TR::Node::createWithSymRef(TR::Prefetch, 4, 4, - addressNode, offsetNode, sizeNode, typeNode, - comp()->getSymRefTab()->findOrCreatePrefetchSymbol()); - - TR::TreeTop *treeTop = aai->_treeTop; - if (treeTop->getNode()->getOpCode().isBranch()) - treeTop = TR::TreeTop::create(comp(), treeTop->getPrevTreeTop(), - TR::Node::create(TR::treetop, 1, - treeTop->getNode()->getFirstChild())); - else if (treeTop->getNode()->canGCandReturn()) - treeTop = treeTop->getPrevTreeTop(); - - TR::TreeTop::create(comp(), treeTop, prefetchNode); - } - } - } - - -TR::Node *TR_PrefetchInsertion::createDeltaNode(TR::Node *node, TR::Node *pivNode, int32_t deltaOnBackEdge) - { - if (node == pivNode) - { - if (pivNode->getDataType() == TR::Int32) - return TR::Node::iconst(pivNode, deltaOnBackEdge); - else - return TR::Node::lconst(pivNode, deltaOnBackEdge); - } - - if (node->getNumChildren() == 0) - return node; - - TR::Node *newNode; - - if(node->getOpCode().hasSymbolReference()) - newNode = TR::Node::createWithSymRef(node, node->getOpCodeValue(), node->getNumChildren(), node->getSymbolReference()); - else - newNode = TR::Node::create(node, node->getOpCodeValue(), node->getNumChildren()); - - for (intptr_t i = 0; i < node->getNumChildren(); i++) - { - newNode->setAndIncChild(i, createDeltaNode(node->getChild(i), pivNode, deltaOnBackEdge)); - } - return newNode; - } - - -// Examine loop to find all candidates for prefetch insertion. 
-// -void TR_PrefetchInsertion::examineLoop(TR_RegionStructure *loop) - { - intptr_t visitCount = comp()->incVisitCount(); - - TR_ScratchList blocksInLoop(trMemory()); - loop->getBlocks(&blocksInLoop); - ListIterator bIt(&blocksInLoop); - for (TR::Block *block = bIt.getFirst(); block; block = bIt.getNext()) - { - TR::TreeTop *currentTree = block->startOfExtendedBlock()->getEntry(); - TR::TreeTop *exitTree = currentTree->getExtendedBlockExitTreeTop(); - - while (currentTree != exitTree) - { - TR::Node *node = currentTree->getNode(); - if (node->getNumChildren() > 0) - examineNode(currentTree, block, node, visitCount, loop); - currentTree = currentTree->getNextTreeTop(); - } - } - } - - -void TR_PrefetchInsertion::examineNode(TR::TreeTop *treeTop, TR::Block *block, TR::Node *node, intptr_t visitCount, TR_RegionStructure *loop) - { - // If we have seen this node before, we are done - // Otherwise, set visit count - // - if (node->getVisitCount() == visitCount) - return; - - node->setVisitCount(visitCount); - - TR::Symbol *symbol = NULL; - if (node->getOpCode().hasSymbolReference()) - symbol = node->getSymbol(); - - if (symbol && symbol->isArrayShadowSymbol() && node->getOpCodeValue() == TR::aloadi && node->getFirstChild()->getOpCode().isArrayRef()) - { - /* Pattern match for reference array accesses - iaload iaload - aiadd aladd - aload / iaload aload / iaload - isub/iadd lsub/ladd - (imul/ishl) (lmul/lshl) - iload/iadd/isub/imul i2l or lload/ladd/lsub/lmul - iconst iload/iadd/isub/imul - iconst lconst/iconst - lconst - */ - int64_t addConst = 0; - int64_t mulConst = 1; - int64_t mulConstBytes = 1; - TR::Node *firstChild = node->getFirstChild(); - TR::Node *secondChild = firstChild->getSecondChild(); - if ((secondChild->getOpCode().isAdd() || secondChild->getOpCode().isSub()) && - secondChild->getSecondChild()->getOpCode().isLoadConst()) - { - firstChild = secondChild->getFirstChild(); - - if ((firstChild->getOpCode().isMul() || - 
firstChild->getOpCode().isLeftShift()) && - firstChild->getSecondChild()->getOpCode().isLoadConst()) - { - if (firstChild->getSecondChild()->getOpCodeValue() == TR::iconst) - mulConstBytes = (int64_t)((firstChild->getOpCode().isMul()) ? - firstChild->getSecondChild()->getInt() : - 2 << firstChild->getSecondChild()->getInt()); - else - mulConstBytes = firstChild->getOpCode().isMul() ? - firstChild->getSecondChild()->getLongInt() : - 2 << firstChild->getSecondChild()->getLongInt(); - - firstChild = firstChild->getFirstChild(); - } - - if (firstChild->getOpCode().isConversion()) - { - firstChild = firstChild->getFirstChild(); - } - - if ((firstChild->getOpCode().isAdd() || - firstChild->getOpCode().isSub()) && - firstChild->getSecondChild()->getOpCode().isLoadConst()) - { - if (firstChild->getSecondChild()->getOpCodeValue() == TR::iconst) - addConst = (int64_t)((firstChild->getOpCode().isAdd()) ? - (int64_t)firstChild->getSecondChild()->getInt() : - -firstChild->getSecondChild()->getInt()); - else - addConst = firstChild->getOpCode().isAdd() ? - firstChild->getSecondChild()->getLongInt() : - -firstChild->getSecondChild()->getLongInt(); - - firstChild = firstChild->getFirstChild(); - } - - if ((firstChild->getOpCode().isMul() || - firstChild->getOpCode().isLeftShift()) && - firstChild->getSecondChild()->getOpCode().isLoadConst()) - { - if (firstChild->getSecondChild()->getOpCodeValue() == TR::iconst) - mulConst = (int64_t)((firstChild->getOpCode().isMul()) ? - firstChild->getSecondChild()->getInt() : - 2 << firstChild->getSecondChild()->getInt()); - else - mulConst = firstChild->getOpCode().isMul() ? 
- firstChild->getSecondChild()->getLongInt() : - 2 << firstChild->getSecondChild()->getLongInt(); - - firstChild = firstChild->getFirstChild(); - } - - TR_PrimaryInductionVariable *closestPIV = NULL; // argument loop is the most outer loop - TR_BasicInductionVariable *biv = NULL; // argument loop is the most outer loop - if (firstChild->getOpCode().isLoadDirect() && - (((closestPIV=getClosestPIV(block)) && (firstChild->getOpCode().hasSymbolReference() && firstChild->getSymbolReference() == closestPIV->getSymRef())) || - (closestPIV == NULL && isBIV(firstChild->getSymbolReference(), block, biv)))) - { - if (closestPIV) - biv = closestPIV; - - int64_t stepInBytes = mulConstBytes * (mulConst * (int64_t)biv->getDeltaOnBackEdge() + addConst); - TR_Structure *loop1= treeTop->getEnclosingBlock()->getStructureOf()->getContainingLoop(); - bool isTreetopInLoop = (loop1 && (loop1->asRegion() == loop)) ? true : false; - - if (isTreetopInLoop && ( - (stepInBytes > 0 && stepInBytes <= TR::Compiler->vm.heapTailPaddingSizeInBytes()) || - (stepInBytes < 0 && -stepInBytes <= TR::Compiler->om.contiguousArrayHeaderSizeInBytes()))) - { - // Save array access info - // - ArrayAccessInfo *aai = (ArrayAccessInfo *) trMemory()->allocateStackMemory(sizeof(ArrayAccessInfo)); - aai->_treeTop = treeTop; - aai->_aaNode = node; - aai->_addressNode = node->getFirstChild(); - aai->_bivNode = firstChild; - aai->_biv = biv; - _arrayAccessInfos.add(aai); - - if (trace()) - traceMsg(comp(), "Found array access: node %p, access address node %p, biv node %p\n", - node, aai->_addressNode, aai->_bivNode); - return; - } - } - } - } - - /* Walk its children */ - for (intptr_t i = 0; i < node->getNumChildren(); i++) - { - examineNode(treeTop, block, node->getChild(i), visitCount, loop); - } - } - - -TR_PrimaryInductionVariable *TR_PrefetchInsertion::getClosestPIV(TR::Block *block) - { - TR_Structure *loop = block->getStructureOf()->getContainingLoop(); - if (loop && loop->asRegion()) - return 
loop->asRegion()->getPrimaryInductionVariable(); - return NULL; - } - -bool TR_PrefetchInsertion::isBIV(TR::SymbolReference* symRef, TR::Block *block, TR_BasicInductionVariable* &biv) - { - List *bivs; - TR_Structure *loop = block->getStructureOf()->getContainingLoop(); - if (loop && loop->asRegion()) - bivs = &loop->asRegion()->getBasicInductionVariables(); - else - return false; - - ListIterator it(bivs); - for (biv = it.getFirst(); biv; biv = it.getNext()) - { - if (biv->getSymRef() == symRef) - return true; - } - - return false; - } - -const char * -TR_PrefetchInsertion::optDetailString() const throw() - { - return "O^O PREFETCH INSERTION: "; - } diff --git a/compiler/optimizer/PrefetchInsertion.hpp b/compiler/optimizer/PrefetchInsertion.hpp deleted file mode 100644 index 0804909d887..00000000000 --- a/compiler/optimizer/PrefetchInsertion.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2020 IBM Corp. and others - * - * This program and the accompanying materials are made available under - * the terms of the Eclipse Public License 2.0 which accompanies this - * distribution and is available at http://eclipse.org/legal/epl-2.0 - * or the Apache License, Version 2.0 which accompanies this distribution - * and is available at https://www.apache.org/licenses/LICENSE-2.0. - * - * This Source Code may also be made available under the following Secondary - * Licenses when the conditions for such availability set forth in the - * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License, - * version 2 with the GNU Classpath Exception [1] and GNU General Public - * License, version 2 with the OpenJDK Assembly Exception [2]. 
- * - * [1] https://www.gnu.org/software/classpath/license.html - * [2] http://openjdk.java.net/legal/assembly-exception.html - * - * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception - *******************************************************************************/ - -#include "optimizer/LoopCanonicalizer.hpp" - -#include -#include "env/jittypes.h" -#include "infra/List.hpp" -#include "optimizer/OptimizationManager.hpp" - -class TR_BasicInductionVariable; -class TR_PrimaryInductionVariable; -class TR_RegionStructure; -class TR_Structure; -namespace TR { class Block; } -namespace TR { class Node; } -namespace TR { class Optimization; } -namespace TR { class SymbolReference; } -namespace TR { class TreeTop; } - -class TR_PrefetchInsertion : public TR_LoopTransformer - { - public: - TR_PrefetchInsertion(TR::OptimizationManager *manager); - static TR::Optimization *create(TR::OptimizationManager *manager) - { - return new (manager->allocator()) TR_PrefetchInsertion(manager); - } - - virtual int32_t perform(); - const char * optDetailString() const throw(); - - private: - struct ArrayAccessInfo - { - TR::TreeTop *_treeTop; - TR::Node *_aaNode; - TR::Node *_addressNode; - TR::Node *_bivNode; - TR_BasicInductionVariable *_biv; - }; - - TR_ScratchList _arrayAccessInfos; - - void collectLoops(TR_Structure *str); - void insertPrefetchInstructions(); - TR::Node *createDeltaNode(TR::Node *node, TR::Node *pivNode, int32_t deltaOnBackEdge); - void examineLoop(TR_RegionStructure *loop); - void examineNode(TR::TreeTop *treeTop, TR::Block *block, TR::Node *node, intptr_t visitCount, TR_RegionStructure *loop); - TR_PrimaryInductionVariable *getClosestPIV(TR::Block *block); - bool isBIV(TR::SymbolReference* symRef, TR::Block *block, TR_BasicInductionVariable* &biv); - }; diff --git a/fvtest/compilertest/build/files/common.mk b/fvtest/compilertest/build/files/common.mk index aa81410f111..7ea2e702c9a 
100644 --- a/fvtest/compilertest/build/files/common.mk +++ b/fvtest/compilertest/build/files/common.mk @@ -151,7 +151,6 @@ JIT_PRODUCT_BACKEND_SOURCES+=\ $(JIT_OMR_DIRTY_DIR)/optimizer/OSRDefAnalysis.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/PartialRedundancy.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/PreExistence.cpp \ - $(JIT_OMR_DIRTY_DIR)/optimizer/PrefetchInsertion.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/Reachability.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/ReachingDefinitions.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/OMRRecognizedCallTransformer.cpp \ diff --git a/jitbuilder/build/files/common.mk b/jitbuilder/build/files/common.mk index c8dd34e2599..21ed48e9286 100644 --- a/jitbuilder/build/files/common.mk +++ b/jitbuilder/build/files/common.mk @@ -153,7 +153,6 @@ JIT_PRODUCT_BACKEND_SOURCES+=\ $(JIT_OMR_DIRTY_DIR)/optimizer/OSRDefAnalysis.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/PartialRedundancy.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/PreExistence.cpp \ - $(JIT_OMR_DIRTY_DIR)/optimizer/PrefetchInsertion.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/Reachability.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/ReachingDefinitions.cpp \ $(JIT_OMR_DIRTY_DIR)/optimizer/OMRRecognizedCallTransformer.cpp \