Skip to content

Commit

Permalink
Accelerate vectorizedMismatch in IL
Browse files Browse the repository at this point in the history
This patch adds ArraysSupport.vectorizedMismatch as a recognized method.
If arraycmp is supported, vectorizedMismatch call nodes are transformed
to a functionally equivalent tree using arraycmp.

Fixes: #15204
Signed-off-by: Spencer Comin <spencer.comin@ibm.com>
  • Loading branch information
Spencer-Comin committed Feb 2, 2023
1 parent 892071c commit 256dd74
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 5 deletions.
3 changes: 2 additions & 1 deletion runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2000, 2022 IBM Corp. and others
* Copyright (c) 2000, 2023 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -450,6 +450,7 @@
jdk_internal_misc_Unsafe_copyMemory0,
jdk_internal_loader_NativeLibraries_load,
jdk_internal_util_Preconditions_checkIndex,
jdk_internal_util_ArraysSupport_vectorizedMismatch,

FirstVectorMethod,
jdk_internal_vm_vector_VectorSupport_load = FirstVectorMethod,
Expand Down
10 changes: 8 additions & 2 deletions runtime/compiler/env/j9method.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2000, 2022 IBM Corp. and others
* Copyright (c) 2000, 2023 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -3920,6 +3920,12 @@ void TR_ResolvedJ9Method::construct()
{TR::unknownMethod}
};

// Recognized-method table for jdk/internal/util/ArraysSupport (the class name it is
// registered under in the class-name lookup tables below). Its single entry associates
// the method name "vectorizedMismatch" and its signature with
// TR::jdk_internal_util_ArraysSupport_vectorizedMismatch. The list ends with a
// TR::unknownMethod entry, like the preceding method tables.
static X ArraysSupportMethods [] =
{
{x(TR::jdk_internal_util_ArraysSupport_vectorizedMismatch, "vectorizedMismatch", "(Ljava/lang/Object;JLjava/lang/Object;JII)I")},
{ TR::unknownMethod}
};

// Pairs a class name (_class) with the table of recognized methods for that class (_methods).
struct Y { const char * _class; X * _methods; };

/* classXX where XX is the number of characters in the class name */
Expand Down Expand Up @@ -4099,7 +4105,7 @@ void TR_ResolvedJ9Method::construct()
{ "com/ibm/jit/DecimalFormatHelper", DecimalFormatHelperMethods},
{ "jdk/internal/reflect/Reflection", ReflectionMethods },
{ "jdk/internal/util/Preconditions", PreconditionsMethods },

{ "jdk/internal/util/ArraysSupport", ArraysSupportMethods },
{ 0 }
};
static Y class32[] =
Expand Down
109 changes: 108 additions & 1 deletion runtime/compiler/optimizer/J9RecognizedCallTransformer.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2017, 2022 IBM Corp. and others
* Copyright (c) 2017, 2023 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -327,6 +327,108 @@ void J9::RecognizedCallTransformer::process_java_lang_StringUTF16_toBytes(TR::Tr
treetop->insertAfter(TR::TreeTop::create(comp(), TR::Node::create(node, TR::treetop, 1, newCallNode)));
}

/*
 * Rewrites a call to ArraysSupport.vectorizedMismatch in place as an IL tree that is
 * equivalent to the following pseudocode:
 *
 *    lengthInBytes = length << log2ArrayIndexScale
 *    mask          = (log2ArrayIndexScale << 1) | 3
 *    n             = lengthInBytes & ~mask
 *    res           = arrayCmpLen(a + aOffset, b + bOffset, n)
 *    if (res == n)   // no mismatch within the first n bytes
 *       return ~((lengthInBytes & mask) >> log2ArrayIndexScale)
 *    else            // mismatch found at byte offset res
 *       return res >> log2ArrayIndexScale
 *
 * Node before the transformation:
 *
 *    icall jdk/internal/util/ArraysSupport.vectorizedMismatch(Ljava/lang/Object;JLjava/lang/Object;JII)I
 *       <a>
 *       <aOffset>
 *       <b>
 *       <bOffset>
 *       <length>
 *       <log2ArrayIndexScale>
 *
 * Node after the transformation:
 *
 *    iselect
 *       icmpeq
 *          arraycmp (arrayCmpLen)
 *             aladd
 *                <a>
 *                <aOffset>
 *             aladd
 *                <b>
 *                <bOffset>
 *             i2l
 *                iand
 *                   ishl
 *                      <length>
 *                      <log2ArrayIndexScale>
 *                   ixor
 *                      ior
 *                         ishl
 *                            <log2ArrayIndexScale>
 *                            iconst 1
 *                         iconst 3
 *                      iconst -1
 *          ==>iand
 *       ixor
 *          ishr
 *             iand
 *                ==>ishl
 *                ==>ior
 *             <log2ArrayIndexScale>
 *          iconst -1
 *       ishr
 *          ==>arraycmp
 *          <log2ArrayIndexScale>
 */
void J9::RecognizedCallTransformer::process_jdk_internal_util_ArraysSupport_vectorizedMismatch(TR::TreeTop* treetop, TR::Node* node)
   {
   // Call arguments, in child order.
   TR::Node* arrayA       = node->getChild(0);
   TR::Node* offsetA      = node->getChild(1);
   TR::Node* arrayB       = node->getChild(2);
   TR::Node* offsetB      = node->getChild(3);
   TR::Node* elementCount = node->getChild(4);
   TR::Node* log2Scale    = node->getChild(5);

   // Total number of bytes covered by the request.
   // NOTE(review): this is a 32-bit shift; for a large length combined with a large scale the
   // byte count could wrap -- confirm that callers bound length accordingly.
   TR::Node* byteLength = TR::Node::create(TR::ishl, 2, elementCount, log2Scale);

   // (log2Scale << 1) | 3 evaluates to 3 for scales 0 and 1, and to 7 for scales 2 and 3.
   TR::Node* doubledScale = TR::Node::create(TR::ishl, 2, log2Scale, TR::Node::iconst(1));
   TR::Node* tailMask = TR::Node::create(TR::ior, 2, doubledScale, TR::Node::iconst(3));

   // Only the prefix whose length has the mask bits cleared is handed to arraycmp.
   TR::Node* invertedTailMask = TR::Node::create(TR::ixor, 2, tailMask, TR::Node::iconst(-1));
   TR::Node* compareLength = TR::Node::create(TR::iand, 2, byteLength, invertedTailMask);

   // TODO: replace the aladd's in the following with generateDataAddrLoadTrees when off heap memory changes come in
   TR::Node* addressA = TR::Node::create(TR::aladd, 2, arrayA, offsetA);
   TR::Node* addressB = TR::Node::create(TR::aladd, 2, arrayB, offsetB);
   TR::Node* compareLength64 = TR::Node::create(TR::i2l, 1, compareLength);

   TR::Node* mismatchOffset = TR::Node::create(TR::arraycmp, 3, addressA, addressB, compareLength64);
   mismatchOffset->setArrayCmpLen(true);
   mismatchOffset->setSymbolReference(getSymRefTab()->findOrCreateArrayCmpSymbol());

   // No mismatch: bitwise complement of the number of tail elements that were not compared.
   TR::Node* tailBytes = TR::Node::create(TR::iand, 2, byteLength, tailMask);
   TR::Node* tailElements = TR::Node::create(TR::ishr, 2, tailBytes, log2Scale);
   TR::Node* noMismatchResult = TR::Node::create(TR::ixor, 2, tailElements, TR::Node::iconst(-1));

   // Mismatch: convert the byte offset reported by arraycmp into an element index.
   TR::Node* mismatchResult = TR::Node::create(TR::ishr, 2, mismatchOffset, log2Scale);

   // arraycmp returning the full compared length means every compared byte matched.
   TR::Node* comparedEverything = TR::Node::create(TR::icmpeq, 2, mismatchOffset, compareLength);

   // Anchor the original arguments ahead of the call's treetop before detaching them from the call.
   anchorAllChildren(node, treetop);
   prepareToReplaceNode(node);

   // Turn the call node itself into the iselect so existing references to it see the new value.
   TR::Node::recreate(node, TR::iselect);
   node->setNumChildren(3);
   node->setAndIncChild(0, comparedEverything);
   node->setAndIncChild(1, noMismatchResult);
   node->setAndIncChild(2, mismatchResult);

   TR::TransformUtil::removeTree(comp(), treetop);
   }

void J9::RecognizedCallTransformer::process_java_lang_StrictMath_and_Math_sqrt(TR::TreeTop* treetop, TR::Node* node)
{
TR::Node* valueNode = node->getLastChild();
Expand Down Expand Up @@ -1242,6 +1344,8 @@ bool J9::RecognizedCallTransformer::isInlineable(TR::TreeTop* treetop)
case TR::java_lang_StringCoding_encodeASCII:
case TR::java_lang_String_encodeASCII:
return comp()->cg()->getSupportsInlineEncodeASCII();
case TR::jdk_internal_util_ArraysSupport_vectorizedMismatch:
return comp()->cg()->getSupportsArrayCmp();
default:
return false;
}
Expand Down Expand Up @@ -1376,6 +1480,9 @@ void J9::RecognizedCallTransformer::transform(TR::TreeTop* treetop)
case TR::java_lang_Long_reverseBytes:
processIntrinsicFunction(treetop, node, TR::lbyteswap);
break;
case TR::jdk_internal_util_ArraysSupport_vectorizedMismatch:
process_jdk_internal_util_ArraysSupport_vectorizedMismatch(treetop, node);
break;
default:
break;
}
Expand Down
24 changes: 23 additions & 1 deletion runtime/compiler/optimizer/J9RecognizedCallTransformer.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2017, 2022 IBM Corp. and others
* Copyright (c) 2017, 2023 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -139,6 +139,28 @@ class RecognizedCallTransformer : public OMR::RecognizedCallTransformer
* \endcode
*/
void process_java_lang_StringUTF16_toBytes(TR::TreeTop* treetop, TR::Node* node);
/** \brief
 *     Transforms jdk/internal/util/ArraysSupport.vectorizedMismatch(Ljava/lang/Object;JLjava/lang/Object;JII)I
 *     into an arraycmp, bit manipulation and iselect sequence with equivalent semantics.
 *
 *  \details
 *     The call node is rewritten in place as an iselect. When the arraycmp (in arrayCmpLen mode)
 *     reports no mismatch within the compared prefix, the selected value is the bitwise complement
 *     of the number of tail elements that were not compared; otherwise it is the element index of
 *     the first mismatch. The call's children are anchored before the original treetop is removed.
 *
 *  \param treetop
 *     The treetop which anchors the call node.
 *
 *  \param node
 *     The call node representing a call to jdk/internal/util/ArraysSupport.vectorizedMismatch(Ljava/lang/Object;JLjava/lang/Object;JII)I
 *     which has the following shape:
 *
 *     \code
 *     icall <jdk/internal/util/ArraysSupport.vectorizedMismatch(Ljava/lang/Object;JLjava/lang/Object;JII)I>
 *        <a>
 *        <aOffset>
 *        <b>
 *        <bOffset>
 *        <length>
 *        <log2ArrayIndexScale>
 *     \endcode
 */
void process_jdk_internal_util_ArraysSupport_vectorizedMismatch(TR::TreeTop* treetop, TR::Node* node);
/** \brief
* Transforms java/lang/StrictMath.sqrt(D)D and java/lang/Math.sqrt(D)D into a CodeGen inlined function with equivalent semantics.
*
Expand Down

0 comments on commit 256dd74

Please sign in to comment.