Skip to content

Commit

Permalink
Support a few "shifted register" operations on Arm64 (#75823)
Browse files Browse the repository at this point in the history
* Refactor Lowering::IsContainableBinaryOp so node checks are simpler

* Update Lowering::ContainCheckBinary to check both operands for a commutative oper

* Updating Lowering::IsContainableBinaryOp to support some shifted register instructions

* Resolving some build failures/asserts

* Ensure IsContainableBinaryOp checks IsSafeToContainMem

* Use parentNode not node

* Ensure shifted register instructions that set flags use the right instruction

* Ensure the shift amount is checked for smaller nodes

* Apply suggestions from code review

* Apply suggestions from code review

Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>

Co-authored-by: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com>
  • Loading branch information
tannergooding and SingleAccretion committed Sep 27, 2022
1 parent 6f9366a commit 27f1398
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 22 deletions.
81 changes: 80 additions & 1 deletion src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2527,7 +2527,9 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree)
{
// In the future, we might consider enabling this for floating-point "unsafe" math.
assert(varTypeIsIntegral(tree));
assert(!(tree->gtFlags & GTF_SET_FLAGS));

// These operations cannot set flags
assert((tree->gtFlags & GTF_SET_FLAGS) == 0);

GenTree* a = op1;
GenTree* b = op2->gtGetOp1();
Expand Down Expand Up @@ -2560,6 +2562,83 @@ void CodeGen::genCodeForBinary(GenTreeOp* tree)
genProduceReg(tree);
return;
}
else if (op2->OperIs(GT_LSH, GT_RSH, GT_RSZ) && op2->isContained())
{
assert(varTypeIsIntegral(tree));

GenTree* a = op1;
GenTree* b = op2->gtGetOp1();
GenTree* c = op2->gtGetOp2();

// The shift amount needs to be contained as well
assert(c->isContained() && c->IsCnsIntOrI());

instruction ins = genGetInsForOper(tree->OperGet(), targetType);
insOpts opt = INS_OPTS_NONE;

if ((tree->gtFlags & GTF_SET_FLAGS) != 0)
{
// A subset of operations can still set flags

switch (oper)
{
case GT_ADD:
{
ins = INS_adds;
break;
}

case GT_SUB:
{
ins = INS_subs;
break;
}

case GT_AND:
{
ins = INS_ands;
break;
}

default:
{
noway_assert(!"Unexpected BinaryOp with GTF_SET_FLAGS set");
}
}
}

switch (op2->gtOper)
{
case GT_LSH:
{
opt = INS_OPTS_LSL;
break;
}

case GT_RSH:
{
opt = INS_OPTS_ASR;
break;
}

case GT_RSZ:
{
opt = INS_OPTS_LSR;
break;
}

default:
{
unreached();
}
}

emit->emitIns_R_R_R_I(ins, emitActualTypeSize(tree), targetReg, a->GetRegNum(), b->GetRegNum(),
c->AsIntConCommon()->IconValue(), opt);

genProduceReg(tree);
return;
}

if (tree->OperIs(GT_AND) && op2->isContainedAndNotIntOrIImmed())
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1674,7 +1674,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
}
#endif // FEATURE_HW_INTRINSICS
#endif // TARGET_XARCH
else if (tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH, GT_BSWAP, GT_BSWAP16))
else if (tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ, GT_BSWAP, GT_BSWAP16))
{
genConsumeRegs(tree->gtGetOp1());
}
Expand Down
146 changes: 130 additions & 16 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,40 +158,134 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
//
bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const
{
// The node we're checking should be one of the two child nodes
assert((parentNode->gtGetOp1() == childNode) || (parentNode->gtGetOp2() == childNode));

// We cannot contain if the parent node
// * is contained
// * is not operating on an integer
// * is already marking a child node as contained
// * is required to throw on overflow

if (parentNode->isContained())
return false;

if (!varTypeIsIntegral(parentNode))
return false;

if (parentNode->gtFlags & GTF_SET_FLAGS)
if (parentNode->gtGetOp1()->isContained() || parentNode->gtGetOp2()->isContained())
return false;

GenTree* op1 = parentNode->gtGetOp1();
GenTree* op2 = parentNode->gtGetOp2();
if (parentNode->OperMayOverflow() && parentNode->gtOverflow())
return false;

if (op2 != childNode)
// We cannot contain if the child node:
// * is not operating on an integer
// * is required to set a flag
// * is required to throw on overflow

if (!varTypeIsIntegral(childNode))
return false;

if (op1->isContained() || op2->isContained())
if ((childNode->gtFlags & GTF_SET_FLAGS) != 0)
return false;

if (!varTypeIsIntegral(op2))
if (childNode->OperMayOverflow() && childNode->gtOverflow())
return false;

if (op2->gtFlags & GTF_SET_FLAGS)
GenTree* matchedOp = nullptr;

if (childNode->OperIs(GT_MUL))
{
if (childNode->gtGetOp1()->isContained() || childNode->gtGetOp2()->isContained())
{
// Cannot contain if either of the childs operands is already contained
return false;
}

if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0)
{
// Cannot contain if the parent operation needs to set flags
return false;
}

if (parentNode->OperIs(GT_ADD))
{
// Find "c + (a * b)" or "(a * b) + c"
return IsSafeToContainMem(parentNode, childNode);
}

if (parentNode->OperIs(GT_SUB))
{
// Find "c - (a * b)"
assert(childNode == parentNode->gtGetOp2());
return IsSafeToContainMem(parentNode, childNode);
}

// TODO: Handle mneg
return false;
}

// Find "a + b * c" or "a - b * c".
if (parentNode->OperIs(GT_ADD, GT_SUB) && op2->OperIs(GT_MUL))
if (childNode->OperIs(GT_LSH, GT_RSH, GT_RSZ))
{
if (parentNode->gtOverflow())
// Find "a op (b shift cns)"

if (childNode->gtGetOp1()->isContained())
{
// Cannot contain if the childs op1 is already contained
return false;
}

GenTree* shiftAmountNode = childNode->gtGetOp2();

if (!shiftAmountNode->IsCnsIntOrI())
{
// Cannot contain if the childs op2 is not a constant
return false;
}

ssize_t shiftAmount = shiftAmountNode->AsIntCon()->IconValue();

if ((shiftAmount < 0x01) || (shiftAmount > 0x3F))
{
// Cannot contain if the shift amount is less than 1 or greater than 63
return false;
}

if (!varTypeIsLong(childNode) && (shiftAmount > 0x1F))
{
// Cannot contain if the shift amount is greater than 31
return false;
}

if (op2->gtOverflow())
if (parentNode->OperIs(GT_ADD, GT_SUB, GT_AND))
{
// These operations can still report flags

if (IsSafeToContainMem(parentNode, childNode))
{
assert(shiftAmountNode->isContained());
return true;
}
}

if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0)
{
// Cannot contain if the parent operation needs to set flags
return false;
}

if (parentNode->OperIs(GT_CMP, GT_AND, GT_OR, GT_XOR))
{
if (IsSafeToContainMem(parentNode, childNode))
{
assert(shiftAmountNode->isContained());
return true;
}
}

return !op2->gtGetOp1()->isContained() && !op2->gtGetOp2()->isContained();
// TODO: Handle CMN, NEG/NEGS, BIC/BICS, EON, MVN, ORN, TST
return false;
}

return false;
Expand Down Expand Up @@ -1846,13 +1940,33 @@ void Lowering::ContainCheckBinary(GenTreeOp* node)
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2();

// Check and make op2 contained (if it is a containable immediate)
CheckImmedAndMakeContained(node, op2);
if (CheckImmedAndMakeContained(node, op2))
{
return;
}

if (node->OperIsCommutative() && CheckImmedAndMakeContained(node, op1))
{
MakeSrcContained(node, op1);
std::swap(node->gtOp1, node->gtOp2);
return;
}

#ifdef TARGET_ARM64
if (comp->opts.OptimizationEnabled() && IsContainableBinaryOp(node, op2))
if (comp->opts.OptimizationEnabled())
{
MakeSrcContained(node, op2);
if (IsContainableBinaryOp(node, op2))
{
MakeSrcContained(node, op2);
return;
}

if (node->OperIsCommutative() && IsContainableBinaryOp(node, op1))
{
MakeSrcContained(node, op1);
std::swap(node->gtOp1, node->gtOp2);
return;
}
}

// Change ADD TO ADDEX for ADD(X, CAST(Y)) or ADD(CAST(X), Y) where CAST is int->long
Expand Down
9 changes: 5 additions & 4 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3143,14 +3143,15 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates)
#ifdef TARGET_ARM64
if (node->OperIs(GT_MUL) || node->OperIsCmpCompare() || node->OperIs(GT_AND))
{
// Can be contained for MultiplyAdd on arm64.
// MUL can be contained for madd or msub on arm64.
// Compare and AND may be contained due to If Conversion.
return BuildBinaryUses(node->AsOp(), candidates);
}
if (node->OperIs(GT_NEG, GT_CAST, GT_LSH))
if (node->OperIs(GT_NEG, GT_CAST, GT_LSH, GT_RSH, GT_RSZ))
{
// GT_NEG can be contained for MultiplyAdd on arm64
// GT_CAST and GT_LSH for ADD with sign/zero extension
// NEG can be contained for mneg on arm64
// CAST and LSH for ADD with sign/zero extension
// LSH, RSH, and RSZ for various "shifted register" instructions on arm64
return BuildOperandUses(node->gtGetOp1(), candidates);
}
#endif
Expand Down

0 comments on commit 27f1398

Please sign in to comment.