From 70d20f1445127d6473283047d0a551ccf85f897f Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 2 Dec 2021 23:37:26 +0300 Subject: [PATCH] [arm64] JIT: Add with sign/zero extend (#61549) --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenarm64.cpp | 46 +++++++++++++++++++++++++ src/coreclr/jit/codegenarmarch.cpp | 4 +++ src/coreclr/jit/codegenlinear.cpp | 2 +- src/coreclr/jit/emitarm64.cpp | 2 +- src/coreclr/jit/gtlist.h | 1 + src/coreclr/jit/lowerarmarch.cpp | 54 ++++++++++++++++++++++++++---- src/coreclr/jit/lsraarm64.cpp | 1 + src/coreclr/jit/lsrabuild.cpp | 5 +-- 9 files changed, 105 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 6161feb05ef33..343bcb138300b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1254,6 +1254,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForJumpCompare(GenTreeOp* tree); void genCodeForMadd(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); + void genCodeForAddEx(GenTreeOp* tree); #endif // TARGET_ARM64 #if defined(FEATURE_EH_FUNCLETS) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 076df275868a3..db45c98b68e63 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -9495,4 +9495,50 @@ void CodeGen::genCodeForBfiz(GenTreeOp* tree) genProduceReg(tree); } +//------------------------------------------------------------------------ +// genCodeForAddEx: Generates a single add instruction for a GT_ADDEX node, i.e. an add whose +// contained operand is a sign/zero-extending cast to long or a left shift by a constant +// +// Arguments: +// tree - the GT_ADDEX node; exactly one of its two operands is contained. 
+// +void CodeGen::genCodeForAddEx(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_ADDEX) && !(tree->gtFlags & GTF_SET_FLAGS)); + genConsumeOperands(tree); + + GenTree* op; + GenTree* containedOp; + if (tree->gtGetOp1()->isContained()) + { + containedOp = tree->gtGetOp1(); + op = tree->gtGetOp2(); + } + else + { + containedOp = tree->gtGetOp2(); + op = tree->gtGetOp1(); + } + assert(containedOp->isContained() && !op->isContained()); + + regNumber dstReg = tree->GetRegNum(); + regNumber op1Reg = op->GetRegNum(); + regNumber op2Reg = containedOp->gtGetOp1()->GetRegNum(); + + if (containedOp->OperIs(GT_CAST)) + { + GenTreeCast* cast = containedOp->AsCast(); + assert(varTypeIsLong(cast->CastToType())); + insOpts opts = cast->IsUnsigned() ? INS_OPTS_UXTW : INS_OPTS_SXTW; + GetEmitter()->emitIns_R_R_R(INS_add, emitActualTypeSize(tree), dstReg, op1Reg, op2Reg, opts); + } + else + { + assert(containedOp->OperIs(GT_LSH)); + ssize_t cns = containedOp->gtGetOp2()->AsIntCon()->IconValue(); + GetEmitter()->emitIns_R_R_R_I(INS_add, emitActualTypeSize(tree), dstReg, op1Reg, op2Reg, cns, INS_OPTS_LSL); + } + genProduceReg(tree); +} + #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 22379e3c9dbed..785ae4f96647b 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -317,6 +317,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForSwap(treeNode->AsOp()); break; + case GT_ADDEX: + genCodeForAddEx(treeNode->AsOp()); + break; + case GT_BFIZ: genCodeForBfiz(treeNode->AsOp()); break; diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 38ac9a92d86d4..51e9afc074398 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1643,7 +1643,7 @@ void CodeGen::genConsumeRegs(GenTree* tree) } #endif // FEATURE_HW_INTRINSICS #endif // TARGET_XARCH - else if (tree->OperIs(GT_BITCAST, GT_NEG)) + else if 
(tree->OperIs(GT_BITCAST, GT_NEG, GT_CAST, GT_LSH)) { genConsumeRegs(tree->gtGetOp1()); } diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 1bcda1dd0a88a..5aade83cbda17 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -5857,7 +5857,7 @@ void emitter::emitIns_R_R_R( case INS_adds: case INS_subs: - emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE); + emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, opt); return; case INS_cmeq: diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index f756a3b7bd470..ad45f810ad85d 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -291,6 +291,7 @@ GTNODE(MADD , GenTreeOp ,0, GTK_BINOP) // Ge GTNODE(JMPTABLE , GenTree ,0, (GTK_LEAF|GTK_NOCONTAIN)) // Generates the jump table for switches GTNODE(SWITCH_TABLE , GenTreeOp ,0, (GTK_BINOP|GTK_NOVALUE)) // Jump Table based switch construct #ifdef TARGET_ARM64 +GTNODE(ADDEX, GenTreeOp ,0, GTK_BINOP) // Add with sign/zero extension GTNODE(BFIZ , GenTreeOp ,0, GTK_BINOP) // Bitfield Insert in Zero #endif diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 345edc402cc3f..279a20e7a3621 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1483,25 +1483,28 @@ void Lowering::ContainCheckIndir(GenTreeIndir* indirNode) // void Lowering::ContainCheckBinary(GenTreeOp* node) { + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + // Check and make op2 contained (if it is a containable immediate) - CheckImmedAndMakeContained(node, node->gtOp2); + CheckImmedAndMakeContained(node, op2); #ifdef TARGET_ARM64 // Find "a * b + c" or "c + a * b" in order to emit MADD/MSUB if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained() && node->OperIs(GT_ADD) && - !node->gtOverflow() && (node->gtGetOp1()->OperIs(GT_MUL) || node->gtGetOp2()->OperIs(GT_MUL))) + !node->gtOverflow() && 
(op1->OperIs(GT_MUL) || op2->OperIs(GT_MUL))) { GenTree* mul; GenTree* c; - if (node->gtGetOp1()->OperIs(GT_MUL)) + if (op1->OperIs(GT_MUL)) { - mul = node->gtGetOp1(); - c = node->gtGetOp2(); + mul = op1; + c = op2; } else { - mul = node->gtGetOp2(); - c = node->gtGetOp1(); + mul = op2; + c = op1; } GenTree* a = mul->gtGetOp1(); @@ -1526,6 +1529,43 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) MakeSrcContained(node, mul); } } + + // Change ADD to ADDEX for ADD(X, CAST(Y)) or ADD(CAST(X), Y) where CAST is int->long + // or for ADD(LSH(X, CNS), Y) or ADD(X, LSH(Y, CNS)) where CNS is in the (0..typeWidth) range + if (node->OperIs(GT_ADD) && !op1->isContained() && !op2->isContained() && varTypeIsIntegral(node) && + !node->gtOverflow()) + { + assert(!node->isContained()); + + if (op1->OperIs(GT_CAST) || op2->OperIs(GT_CAST)) + { + GenTree* cast = op1->OperIs(GT_CAST) ? op1 : op2; + if (cast->gtGetOp1()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && !cast->gtOverflow()) + { + node->ChangeOper(GT_ADDEX); + MakeSrcContained(node, cast); + } + } + else if (op1->OperIs(GT_LSH) || op2->OperIs(GT_LSH)) + { + GenTree* lsh = op1->OperIs(GT_LSH) ? op1 : op2; + GenTree* shiftBy = lsh->gtGetOp2(); + + if (shiftBy->IsCnsIntOrI()) + { + const ssize_t shiftByCns = shiftBy->AsIntCon()->IconValue(); + const ssize_t maxShift = (ssize_t)genTypeSize(node) * BITS_IN_BYTE; + + if ((shiftByCns > 0) && (shiftByCns < maxShift)) + { + // shiftBy is small so it has to be contained at this point. 
+ assert(shiftBy->isContained()); + node->ChangeOper(GT_ADDEX); + MakeSrcContained(node, lsh); + } + } + } + } #endif } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 8a6393e235195..1b201603f95a1 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -266,6 +266,7 @@ int LinearScan::BuildNode(GenTree* tree) } FALLTHROUGH; + case GT_ADDEX: case GT_AND: case GT_AND_NOT: case GT_OR: diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 097df9b2f0ee9..ab70d0b78859f 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3102,9 +3102,10 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) // Can be contained for MultiplyAdd on arm64 return BuildBinaryUses(node->AsOp(), candidates); } - if (node->OperIs(GT_NEG)) + if (node->OperIs(GT_NEG, GT_CAST, GT_LSH)) { - // Can be contained for MultiplyAdd on arm64 + // GT_NEG can be contained for MultiplyAdd on arm64 + // GT_CAST and GT_LSH for ADD with sign/zero extension return BuildOperandUses(node->gtGetOp1(), candidates); } #endif