From b8b4e0b55e44b95276cad5b3ac1aab2a2e0a53e3 Mon Sep 17 00:00:00 2001 From: jashook Date: Mon, 5 Jun 2017 14:48:51 -0700 Subject: [PATCH] [*64] Allow more fastTailCalls involving structs Before this change structs on Arm64 and Amd64 Unix could pessimize when we could fastTailCall if they were engregisterable and took more than one register. This updates the canFastTailCall function to allow this pattern to be fastTailCalled. --- src/jit/compiler.h | 16 +-- src/jit/lclvars.cpp | 11 +- src/jit/lower.cpp | 1 - src/jit/morph.cpp | 100 ++++++++++++++---- .../DoNotFastTailCallWithStructs.cs | 81 ++++++++++++++ .../DoNotFastTailCallWithStructs.csproj | 45 ++++++++ .../StackFixup.cs} | 3 + .../StackFixup.csproj} | 2 +- .../opt/FastTailCall/StructPassingSimple.cs | 38 +++++++ .../FastTailCall/StructPassingSimple.csproj | 33 ++++++ 10 files changed, 299 insertions(+), 31 deletions(-) create mode 100644 tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs create mode 100644 tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj rename tests/src/JIT/opt/{Tailcall/FastTailCallStackFixup.cs => FastTailCall/StackFixup.cs} (89%) rename tests/src/JIT/opt/{Tailcall/FastTailCallStackFixup.csproj => FastTailCall/StackFixup.csproj} (97%) create mode 100644 tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs create mode 100644 tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj diff --git a/src/jit/compiler.h b/src/jit/compiler.h index d4388628c609..416c097c6f01 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -8186,13 +8186,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool compPublishStubParam : 1; // EAX captured in prolog will be available through an instrinsic bool compRetBuffDefStack : 1; // The ret buff argument definitely points into the stack. - var_types compRetType; // Return type of the method as declared in IL - var_types compRetNativeType; // Normalized return type as per target arch ABI - unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden) - unsigned compArgsCount; // Number of arguments (incl. implicit and hidden) - unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present); - int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE) - unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var) + var_types compRetType; // Return type of the method as declared in IL + var_types compRetNativeType; // Normalized return type as per target arch ABI + unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden) + unsigned compArgsCount; // Number of arguments (incl. implicit and hidden) + unsigned compArgRegCount; // Number of integer locals + unsigned compOtherArgRegCount; // Number of multireg args + unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present); + int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE) + unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var) unsigned compILlocalsCount; // Number of vars : args + locals (incl. implicit but not hidden) unsigned compLocalsCount; // Number of vars : args + locals (incl. implicit and hidden) unsigned compMaxStack; diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 2777ce978450..2e06caeb7085 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -239,9 +239,11 @@ void Compiler::lvaInitTypeRef() // Finally the local variables //------------------------------------------------------------------------- - unsigned varNum = varDscInfo.varNum; - LclVarDsc* varDsc = varDscInfo.varDsc; - CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args; + unsigned argRegNum = varDscInfo.intRegArgNum; + unsigned otherArgRegNum = varDscInfo.floatRegArgNum; + unsigned varNum = varDscInfo.varNum; + LclVarDsc* varDsc = varDscInfo.varDsc; + CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args; for (unsigned i = 0; i < info.compMethodInfo->locals.numArgs; i++, varNum++, varDsc++, localsSig = info.compCompHnd->getArgNext(localsSig)) @@ -262,6 +264,9 @@ void Compiler::lvaInitTypeRef() } } + info.compArgRegCount = argRegNum; + info.compOtherArgRegCount = otherArgRegNum; + if ( // If there already exist unsafe buffers, don't mark more structs as unsafe // as that will cause them to be placed along with the real unsafe buffers, // unnecessarily exposing them to overruns. This can affect GS tests which diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 5c46d3d23589..9542b695f592 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -1714,7 +1714,6 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) fgArgTabEntryPtr argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode); assert(argTabEntry); unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount; - noway_assert(callerArgNum < comp->info.compArgsCount); unsigned callerArgLclNum = callerArgNum; LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 1acb8d2981b2..076d34fa5c3a 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -6943,20 +6943,23 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // Note that callee being a vararg method is not a problem since we can account the params being passed. // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie) - unsigned nCallerArgs = info.compArgsCount; + unsigned callerArgRegCount = info.compArgRegCount; + unsigned callerOtherArgRegCount = info.compOtherArgRegCount; // Count the callee args including implicit and hidden. // Note that GenericContext and VarargCookie are added by importer while // importing the call to gtCallArgs list along with explicit user args. - unsigned nCalleeArgs = 0; + unsigned calleeArgRegCount = 0; + unsigned calleeOtherArgRegCount = 0; + if (callee->gtCallObjp) // thisPtr { - nCalleeArgs++; + ++calleeArgRegCount; } if (callee->HasRetBufArg()) // RetBuf { - nCalleeArgs++; + ++calleeArgRegCount; // If callee has RetBuf param, caller too must have it. // Otherwise go the slow route. @@ -6973,8 +6976,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) bool hasMultiByteArgs = false; for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2) { - nCalleeArgs++; - assert(args->OperIsList()); GenTreePtr argx = args->gtOp.gtOp1; @@ -7004,21 +7005,61 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) unsigned typeSize = 0; hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false); -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_) - // On System V/arm64 the args could be a 2 eightbyte struct that is passed in two registers. - // Account for the second eightbyte in the nCalleeArgs. - // https://github.com/dotnet/coreclr/issues/2666 - // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 to 16 bytes are conservatively estimated - // as two args, since they need two registers whereas nCallerArgs is - // counting such an arg as one. This would mean we will not be optimizing - // certain calls though technically possible. +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; - if (typeSize > TARGET_POINTER_SIZE) + assert(objClass != nullptr); + eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); + if (structDesc.passedInRegisters) { - unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE); - nCalleeArgs += extraArgRegsToAdd; + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.IsIntegralSlot(i)) + { + ++calleeArgRegCount; + } + else if (structDesc.IsSseSlot(i)) + { + ++calleeOtherArgRegCount; + } + else + { + assert(false && "Invalid eightbyte classification type."); + break; + } + } } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_ + +#else // ARM64 + var_types hfaType = GetHfaType(argx); + bool isHfaArg = varTypeIsFloating(hfaType); + unsigned size = 1; + + if (isHfaArg) + { + size = GetHfaCount(argx); + // HFA structs are passed by value in multiple registers + calleeArgRegCount += size; + } + else + { + // Structs are either passed in 1 or 2 (64-bit) slots + size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), + TARGET_POINTER_SIZE)) / + TARGET_POINTER_SIZE; + + if (size <= 2) + { + // Structs that are the size of 2 pointers are passed by value in multiple registers + calleeArgRegCount += size; + } + else if (size > 2) + { + size = 1; // Structs that are larger that 2 pointers (except for HFAs) are passed by + // reference (to a copy) + } + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #else assert(!"Target platform ABI rules regarding passing struct type args in registers"); @@ -7030,6 +7071,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) hasMultiByteArgs = true; } } + else + { + ++calleeArgRegCount; + } } // Go the slow route, if it has multi-byte params @@ -7038,6 +7083,19 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) return false; } + const unsigned maxRegArgs = MAX_REG_ARG; + bool hasOtherRegs = callerOtherArgRegCount > 0 || calleeOtherArgRegCount > 0; + + // Logging to help determine why the callee has been marked as canFastTailCall + JITDUMP("\ncanFastTailCall:\n"); + JITDUMP("calleeArgRegCount (%d) > maxRegArgs: (%d): %s\n", + calleeArgRegCount, maxRegArgs, calleeArgRegCount > MAX_REG_ARG ? "true" : "false"); + JITDUMP("callerArgRegCount (%d) < calleeArgRegCount (%d): %s\n", + callerArgRegCount, calleeArgRegCount, callerArgRegCount < calleeArgRegCount ? "true" : "false"); + + JITDUMP("!hasOtherRegs (%d) && (callerOtherArgRegCount (%d) < calleeOtherArgRegCount (%d)): %s\n", + (int)hasOtherRegs, callerOtherArgRegCount, calleeOtherArgRegCount, !hasOtherRegs && (callerOtherArgRegCount < calleeOtherArgRegCount) ? "true" : "false"); + // If we reached here means that callee has only those argument types which can be passed in // a register and if passed on stack will occupy exactly one stack slot in out-going arg area. // If we are passing args on stack for callee and it has more args passed on stack than @@ -7045,7 +7103,11 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // // Note that the GC'ness of on stack args need not match since the arg setup area is marked // as non-interruptible for fast tail calls. - if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs)) + if ((calleeArgRegCount > MAX_REG_ARG) && + (callerArgRegCount < calleeArgRegCount) && + (!hasOtherRegs || (callerOtherArgRegCount < calleeOtherArgRegCount)) // Make sure that we have structs passed in regs + // and avoid 0 < 0 + ) { return false; } diff --git a/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs new file mode 100644 index 000000000000..5ddc0f60e7f6 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs @@ -0,0 +1,81 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; + +// 10 byte struct +public struct A +{ + public int a; + public int b; + public short c; +} + +// 32 byte struct +public struct B +{ + public long a; + public long b; + public long c; + public long d; +} + +public class DoNotFastTailCallWithStructs +{ + static A a; + static B b; + + public static void Foo(B b, int count=1000) + { + if (count == 100) + { + return count; + } + + if (count-- % 2 == 0) + { + return Foo(a, count); + } + + else + { + return Foo(b, count); + } + } + + public static void Foo(A a, int count=1000) + { + if (count == 100) + { + return count; + } + + if (count-- % 2 == 0) + { + return Foo(a, count); + } + + else + { + return Foo(b, count); + } + } + + public static int Main() + { + a = new A(); + b = new B(); + + a.a = 100; + a.b = 1000; + a.c = 10000; + + b.a = 500; + b.b = 5000; + b.c = 50000; + b.d = 500000; + + return Foo(a); + } +} \ No newline at end of file diff --git a/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj new file mode 100644 index 000000000000..1a8a6d7acfac --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj @@ -0,0 +1,45 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + + False + + + + + None + True + True + True + True + True + True + $(DefineConstants);CORECLR + + + + + + + + + + + diff --git a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs b/tests/src/JIT/opt/FastTailCall/StackFixup.cs similarity index 89% rename from tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs rename to tests/src/JIT/opt/FastTailCall/StackFixup.cs index f4caf37fd279..eaf095032aad 100644 --- a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs +++ b/tests/src/JIT/opt/FastTailCall/StackFixup.cs @@ -1,3 +1,6 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. using System; public struct A diff --git a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj b/tests/src/JIT/opt/FastTailCall/StackFixup.csproj similarity index 97% rename from tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj rename to tests/src/JIT/opt/FastTailCall/StackFixup.csproj index b065595dd6b5..f7e3e9792b67 100644 --- a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj +++ b/tests/src/JIT/opt/FastTailCall/StackFixup.csproj @@ -34,7 +34,7 @@ $(DefineConstants);CORECLR - + diff --git a/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs new file mode 100644 index 000000000000..13ca0cbc4827 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +using System; + +// 10 byte struct +public struct A +{ + public int a; + public int b; + public short c; +} + +class TailCallStructPassingSimple +{ + // Simple tail call candidate that would be ignored on Arm64 and amd64 Unix + // due to https://github.com/dotnet/coreclr/issues/2666 + public static int ImplicitTailCallTenByteStruct(A a, int count=1000) + { + if (count-- == 0) + { + return 100; + } + + return ImplicitTailCallTenByteStruct(a, count); + } + + public static int Main() + { + A temp = new A(); + temp.a = 50; + temp.b = 500; + temp.c = 62; + + int ret = ImplicitTailCallTenByteStruct(temp); + return ret; + } +} \ No newline at end of file diff --git a/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj new file mode 100644 index 000000000000..0cac87b23c77 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj @@ -0,0 +1,33 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + + + + + + + + + False + + + + + + + + + + + +