diff --git a/src/jit/compiler.h b/src/jit/compiler.h index d4388628c609..416c097c6f01 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -8186,13 +8186,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool compPublishStubParam : 1; // EAX captured in prolog will be available through an instrinsic bool compRetBuffDefStack : 1; // The ret buff argument definitely points into the stack. - var_types compRetType; // Return type of the method as declared in IL - var_types compRetNativeType; // Normalized return type as per target arch ABI - unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden) - unsigned compArgsCount; // Number of arguments (incl. implicit and hidden) - unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present); - int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE) - unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var) + var_types compRetType; // Return type of the method as declared in IL + var_types compRetNativeType; // Normalized return type as per target arch ABI + unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden) + unsigned compArgsCount; // Number of arguments (incl. implicit and hidden) + unsigned compArgRegCount; // Number of integer locals + unsigned compOtherArgRegCount; // Number of multireg args + unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present); + int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE) + unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var) unsigned compILlocalsCount; // Number of vars : args + locals (incl. implicit but not hidden) unsigned compLocalsCount; // Number of vars : args + locals (incl. implicit and hidden) unsigned compMaxStack; diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 2777ce978450..2e06caeb7085 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -239,9 +239,11 @@ void Compiler::lvaInitTypeRef() // Finally the local variables //------------------------------------------------------------------------- - unsigned varNum = varDscInfo.varNum; - LclVarDsc* varDsc = varDscInfo.varDsc; - CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args; + unsigned argRegNum = varDscInfo.intRegArgNum; + unsigned otherArgRegNum = varDscInfo.floatRegArgNum; + unsigned varNum = varDscInfo.varNum; + LclVarDsc* varDsc = varDscInfo.varDsc; + CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args; for (unsigned i = 0; i < info.compMethodInfo->locals.numArgs; i++, varNum++, varDsc++, localsSig = info.compCompHnd->getArgNext(localsSig)) @@ -262,6 +264,9 @@ void Compiler::lvaInitTypeRef() } } + info.compArgRegCount = argRegNum; + info.compOtherArgRegCount = otherArgRegNum; + if ( // If there already exist unsafe buffers, don't mark more structs as unsafe // as that will cause them to be placed along with the real unsafe buffers, // unnecessarily exposing them to overruns. This can affect GS tests which diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 5c46d3d23589..9542b695f592 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -1714,7 +1714,6 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) fgArgTabEntryPtr argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode); assert(argTabEntry); unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount; - noway_assert(callerArgNum < comp->info.compArgsCount); unsigned callerArgLclNum = callerArgNum; LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 1acb8d2981b2..076d34fa5c3a 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -6943,20 +6943,23 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // Note that callee being a vararg method is not a problem since we can account the params being passed. // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie) - unsigned nCallerArgs = info.compArgsCount; + unsigned callerArgRegCount = info.compArgRegCount; + unsigned callerOtherArgRegCount = info.compOtherArgRegCount; // Count the callee args including implicit and hidden. // Note that GenericContext and VarargCookie are added by importer while // importing the call to gtCallArgs list along with explicit user args. - unsigned nCalleeArgs = 0; + unsigned calleeArgRegCount = 0; + unsigned calleeOtherArgRegCount = 0; + if (callee->gtCallObjp) // thisPtr { - nCalleeArgs++; + ++calleeArgRegCount; } if (callee->HasRetBufArg()) // RetBuf { - nCalleeArgs++; + ++calleeArgRegCount; // If callee has RetBuf param, caller too must have it. // Otherwise go the slow route. @@ -6973,8 +6976,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) bool hasMultiByteArgs = false; for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2) { - nCalleeArgs++; - assert(args->OperIsList()); GenTreePtr argx = args->gtOp.gtOp1; @@ -7004,21 +7005,61 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) unsigned typeSize = 0; hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false); -#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_) - // On System V/arm64 the args could be a 2 eightbyte struct that is passed in two registers. - // Account for the second eightbyte in the nCalleeArgs. - // https://github.com/dotnet/coreclr/issues/2666 - // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 to 16 bytes are conservatively estimated - // as two args, since they need two registers whereas nCallerArgs is - // counting such an arg as one. This would mean we will not be optimizing - // certain calls though technically possible. +#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) + SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; - if (typeSize > TARGET_POINTER_SIZE) + assert(objClass != nullptr); + eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); + if (structDesc.passedInRegisters) { - unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE); - nCalleeArgs += extraArgRegsToAdd; + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + { + if (structDesc.IsIntegralSlot(i)) + { + ++calleeArgRegCount; + } + else if (structDesc.IsSseSlot(i)) + { + ++calleeOtherArgRegCount; + } + else + { + assert(false && "Invalid eightbyte classification type."); + break; + } + } } -#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_ + +#else // ARM64 + var_types hfaType = GetHfaType(argx); + bool isHfaArg = varTypeIsFloating(hfaType); + unsigned size = 1; + + if (isHfaArg) + { + size = GetHfaCount(argx); + // HFA structs are passed by value in multiple registers + calleeArgRegCount += size; + } + else + { + // Structs are either passed in 1 or 2 (64-bit) slots + size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd), + TARGET_POINTER_SIZE)) / + TARGET_POINTER_SIZE; + + if (size <= 2) + { + // Structs that are the size of 2 pointers are passed by value in multiple registers + calleeArgRegCount += size; + } + else if (size > 2) + { + size = 1; // Structs that are larger that 2 pointers (except for HFAs) are passed by + // reference (to a copy) + } + } +#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING #else assert(!"Target platform ABI rules regarding passing struct type args in registers"); @@ -7030,6 +7071,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) hasMultiByteArgs = true; } } + else + { + ++calleeArgRegCount; + } } // Go the slow route, if it has multi-byte params @@ -7038,6 +7083,19 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) return false; } + const unsigned maxRegArgs = MAX_REG_ARG; + bool hasOtherRegs = callerOtherArgRegCount > 0 || calleeOtherArgRegCount > 0; + + // Logging to help determine why the callee has been marked as canFastTailCall + JITDUMP("\ncanFastTailCall:\n"); + JITDUMP("calleeArgRegCount (%d) > maxRegArgs: (%d): %s\n", + calleeArgRegCount, maxRegArgs, calleeArgRegCount > MAX_REG_ARG ? "true" : "false"); + JITDUMP("callerArgRegCount (%d) < calleeArgRegCount (%d): %s\n", + callerArgRegCount, calleeArgRegCount, callerArgRegCount < calleeArgRegCount ? "true" : "false"); + + JITDUMP("!hasOtherRegs (%d) && (callerOtherArgRegCount (%d) < calleeOtherArgRegCount (%d)): %s\n", + (int)hasOtherRegs, callerOtherArgRegCount, calleeOtherArgRegCount, !hasOtherRegs && (callerOtherArgRegCount < calleeOtherArgRegCount) ? "true" : "false"); + // If we reached here means that callee has only those argument types which can be passed in // a register and if passed on stack will occupy exactly one stack slot in out-going arg area. // If we are passing args on stack for callee and it has more args passed on stack than @@ -7045,7 +7103,11 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // // Note that the GC'ness of on stack args need not match since the arg setup area is marked // as non-interruptible for fast tail calls. - if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs)) + if ((calleeArgRegCount > MAX_REG_ARG) && + (callerArgRegCount < calleeArgRegCount) && + (!hasOtherRegs || (callerOtherArgRegCount < calleeOtherArgRegCount)) // Make sure that we have structs passed in regs + // and avoid 0 < 0 + ) { return false; } diff --git a/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs new file mode 100644 index 000000000000..5ddc0f60e7f6 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs @@ -0,0 +1,81 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; + +// 10 byte struct +public struct A +{ + public int a; + public int b; + public short c; +} + +// 32 byte struct +public struct B +{ + public long a; + public long b; + public long c; + public long d; +} + +public class DoNotFastTailCallWithStructs +{ + static A a; + static B b; + + public static void Foo(B b, int count=1000) + { + if (count == 100) + { + return count; + } + + if (count-- % 2 == 0) + { + return Foo(a, count); + } + + else + { + return Foo(b, count); + } + } + + public static void Foo(A a, int count=1000) + { + if (count == 100) + { + return count; + } + + if (count-- % 2 == 0) + { + return Foo(a, count); + } + + else + { + return Foo(b, count); + } + } + + public static int Main() + { + a = new A(); + b = new B(); + + a.a = 100; + a.b = 1000; + a.c = 10000; + + b.a = 500; + b.b = 5000; + b.c = 50000; + b.d = 500000; + + return Foo(a); + } +} \ No newline at end of file diff --git a/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj new file mode 100644 index 000000000000..1a8a6d7acfac --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj @@ -0,0 +1,45 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + + + False + + + + + None + True + True + True + True + True + True + $(DefineConstants);CORECLR + + + + + + + + + + + diff --git a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs b/tests/src/JIT/opt/FastTailCall/StackFixup.cs similarity index 89% rename from tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs rename to tests/src/JIT/opt/FastTailCall/StackFixup.cs index f4caf37fd279..eaf095032aad 100644 --- a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs +++ b/tests/src/JIT/opt/FastTailCall/StackFixup.cs @@ -1,3 +1,6 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. using System; public struct A diff --git a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj b/tests/src/JIT/opt/FastTailCall/StackFixup.csproj similarity index 97% rename from tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj rename to tests/src/JIT/opt/FastTailCall/StackFixup.csproj index b065595dd6b5..f7e3e9792b67 100644 --- a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj +++ b/tests/src/JIT/opt/FastTailCall/StackFixup.csproj @@ -34,7 +34,7 @@ $(DefineConstants);CORECLR - + diff --git a/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs new file mode 100644 index 000000000000..13ca0cbc4827 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +using System; + +// 10 byte struct +public struct A +{ + public int a; + public int b; + public short c; +} + +class TailCallStructPassingSimple +{ + // Simple tail call candidate that would be ignored on Arm64 and amd64 Unix + // due to https://github.com/dotnet/coreclr/issues/2666 + public static int ImplicitTailCallTenByteStruct(A a, int count=1000) + { + if (count-- == 0) + { + return 100; + } + + return ImplicitTailCallTenByteStruct(a, count); + } + + public static int Main() + { + A temp = new A(); + temp.a = 50; + temp.b = 500; + temp.c = 62; + + int ret = ImplicitTailCallTenByteStruct(temp); + return ret; + } +} \ No newline at end of file diff --git a/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj new file mode 100644 index 000000000000..0cac87b23c77 --- /dev/null +++ b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj @@ -0,0 +1,33 @@ + + + + + Debug + AnyCPU + $(MSBuildProjectName) + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + + + + + + + + + False + + + + + + + + + + + +