diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index d4388628c609..416c097c6f01 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -8186,13 +8186,15 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
bool compPublishStubParam : 1; // EAX captured in prolog will be available through an instrinsic
bool compRetBuffDefStack : 1; // The ret buff argument definitely points into the stack.
- var_types compRetType; // Return type of the method as declared in IL
- var_types compRetNativeType; // Normalized return type as per target arch ABI
- unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden)
- unsigned compArgsCount; // Number of arguments (incl. implicit and hidden)
- unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present);
- int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE)
- unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var)
+ var_types compRetType; // Return type of the method as declared in IL
+ var_types compRetNativeType; // Normalized return type as per target arch ABI
+ unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden)
+ unsigned compArgsCount; // Number of arguments (incl. implicit and hidden)
+ unsigned compArgRegCount; // Number of integer locals
+ unsigned compOtherArgRegCount; // Number of multireg args
+ unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present);
+ int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE)
+ unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var)
unsigned compILlocalsCount; // Number of vars : args + locals (incl. implicit but not hidden)
unsigned compLocalsCount; // Number of vars : args + locals (incl. implicit and hidden)
unsigned compMaxStack;
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index 2777ce978450..2e06caeb7085 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -239,9 +239,11 @@ void Compiler::lvaInitTypeRef()
// Finally the local variables
//-------------------------------------------------------------------------
- unsigned varNum = varDscInfo.varNum;
- LclVarDsc* varDsc = varDscInfo.varDsc;
- CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args;
+ unsigned argRegNum = varDscInfo.intRegArgNum;
+ unsigned otherArgRegNum = varDscInfo.floatRegArgNum;
+ unsigned varNum = varDscInfo.varNum;
+ LclVarDsc* varDsc = varDscInfo.varDsc;
+ CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args;
for (unsigned i = 0; i < info.compMethodInfo->locals.numArgs;
i++, varNum++, varDsc++, localsSig = info.compCompHnd->getArgNext(localsSig))
@@ -262,6 +264,9 @@ void Compiler::lvaInitTypeRef()
}
}
+ info.compArgRegCount = argRegNum;
+ info.compOtherArgRegCount = otherArgRegNum;
+
if ( // If there already exist unsafe buffers, don't mark more structs as unsafe
// as that will cause them to be placed along with the real unsafe buffers,
// unnecessarily exposing them to overruns. This can affect GS tests which
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index 5c46d3d23589..9542b695f592 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -1714,7 +1714,6 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
fgArgTabEntryPtr argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode);
assert(argTabEntry);
unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount;
- noway_assert(callerArgNum < comp->info.compArgsCount);
unsigned callerArgLclNum = callerArgNum;
LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum;
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 1acb8d2981b2..076d34fa5c3a 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -6943,20 +6943,23 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
// Note that callee being a vararg method is not a problem since we can account the params being passed.
// Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
- unsigned nCallerArgs = info.compArgsCount;
+ unsigned callerArgRegCount = info.compArgRegCount;
+ unsigned callerOtherArgRegCount = info.compOtherArgRegCount;
// Count the callee args including implicit and hidden.
// Note that GenericContext and VarargCookie are added by importer while
// importing the call to gtCallArgs list along with explicit user args.
- unsigned nCalleeArgs = 0;
+ unsigned calleeArgRegCount = 0;
+ unsigned calleeOtherArgRegCount = 0;
+
if (callee->gtCallObjp) // thisPtr
{
- nCalleeArgs++;
+ ++calleeArgRegCount;
}
if (callee->HasRetBufArg()) // RetBuf
{
- nCalleeArgs++;
+ ++calleeArgRegCount;
// If callee has RetBuf param, caller too must have it.
// Otherwise go the slow route.
@@ -6973,8 +6976,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
bool hasMultiByteArgs = false;
for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
{
- nCalleeArgs++;
-
assert(args->OperIsList());
GenTreePtr argx = args->gtOp.gtOp1;
@@ -7004,21 +7005,61 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
unsigned typeSize = 0;
hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
-#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
- // On System V/arm64 the args could be a 2 eightbyte struct that is passed in two registers.
- // Account for the second eightbyte in the nCalleeArgs.
- // https://github.com/dotnet/coreclr/issues/2666
- // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 to 16 bytes are conservatively estimated
- // as two args, since they need two registers whereas nCallerArgs is
- // counting such an arg as one. This would mean we will not be optimizing
- // certain calls though technically possible.
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
- if (typeSize > TARGET_POINTER_SIZE)
+ assert(objClass != nullptr);
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
+ if (structDesc.passedInRegisters)
{
- unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
- nCalleeArgs += extraArgRegsToAdd;
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ ++calleeArgRegCount;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ ++calleeOtherArgRegCount;
+ }
+ else
+ {
+ assert(false && "Invalid eightbyte classification type.");
+ break;
+ }
+ }
}
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
+
+#else // ARM64
+ var_types hfaType = GetHfaType(argx);
+ bool isHfaArg = varTypeIsFloating(hfaType);
+ unsigned size = 1;
+
+ if (isHfaArg)
+ {
+ size = GetHfaCount(argx);
+ // HFA structs are passed by value in multiple registers
+ calleeArgRegCount += size;
+ }
+ else
+ {
+ // Structs are either passed in 1 or 2 (64-bit) slots
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+
+ if (size <= 2)
+ {
+ // Structs that are the size of 2 pointers are passed by value in multiple registers
+ calleeArgRegCount += size;
+ }
+ else if (size > 2)
+ {
+ size = 1; // Structs that are larger that 2 pointers (except for HFAs) are passed by
+ // reference (to a copy)
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
#else
assert(!"Target platform ABI rules regarding passing struct type args in registers");
@@ -7030,6 +7071,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
hasMultiByteArgs = true;
}
}
+ else
+ {
+ ++calleeArgRegCount;
+ }
}
// Go the slow route, if it has multi-byte params
@@ -7038,6 +7083,19 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
return false;
}
+ const unsigned maxRegArgs = MAX_REG_ARG;
+ bool hasOtherRegs = callerOtherArgRegCount > 0 || calleeOtherArgRegCount > 0;
+
+ // Logging to help determine why the callee has been marked as canFastTailCall
+ JITDUMP("\ncanFastTailCall:\n");
+ JITDUMP("calleeArgRegCount (%d) > maxRegArgs: (%d): %s\n",
+ calleeArgRegCount, maxRegArgs, calleeArgRegCount > MAX_REG_ARG ? "true" : "false");
+ JITDUMP("callerArgRegCount (%d) < calleeArgRegCount (%d): %s\n",
+ callerArgRegCount, calleeArgRegCount, callerArgRegCount < calleeArgRegCount ? "true" : "false");
+
+ JITDUMP("!hasOtherRegs (%d) && (callerOtherArgRegCount (%d) < calleeOtherArgRegCount (%d)): %s\n",
+ (int)hasOtherRegs, callerOtherArgRegCount, calleeOtherArgRegCount, !hasOtherRegs && (callerOtherArgRegCount < calleeOtherArgRegCount) ? "true" : "false");
+
// If we reached here means that callee has only those argument types which can be passed in
// a register and if passed on stack will occupy exactly one stack slot in out-going arg area.
// If we are passing args on stack for callee and it has more args passed on stack than
@@ -7045,7 +7103,11 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
//
// Note that the GC'ness of on stack args need not match since the arg setup area is marked
// as non-interruptible for fast tail calls.
- if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
+ if ((calleeArgRegCount > MAX_REG_ARG) &&
+ (callerArgRegCount < calleeArgRegCount) &&
+ (!hasOtherRegs || (callerOtherArgRegCount < calleeOtherArgRegCount)) // Make sure that we have structs passed in regs
+ // and avoid 0 < 0
+ )
{
return false;
}
diff --git a/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs
new file mode 100644
index 000000000000..5ddc0f60e7f6
--- /dev/null
+++ b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.cs
@@ -0,0 +1,81 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+// 10 byte struct
+public struct A
+{
+ public int a;
+ public int b;
+ public short c;
+}
+
+// 32 byte struct
+public struct B
+{
+ public long a;
+ public long b;
+ public long c;
+ public long d;
+}
+
+public class DoNotFastTailCallWithStructs
+{
+ static A a;
+ static B b;
+
+ public static void Foo(B b, int count=1000)
+ {
+ if (count == 100)
+ {
+ return count;
+ }
+
+ if (count-- % 2 == 0)
+ {
+ return Foo(a, count);
+ }
+
+ else
+ {
+ return Foo(b, count);
+ }
+ }
+
+ public static void Foo(A a, int count=1000)
+ {
+ if (count == 100)
+ {
+ return count;
+ }
+
+ if (count-- % 2 == 0)
+ {
+ return Foo(a, count);
+ }
+
+ else
+ {
+ return Foo(b, count);
+ }
+ }
+
+ public static int Main()
+ {
+ a = new A();
+ b = new B();
+
+ a.a = 100;
+ a.b = 1000;
+ a.c = 10000;
+
+ b.a = 500;
+ b.b = 5000;
+ b.c = 50000;
+ b.d = 500000;
+
+ return Foo(a);
+ }
+}
\ No newline at end of file
diff --git a/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj
new file mode 100644
index 000000000000..1a8a6d7acfac
--- /dev/null
+++ b/tests/src/JIT/opt/FastTailCall/DoNotFastTailCallWithStructs.csproj
@@ -0,0 +1,45 @@
+
+
+
+
+ Debug
+ AnyCPU
+ $(MSBuildProjectName)
+ 2.0
+ {95DFC527-4DC1-495E-97D7-E94EE1F7140D}
+ Exe
+ {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
+ ..\..\
+ true
+
+
+
+
+
+
+
+
+ False
+
+
+
+
+ None
+ True
+ True
+ True
+ True
+ True
+ True
+ $(DefineConstants);CORECLR
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs b/tests/src/JIT/opt/FastTailCall/StackFixup.cs
similarity index 89%
rename from tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs
rename to tests/src/JIT/opt/FastTailCall/StackFixup.cs
index f4caf37fd279..eaf095032aad 100644
--- a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.cs
+++ b/tests/src/JIT/opt/FastTailCall/StackFixup.cs
@@ -1,3 +1,6 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
using System;
public struct A
diff --git a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj b/tests/src/JIT/opt/FastTailCall/StackFixup.csproj
similarity index 97%
rename from tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj
rename to tests/src/JIT/opt/FastTailCall/StackFixup.csproj
index b065595dd6b5..f7e3e9792b67 100644
--- a/tests/src/JIT/opt/Tailcall/FastTailCallStackFixup.csproj
+++ b/tests/src/JIT/opt/FastTailCall/StackFixup.csproj
@@ -34,7 +34,7 @@
$(DefineConstants);CORECLR
-
+
diff --git a/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs
new file mode 100644
index 000000000000..13ca0cbc4827
--- /dev/null
+++ b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.cs
@@ -0,0 +1,38 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+using System;
+
+// 10 byte struct
+public struct A
+{
+ public int a;
+ public int b;
+ public short c;
+}
+
+class TailCallStructPassingSimple
+{
+ // Simple tail call candidate that would be ignored on Arm64 and amd64 Unix
+ // due to https://github.com/dotnet/coreclr/issues/2666
+ public static int ImplicitTailCallTenByteStruct(A a, int count=1000)
+ {
+ if (count-- == 0)
+ {
+ return 100;
+ }
+
+ return ImplicitTailCallTenByteStruct(a, count);
+ }
+
+ public static int Main()
+ {
+ A temp = new A();
+ temp.a = 50;
+ temp.b = 500;
+ temp.c = 62;
+
+ int ret = ImplicitTailCallTenByteStruct(temp);
+ return ret;
+ }
+}
\ No newline at end of file
diff --git a/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj
new file mode 100644
index 000000000000..0cac87b23c77
--- /dev/null
+++ b/tests/src/JIT/opt/FastTailCall/StructPassingSimple.csproj
@@ -0,0 +1,33 @@
+
+
+
+
+ Debug
+ AnyCPU
+ $(MSBuildProjectName)
+ 2.0
+ {95DFC527-4DC1-495E-97D7-E94EE1F7140D}
+ Exe
+ {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
+ ..\..\
+
+
+
+
+
+
+
+
+ False
+
+
+
+
+
+
+
+
+
+
+
+