diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index c87dbb11601f8..8f8d6a4913537 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -433,7 +433,7 @@ class StubLinkerCPU : public StubLinker private: void EmitLoadStoreRegPairImm(DWORD flags, int regNum1, int regNum2, IntReg Xn, int offset, BOOL isVec); - void EmitLoadStoreRegImm(DWORD flags, int regNum, IntReg Xn, int offset, BOOL isVec); + void EmitLoadStoreRegImm(DWORD flags, int regNum, IntReg Xn, int offset, BOOL isVec, int log2Size = 3); public: // BitFlags for EmitLoadStoreReg(Pair)Imm methods @@ -484,7 +484,7 @@ class StubLinkerCPU : public StubLinker void EmitLoadStoreRegPairImm(DWORD flags, IntReg Xt1, IntReg Xt2, IntReg Xn, int offset=0); void EmitLoadStoreRegPairImm(DWORD flags, VecReg Vt1, VecReg Vt2, IntReg Xn, int offset=0); - void EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset=0); + void EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset=0, int log2Size = 3); void EmitLoadStoreRegImm(DWORD flags, VecReg Vt, IntReg Xn, int offset=0); void EmitLoadRegReg(IntReg Xt, IntReg Xn, IntReg Xm, DWORD option); diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index bfff364d57e6c..096a202af2c7b 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -1198,7 +1198,7 @@ AdjustContextForVirtualStub( { pExceptionRecord->ExceptionAddress = (PVOID)callsite; } - + SetIP(pContext, callsite); return TRUE; @@ -1567,29 +1567,32 @@ void StubLinkerCPU::EmitLoadStoreRegPairImm(DWORD flags, int regNum1, int regNum } -void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset) +void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, IntReg Xt, IntReg Xn, int offset, int log2Size) { - EmitLoadStoreRegImm(flags, (int)Xt, Xn, offset, FALSE); + EmitLoadStoreRegImm(flags, (int)Xt, Xn, offset, FALSE, log2Size); } void StubLinkerCPU::EmitLoadStoreRegImm(DWORD 
flags, VecReg Vt, IntReg Xn, int offset) { EmitLoadStoreRegImm(flags, (int)Vt, Xn, offset, TRUE); } -void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, int regNum, IntReg Xn, int offset, BOOL isVec) +void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, int regNum, IntReg Xn, int offset, BOOL isVec, int log2Size) { // Encoding: // wb=1 : [size(2)=11] | 1 | 1 | 1 | [IsVec(1)] | 0 | [!writeBack(1)] | 0 | [isLoad(1)] | 0 | [imm(7)] | [!postIndex(1)] | [Xn(5)] | [Xt(5)] // wb=0 : [size(2)=11] | 1 | 1 | 1 | [IsVec(1)] | 0 | [!writeBack(1)] | 0 | [isLoad(1)] | [ imm(12) ] | [Xn(5)] | [Xt(5)] // where IsVec=0 for IntReg, 1 for VecReg + _ASSERTE((log2Size & ~0x3ULL) == 0); + BOOL isLoad = flags & 1; BOOL writeBack = flags & 2; BOOL postIndex = flags & 4; if (writeBack) { _ASSERTE(-256 <= offset && offset <= 255); - Emit32((DWORD) ( (0x1F<<27) | + Emit32((DWORD) ( (log2Size << 30) | + (0x7<<27) | (!!isVec<<26) | (!writeBack<<24) | (!!isLoad<<22) | @@ -1602,13 +1605,16 @@ void StubLinkerCPU::EmitLoadStoreRegImm(DWORD flags, int regNum, IntReg Xn, int } else { - _ASSERTE((0 <= offset) && (offset <= 32760)); - _ASSERTE((offset & 7) == 0); - Emit32((DWORD) ( (0x1F<<27) | + int scaledOffset = 0xFFF & (offset >> log2Size); + + _ASSERTE(offset == (scaledOffset << log2Size)); + + Emit32((DWORD) ( (log2Size << 30) | + (0x7<<27) | (!!isVec<<26) | (!writeBack<<24) | (!!isLoad<<22) | - ((0xFFF & (offset >> 3)) << 10) | + (scaledOffset << 10) | (Xn<<5) | (regNum)) ); @@ -1715,15 +1721,25 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray) { // If source is present in register then destination must also be a register _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK); + _ASSERTE(!(pEntry->dstofs & ShuffleEntry::FPREGMASK)); + _ASSERTE(!(pEntry->srcofs & ShuffleEntry::FPREGMASK)); - EmitMovReg(IntReg(pEntry->dstofs & ShuffleEntry::OFSMASK), IntReg(pEntry->srcofs & ShuffleEntry::OFSMASK)); + EmitMovReg(IntReg(pEntry->dstofs & ShuffleEntry::OFSREGMASK), 
IntReg(pEntry->srcofs & ShuffleEntry::OFSREGMASK)); } else if (pEntry->dstofs & ShuffleEntry::REGMASK) { // source must be on the stack _ASSERTE(!(pEntry->srcofs & ShuffleEntry::REGMASK)); + _ASSERTE(!(pEntry->dstofs & ShuffleEntry::FPREGMASK)); + - EmitLoadStoreRegImm(eLOAD, IntReg(pEntry->dstofs & ShuffleEntry::OFSMASK), RegSp, pEntry->srcofs * sizeof(void*)); +#if !defined(TARGET_OSX) + EmitLoadStoreRegImm(eLOAD, IntReg(pEntry->dstofs & ShuffleEntry::OFSREGMASK), RegSp, pEntry->srcofs * sizeof(void*)); +#else + int log2Size = (pEntry->srcofs >> 12); + int srcOffset = int(pEntry->srcofs & 0xfff) << log2Size; + EmitLoadStoreRegImm(eLOAD, IntReg(pEntry->dstofs & ShuffleEntry::OFSREGMASK), RegSp, srcOffset, log2Size); +#endif } else { @@ -1733,8 +1749,22 @@ VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray) // dest must be on the stack _ASSERTE(!(pEntry->dstofs & ShuffleEntry::REGMASK)); +#if !defined(TARGET_OSX) EmitLoadStoreRegImm(eLOAD, IntReg(9), RegSp, pEntry->srcofs * sizeof(void*)); EmitLoadStoreRegImm(eSTORE, IntReg(9), RegSp, pEntry->dstofs * sizeof(void*)); +#else + // Decode ShuffleIterator::GetNextOfs() encoded entries + // See comments in that function + + // We expect src and dst stack size to be the same. 
+ _ASSERTE((pEntry->srcofs >> 12) == (pEntry->dstofs >> 12)); + int log2Size = (pEntry->srcofs >> 12); + int srcOffset = int(pEntry->srcofs & 0xfff) << log2Size; + int dstOffset = int(pEntry->dstofs & 0xfff) << log2Size; + + EmitLoadStoreRegImm(eLOAD, IntReg(9), RegSp, srcOffset, log2Size); + EmitLoadStoreRegImm(eSTORE, IntReg(9), RegSp, dstOffset, log2Size); +#endif } } @@ -1757,7 +1787,7 @@ VOID StubLinkerCPU::EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, s _ASSERTE(pEntry->dstofs != ShuffleEntry::HELPERREG); _ASSERTE(pEntry->srcofs != ShuffleEntry::HELPERREG); - EmitMovReg(IntReg(pEntry->dstofs & ShuffleEntry::OFSMASK), IntReg(pEntry->srcofs & ShuffleEntry::OFSMASK)); + EmitMovReg(IntReg(pEntry->dstofs & ShuffleEntry::OFSREGMASK), IntReg(pEntry->srcofs & ShuffleEntry::OFSREGMASK)); } MetaSig msig(pSharedMD); diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index deab21ca97efe..8064adfdfc97f 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -715,6 +715,11 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE pLoc->m_byteStackIndex = TransitionBlock::GetStackArgumentByteIndexFromOffset(argOffset); const bool isValueType = (m_argType == ELEMENT_TYPE_VALUETYPE); const bool isFloatHfa = (isValueType && !m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA()); + if (isFloatHfa) + { + CorInfoHFAElemType type = m_argTypeHandle.GetHFAType(); + pLoc->setHFAFieldSize(type); + } pLoc->m_byteStackSize = StackElemSize(byteArgSize, isValueType, isFloatHfa); } } diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index a500c76603f18..87fc358c1fb10 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -147,8 +147,11 @@ class ShuffleIterator } // Get next offset to shuffle. There has to be at least one offset left. - // For register arguments it returns regNum | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK. 
- // For stack arguments it returns stack offset in bytes with negative sign. + // It returns an offset encoded properly for a ShuffleEntry offset. + // - For floating register arguments it returns regNum | ShuffleEntry::REGMASK | ShuffleEntry::FPREGMASK. + // - For register arguments it returns regNum | ShuffleEntry::REGMASK. + // - For stack arguments it returns stack offset index in stack slots for most architectures. For macOS-arm64, + // it returns an encoded stack offset, see below. int GetNextOfs() { int index; @@ -189,6 +192,8 @@ class ShuffleIterator if (m_currentByteStackIndex < m_argLocDesc->m_byteStackSize) { const unsigned byteIndex = m_argLocDesc->m_byteStackIndex + m_currentByteStackIndex; + +#if !defined(TARGET_OSX) || !defined(TARGET_ARM64) index = byteIndex / TARGET_POINTER_SIZE; m_currentByteStackIndex += TARGET_POINTER_SIZE; @@ -198,7 +203,68 @@ class ShuffleIterator COMPlusThrow(kNotSupportedException); } - return -(int)byteIndex; + // Only Apple Silicon ABI currently supports unaligned stack argument shuffling + _ASSERTE(byteIndex == unsigned(index * TARGET_POINTER_SIZE)); + return index; +#else + // The Apple Silicon ABI does not consume an entire stack slot for every argument + // Arguments smaller than TARGET_POINTER_SIZE are always aligned to their argument size + // But may not begin at the beginning of a stack slot + // + // The argument location description has been updated to describe the stack offset and + // size in bytes. We will use it as our source of truth. + // + // The ShuffleEntries will be implemented by the Arm64 StubLinkerCPU::EmitLoadStoreRegImm + // using the 12-bit scaled immediate stack offset. The load/stores can be implemented as 1/2/4/8 + // bytes each (natural binary sizes). + // + // Each offset is encoded as a log2 size and a 12-bit unsigned scaled offset. 
+ // We only emit offsets of these natural binary sizes + // + // We choose the offset based on the ABI stack alignment requirements + // - Small integers are shuffled based on their size + // - HFA are shuffled based on their element size + // - Others are shuffled in full 8 byte chunks. + int bytesRemaining = m_argLocDesc->m_byteStackSize - m_currentByteStackIndex; + int log2Size = 3; + + // If isHFA, shuffle based on field size + // otherwise shuffle based on stack size + switch(m_argLocDesc->m_hfaFieldSize ? m_argLocDesc->m_hfaFieldSize : m_argLocDesc->m_byteStackSize) + { + case 1: + log2Size = 0; + break; + case 2: + log2Size = 1; + break; + case 4: + log2Size = 2; + break; + case 3: // Unsupported Size + case 5: // Unsupported Size + case 6: // Unsupported Size + case 7: // Unsupported Size + _ASSERTE(false); + break; + default: // Should be a multiple of 8 (TARGET_POINTER_SIZE) + _ASSERTE(bytesRemaining >= TARGET_POINTER_SIZE); + break; + } + + m_currentByteStackIndex += (1 << log2Size); + + // Delegates cannot handle overly large argument stacks due to shuffle entry encoding limitations. 
+ // Arm64 current implementation only supports 12 bit unsigned scaled offset + if ((byteIndex >> log2Size) > 0xfff) + { + COMPlusThrow(kNotSupportedException); + } + + _ASSERTE((byteIndex & ((1 << log2Size) - 1)) == 0); + + return (byteIndex >> log2Size) | (log2Size << 12); +#endif } // There are no more offsets to get, the caller should not have called us @@ -274,31 +340,8 @@ BOOL AddNextShuffleEntryToArray(ArgLocDesc sArgSrc, ArgLocDesc sArgDst, SArray(stackSizeDelta); + entry.stacksizedelta = static_cast(stackSizeDelta); pShuffleEntryArray->Append(entry); #else diff --git a/src/coreclr/vm/comdelegate.h b/src/coreclr/vm/comdelegate.h index b9ddb37d0b8b1..87997fd20cfbc 100644 --- a/src/coreclr/vm/comdelegate.h +++ b/src/coreclr/vm/comdelegate.h @@ -202,7 +202,7 @@ struct ShuffleEntry union { UINT16 dstofs; //if srcofs != SENTINEL - UINT16 stacksizedelta; //if dstofs == SENTINEL, difference in stack size between virtual and static sigs + UINT16 stacksizedelta; //if srcofs == SENTINEL, difference in stack size between virtual and static sigs }; }; diff --git a/src/tests/issues.targets b/src/tests/issues.targets index e35f4dc231dee..2c875c373a280 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -734,16 +734,6 @@ - - - https://github.com/dotnet/runtime/issues/47294 - - - https://github.com/dotnet/runtime/issues/47294 - - - https://github.com/dotnet/runtime/issues/47294 - https://github.com/dotnet/runtime/issues/48786