Skip to content

Commit

Permalink
Merge branch 'main' into issue-100538
Browse files Browse the repository at this point in the history
  • Loading branch information
eiriktsarpalis committed May 12, 2024
2 parents b5f4d3f + 3de068c commit 83fc4c4
Show file tree
Hide file tree
Showing 17 changed files with 101 additions and 118 deletions.
12 changes: 10 additions & 2 deletions src/coreclr/classlibnative/bcltype/arraynative.inl
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,12 @@ FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t
_ASSERTE(CheckPointer(dest));
_ASSERTE(CheckPointer(src));

GCHeapMemoryBarrier();
const bool notInHeap = ((BYTE*)dest < g_lowest_address || (BYTE*)dest >= g_highest_address);

if (!notInHeap)
{
GCHeapMemoryBarrier();
}

// To be able to copy forwards, the destination buffer cannot start inside the source buffer
if ((size_t)dest - (size_t)src >= len)
Expand All @@ -319,7 +324,10 @@ FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t
InlinedBackwardGCSafeCopyHelper(dest, src, len);
}

InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len);
if (!notInHeap)
{
InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len);
}
}

#endif // !_ARRAYNATIVE_INL_
40 changes: 40 additions & 0 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,46 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition,
return newMask;
}

//------------------------------------------------------------------------
// Register masks used when limiting the allocator to a small register set.
//
// When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
// registers, so as to get different coverage than limiting to callee or caller.
// At least for x86 and AMD64, and potentially other architectures that will support SIMD,
// we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
// Hence the "SmallFPSet" has 5 elements.

#if defined(TARGET_AMD64)
#ifdef UNIX_AMD64_ABI
// On System V, RDI and RSI are not callee saved. Use R12 and R13 as callee saved registers.
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
#else // !UNIX_AMD64_ABI
// On Windows AMD64, use RDI and RSI as callee saved registers.
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
#endif // !UNIX_AMD64_ABI
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
// The upper SIMD set covers XMM16 through XMM31.
static const regMaskTP LsraLimitUpperSimdSet =
(RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 |
RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31);
#elif defined(TARGET_ARM)
// On ARM, we may need two registers to set up the target register for a virtual call, so we need
// to have at least the maximum number of arg registers, plus 2.
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
#elif defined(TARGET_ARM64)
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
#elif defined(TARGET_X86)
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
#elif defined(TARGET_LOONGARCH64)
static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
#elif defined(TARGET_RISCV64)
// Same mask expressions as the LoongArch64 branch above.
static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
#else
#error Unsupported or unset target architecture
#endif // target

//------------------------------------------------------------------------
// stressLimitRegs: Given a set of registers, expressed as a register mask, reduce
// them based on the current stress options.
Expand Down
41 changes: 0 additions & 41 deletions src/coreclr/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -768,47 +768,6 @@ class LinearScan : public LinearScanInterface
#endif
};

// When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
// registers, so as to get different coverage than limiting to callee or caller.
// At least for x86 and AMD64, and potentially other architectures that will support SIMD,
// we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
// Hence the "SmallFPSet" has 5 elements.

#if defined(TARGET_AMD64)
#ifdef UNIX_AMD64_ABI
// On System V, RDI and RSI are not callee saved. Use R12 and R13 as callee saved registers.
static const regMaskTP LsraLimitSmallIntSet =
(RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
#else // !UNIX_AMD64_ABI
// On Windows AMD64, use RDI and RSI as callee saved registers.
static const regMaskTP LsraLimitSmallIntSet =
(RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
#endif // !UNIX_AMD64_ABI
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
// The upper SIMD set covers XMM16 through XMM31.
static const regMaskTP LsraLimitUpperSimdSet =
(RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 |
RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31);
#elif defined(TARGET_ARM)
// On ARM, we may need two registers to set up the target register for a virtual call, so we need
// to have at least the maximum number of arg registers, plus 2.
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
#elif defined(TARGET_ARM64)
static constexpr regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
static constexpr regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
#elif defined(TARGET_X86)
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
#elif defined(TARGET_LOONGARCH64)
static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
#elif defined(TARGET_RISCV64)
// Same mask expressions as the LoongArch64 branch above.
static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0);
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9);
#else
#error Unsupported or unset target architecture
#endif // target

LsraStressLimitRegs getStressLimitRegs()
{
return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
Expand Down
23 changes: 17 additions & 6 deletions src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,28 @@ FCIMPLEND

FCIMPL3(void, RhBulkMoveWithWriteBarrier, uint8_t* pDest, uint8_t* pSrc, size_t cbDest)
{
// It is possible that the bulk write is publishing object references accessible so far only
// by the current thread to shared memory.
// The memory model requires that writes performed by current thread are observable no later
// than the writes that will actually publish the references.
GCHeapMemoryBarrier();
if (cbDest == 0 || pDest == pSrc)
return;

const bool notInHeap = pDest < g_lowest_address || pDest >= g_highest_address;

if (!notInHeap)
{
// It is possible that the bulk write is publishing object references accessible so far only
// by the current thread to shared memory.
// The memory model requires that writes performed by current thread are observable no later
// than the writes that will actually publish the references.
GCHeapMemoryBarrier();
}

if (pDest <= pSrc || pSrc + cbDest <= pDest)
InlineForwardGCSafeCopy(pDest, pSrc, cbDest);
else
InlineBackwardGCSafeCopy(pDest, pSrc, cbDest);

InlinedBulkWriteBarrier(pDest, cbDest);
if (!notInHeap)
{
InlinedBulkWriteBarrier(pDest, cbDest);
}
}
FCIMPLEND
11 changes: 3 additions & 8 deletions src/coreclr/nativeaot/Runtime/GCMemoryHelpers.inl
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,9 @@ FORCEINLINE void InlineCheckedWriteBarrier(void * dst, void * ref)

FORCEINLINE void InlinedBulkWriteBarrier(void* pMemStart, size_t cbMemSize)
{
// Check whether the writes were even into the heap. If not there's no card update required.
// Also if the size is smaller than a pointer, no write barrier is required.
// This case can occur with universal shared generic code where the size
// is not known at compile time.
if (pMemStart < g_lowest_address || (pMemStart >= g_highest_address) || (cbMemSize < sizeof(uintptr_t)))
{
return;
}
// Caller is expected to check whether the writes were even into the heap
ASSERT(cbMemSize >= sizeof(uintptr_t));
ASSERT((pMemStart >= g_lowest_address) && (pMemStart < g_highest_address));

#ifdef WRITE_BARRIER_CHECK
// Perform shadow heap updates corresponding to the gc heap updates that immediately preceded this helper
Expand Down
37 changes: 1 addition & 36 deletions src/coreclr/vm/gchelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1484,39 +1484,4 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref)
}
}
}
}

//----------------------------------------------------------------------------
//
// Write Barrier Support for bulk copy ("Clone") operations
//
// Arguments:
//    start - the target bulk copy start point
//    len   - the length of the bulk copy (in bytes)
//
// Notes:
//    When len is smaller than a pointer this function does nothing; otherwise
//    it forwards both arguments to InlinedSetCardsAfterBulkCopyHelper.
//
// Performance Note:
//
// This is implemented somewhat "conservatively"; that is, we
// assume that all the contents of the bulk copy are object
// references. If they are not, and the value lies in the
// ephemeral range, we will set false positives in the card table.
//
// We could use the pointer maps and do this more accurately if necessary

#if defined(_MSC_VER) && defined(TARGET_X86)
#pragma optimize("y", on) // Small critical routines, don't put in EBP frame
#endif //_MSC_VER && TARGET_X86

void
SetCardsAfterBulkCopy(Object **start, size_t len)
{
// If the size is smaller than a pointer, no write barrier is required.
if (len >= sizeof(uintptr_t))
{
InlinedSetCardsAfterBulkCopyHelper(start, len);
}
}

#if defined(_MSC_VER) && defined(TARGET_X86)
#pragma optimize("", on) // Go back to command line default optimizations
#endif //_MSC_VER && TARGET_X86
}
1 change: 0 additions & 1 deletion src/coreclr/vm/gchelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ extern void ThrowOutOfMemoryDimensionsExceeded();
//========================================================================

void ErectWriteBarrier(OBJECTREF* dst, OBJECTREF ref);
void SetCardsAfterBulkCopy(Object **start, size_t len);

void PublishFrozenObject(Object*& orObject);

Expand Down
8 changes: 2 additions & 6 deletions src/coreclr/vm/gchelpers.inl
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,9 @@

FORCEINLINE void InlinedSetCardsAfterBulkCopyHelper(Object **start, size_t len)
{
// Check whether the writes were even into the heap. If not there's no card update required.
// Also if the size is smaller than a pointer, no write barrier is required.
// Caller is expected to check whether the writes were even into the heap
_ASSERTE(len >= sizeof(uintptr_t));
if ((BYTE*)start < g_lowest_address || (BYTE*)start >= g_highest_address)
{
return;
}
_ASSERTE(((BYTE*)start >= g_lowest_address) && ((BYTE*)start < g_highest_address));

// Don't optimize the Generation 0 case if we are checking for write barrier violations
// since we need to update the shadow heap even in the generation 0 case.
Expand Down
3 changes: 3 additions & 0 deletions src/mono/mono/metadata/icall.c
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,9 @@ ves_icall_System_Runtime_RuntimeImports_Memmove (guint8 *destination, guint8 *so
void
ves_icall_System_Buffer_BulkMoveWithWriteBarrier (guint8 *destination, guint8 *source, size_t len, MonoType *type)
{
if (len == 0 || destination == source)
return;

if (MONO_TYPE_IS_REFERENCE (type))
mono_gc_wbarrier_arrayref_copy_internal (destination, source, (guint)len);
else
Expand Down
1 change: 1 addition & 0 deletions src/tests/JIT/Methodical/doublearray/dblarray1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ public static void Run(Action f)
[Fact]
[SkipOnCoreClr("This test is not compatible with GCStress.", RuntimeTestModes.AnyGCStress)]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtime/issues/101284", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))]
public static int TestEntryPoint()
{
if (RuntimeInformation.ProcessArchitecture == Architecture.X86)
Expand Down
1 change: 1 addition & 0 deletions src/tests/JIT/Methodical/doublearray/dblarray2.cs
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ public static void Run(Action f)
[Fact]
[SkipOnCoreClr("This test is not compatible with GCStress.", RuntimeTestModes.AnyGCStress)]
[SkipOnMono("Needs triage")]
[ActiveIssue("https://github.com/dotnet/runtime/issues/101284", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))]
public static int TestEntryPoint()
{
if (RuntimeInformation.ProcessArchitecture == Architecture.X86)
Expand Down
1 change: 1 addition & 0 deletions src/tests/JIT/Methodical/doublearray/dblarray3.cs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ public static void Run(Action f)
[SkipOnCoreClr("This test is not compatible with GCStress.", RuntimeTestModes.AnyGCStress)]
[SkipOnMono("Needs triage")]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtime/issues/101284", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))]
public static int TestEntryPoint()
{
Console.WriteLine(RuntimeInformation.ProcessArchitecture);
Expand Down
1 change: 1 addition & 0 deletions src/tests/JIT/Methodical/doublearray/dblarray4.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public class DblArray4
[Fact]
[SkipOnCoreClr("This test is not compatible with GCStress.", RuntimeTestModes.AnyGCStress)]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtime/issues/101284", typeof(TestLibrary.Utilities), nameof(TestLibrary.Utilities.IsNativeAot))]
public static int TestEntryPoint()
{
if (RuntimeInformation.ProcessArchitecture == Architecture.X86)
Expand Down
1 change: 0 additions & 1 deletion src/tests/JIT/Methodical/refany/array2.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ private static void TestRef(TypedReference _ref)

[Fact]
[OuterLoop]
[ActiveIssue("https://github.com/dotnet/runtimelab/issues/155", typeof(PlatformDetection), nameof(PlatformDetection.IsNonZeroLowerBoundArrayNotSupported))]
public static int TestEntryPoint()
{
ulong[,] aul2 = new ulong[,] { { 1, 2, 3 }, { 4, 5, 6 } };
Expand Down
19 changes: 19 additions & 0 deletions src/tests/JIT/Methodical/refany/array2.il
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@
.publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A )
.ver 4:0:0:0
}
.assembly extern System.Runtime { .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) }
.assembly ASSEMBLY_NAME
{
}
.assembly extern xunit.core {}
.assembly extern Microsoft.DotNet.XUnitExtensions { .publickeytoken = (31 BF 38 56 AD 36 4E 35 ) }
.assembly extern TestLibrary {}
// MVID: {1781A471-82F3-4159-8D0F-B3B5A9FF1BB6}
.namespace JitTest_array2_refany_il
{
Expand Down Expand Up @@ -125,6 +128,22 @@
.custom instance void [xunit.core]Xunit.FactAttribute::.ctor() = (
01 00 00 00
)
// [ActiveIssue("https://github.com/dotnet/runtimelab/issues/155", typeof(PlatformDetection), nameof(PlatformDetection.IsNonZeroLowerBoundArrayNotSupported))]
.custom instance void [Microsoft.DotNet.XUnitExtensions]Xunit.ActiveIssueAttribute::.ctor(string,
class [System.Runtime]System.Type,
string[]) = ( 01 00 2F 68 74 74 70 73 3A 2F 2F 67 69 74 68 75
62 2E 63 6F 6D 2F 64 6F 74 6E 65 74 2F 72 75 6E
74 69 6D 65 6C 61 62 2F 69 73 73 75 65 73 2F 31
35 35 61 54 65 73 74 4C 69 62 72 61 72 79 2E 50
6C 61 74 66 6F 72 6D 44 65 74 65 63 74 69 6F 6E
2C 20 54 65 73 74 4C 69 62 72 61 72 79 2C 20 56
65 72 73 69 6F 6E 3D 30 2E 30 2E 30 2E 30 2C 20
43 75 6C 74 75 72 65 3D 6E 65 75 74 72 61 6C 2C
20 50 75 62 6C 69 63 4B 65 79 54 6F 6B 65 6E 3D
6E 75 6C 6C 01 00 00 00 24 49 73 4E 6F 6E 5A 65
72 6F 4C 6F 77 65 72 42 6F 75 6E 64 41 72 72 61
79 4E 6F 74 53 75 70 70 6F 72 74 65 64 00 00 )

.entrypoint
// Code size 150 (0x96)
.maxstack 4
Expand Down
4 changes: 2 additions & 2 deletions src/tests/ilverify/TestDataLoader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,10 @@ public TestResolver(Dictionary<string, string> simpleNameToPathMap)
_simpleNameToPathMap = simpleNameToPathMap;
}

PEReader IResolver.ResolveAssembly(AssemblyName assemblyName)
PEReader IResolver.ResolveAssembly(AssemblyNameInfo assemblyName)
=> Resolve(assemblyName.Name);

PEReader IResolver.ResolveModule(AssemblyName referencingModule, string fileName)
PEReader IResolver.ResolveModule(AssemblyNameInfo referencingModule, string fileName)
=> Resolve(Path.GetFileNameWithoutExtension(fileName));

public PEReader Resolve(string simpleName)
Expand Down
15 changes: 0 additions & 15 deletions src/tests/issues.targets
Original file line number Diff line number Diff line change
Expand Up @@ -1152,21 +1152,6 @@

<!-- NativeAOT x86 specific -->
<ItemGroup Condition="'$(XunitTestBinBase)' != '' and '$(TestBuildMode)' == 'nativeaot' and '$(RuntimeFlavor)' == 'coreclr' and '$(TargetArchitecture)' == 'x86'">
<ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/doublearray/dblarray2_cs_d/**">
<Issue>https://github.com/dotnet/runtime/issues/101284</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/doublearray/dblarray2_cs_do/**">
<Issue>https://github.com/dotnet/runtime/issues/101284</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/doublearray/dblarray2_cs_r/**">
<Issue>https://github.com/dotnet/runtime/issues/101284</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/doublearray/dblarray2_cs_ro/**">
<Issue>https://github.com/dotnet/runtime/issues/101284</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/Methodical/doublearray/dblarray3_cs_do/**">
<Issue>https://github.com/dotnet/runtime/issues/101284</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/opt/perf/doublealign/Arrays/**">
<Issue>https://github.com/dotnet/runtime/issues/101284</Issue>
</ExcludeList>
Expand Down

0 comments on commit 83fc4c4

Please sign in to comment.