Skip to content

Commit

Permalink
Implement Interlocked for small types. (#92974)
Browse files Browse the repository at this point in the history
* Implement Interlocked for small types.

Implements Interlocked.CompareExchange/Exchange for
byte/sbyte/short/ushort.

Uses an intrinsic on CoreCLR x64 and ARM64 and fallbacks
for other platforms.

Fixes #64658.

* Fix xarch assert

* Update PalRedhawkInline.h

* Update PalRedhawkInline.h

* Update threads.c

* Simplify NAOT handling

* Fix build

* Update atomic.h

* Update atomic.h

* Update atomic.h

* Fix windows intrinsics

* Update comutilnative.cpp

* Fix Windows builds

* Update icall-def.h

* Fix icall-def.h

* Improve tests

* Update InterlockedTests.cs

* Remove small type normalization

* Try to fix the intrinsics

* Fix ARM64 build

* Try to fix XArch 66 prefix and ARM64

* Fix typo

* Fix assert

* Code cleanup

* Update importercalls.cpp

* Update importercalls.cpp

* Extend small types properly

* Add RISC-V asserts

* c11 atomics interlocked ops for small types

* Simplify Windows atomics

* Restore NativeAOT fallbacks for other platforms

* Add more tests

* Format

* Update Interlocked.cs

* Fix tests and comment

* Add emitOutputCV handling

* Fix extension

* Use a slightly better fix

* More complete fix

* Fix the fix

* Fix more places

* CR feedback

* Fix corelib

* Fix tests

* Add using

* Match NativeAOT #ifs

* Update Interlocked.cs

* Fix test projects

* Fix tests, try linking with libatomic

* Improve build test

* Fix tests

* Fix tests again

* Remove libatomic

Co-authored-by: Jan Kotas <jkotas@microsoft.com>

* Cleanup, fix helper extension

* Fix test corelib

---------

Co-authored-by: Alexander Köplinger <alex.koeplinger@outlook.com>
Co-authored-by: Aleksey Kliger <alklig@microsoft.com>
Co-authored-by: Jan Kotas <jkotas@microsoft.com>
  • Loading branch information
4 people committed Jan 25, 2024
1 parent dd158ba commit de9d791
Show file tree
Hide file tree
Showing 34 changed files with 1,102 additions and 192 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,48 @@ public static partial class Interlocked
#endregion

#region Exchange
/// <summary>Sets a 8-bit unsigned integer to a specified value and returns the original value, as an atomic operation.</summary>
/// <param name="location1">The variable to set to the specified value.</param>
/// <param name="value">The value to which the <paramref name="location1"/> parameter is set.</param>
/// <returns>The original value of <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of location1 is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte Exchange(ref byte location1, byte value)
{
#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
return Exchange(ref location1, value); // Must expand intrinsic
#else
if (Unsafe.IsNullRef(ref location1))
ThrowHelper.ThrowNullReferenceException();
return Exchange8(ref location1, value);
#endif
}

[MethodImpl(MethodImplOptions.InternalCall)]
private static extern byte Exchange8(ref byte location1, byte value);

/// <summary>Sets a 16-bit signed integer to a specified value and returns the original value, as an atomic operation.</summary>
/// <param name="location1">The variable to set to the specified value.</param>
/// <param name="value">The value to which the <paramref name="location1"/> parameter is set.</param>
/// <returns>The original value of <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of location1 is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short Exchange(ref short location1, short value)
{
#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
return Exchange(ref location1, value); // Must expand intrinsic
#else
if (Unsafe.IsNullRef(ref location1))
ThrowHelper.ThrowNullReferenceException();
return Exchange16(ref location1, value);
#endif
}

[MethodImpl(MethodImplOptions.InternalCall)]
private static extern short Exchange16(ref short location1, short value);

/// <summary>Sets a 32-bit signed integer to a specified value and returns the original value, as an atomic operation.</summary>
/// <param name="location1">The variable to set to the specified value.</param>
/// <param name="value">The value to which the <paramref name="location1"/> parameter is set.</param>
Expand Down Expand Up @@ -120,6 +162,50 @@ public static long Exchange(ref long location1, long value)
#endregion

#region CompareExchange
/// <summary>Compares two 8-bit unsigned integers for equality and, if they are equal, replaces the first value.</summary>
/// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
/// <param name="value">The value that replaces the destination value if the comparison results in equality.</param>
/// <param name="comparand">The value that is compared to the value at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static byte CompareExchange(ref byte location1, byte value, byte comparand)
{
#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
return CompareExchange(ref location1, value, comparand); // Must expand intrinsic
#else
if (Unsafe.IsNullRef(ref location1))
ThrowHelper.ThrowNullReferenceException();
return CompareExchange8(ref location1, value, comparand);
#endif
}

[MethodImpl(MethodImplOptions.InternalCall)]
private static extern byte CompareExchange8(ref byte location1, byte value, byte comparand);

/// <summary>Compares two 16-bit signed integers for equality and, if they are equal, replaces the first value.</summary>
/// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
/// <param name="value">The value that replaces the destination value if the comparison results in equality.</param>
/// <param name="comparand">The value that is compared to the value at <paramref name="location1"/>.</param>
/// <returns>The original value in <paramref name="location1"/>.</returns>
/// <exception cref="NullReferenceException">The address of <paramref name="location1"/> is a null pointer.</exception>
[Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static short CompareExchange(ref short location1, short value, short comparand)
{
#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64
return CompareExchange(ref location1, value, comparand); // Must expand intrinsic
#else
if (Unsafe.IsNullRef(ref location1))
ThrowHelper.ThrowNullReferenceException();
return CompareExchange16(ref location1, value, comparand);
#endif
}

[MethodImpl(MethodImplOptions.InternalCall)]
private static extern short CompareExchange16(ref short location1, short value, short comparand);

/// <summary>Compares two 32-bit signed integers for equality and, if they are equal, replaces the first value.</summary>
/// <param name="location1">The destination, whose value is compared with <paramref name="comparand"/> and possibly replaced.</param>
/// <param name="value">The value that replaces the destination value if the comparison results in equality.</param>
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/inc/winwrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,4 +254,13 @@ WszCreateProcess(
LPPROCESS_INFORMATION lpProcessInformation
);

#ifdef HOST_WINDOWS

//
// Workaround for https://github.com/microsoft/WindowsAppSDK/issues/4074
// Windows SDK is missing InterlockedCompareExchange8 definition.
//
#define InterlockedCompareExchange8 _InterlockedCompareExchange8

#endif // HOST_WINDOWS
#endif // __WIN_WRAP_H__
81 changes: 69 additions & 12 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3796,6 +3796,8 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
genConsumeAddress(addr);
genConsumeRegs(data);

assert(treeNode->OperIs(GT_XCHG) || !varTypeIsSmall(treeNode->TypeGet()));

emitAttr dataSize = emitActualTypeSize(data);

if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
Expand All @@ -3818,8 +3820,19 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
break;
}
case GT_XCHG:
GetEmitter()->emitIns_R_R_R(INS_swpal, dataSize, dataReg, targetReg, addrReg);
{
instruction ins = INS_swpal;
if (varTypeIsByte(treeNode->TypeGet()))
{
ins = INS_swpalb;
}
else if (varTypeIsShort(treeNode->TypeGet()))
{
ins = INS_swpalh;
}
GetEmitter()->emitIns_R_R_R(ins, dataSize, dataReg, targetReg, addrReg);
break;
}
case GT_XADD:
GetEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg,
addrReg);
Expand Down Expand Up @@ -3878,8 +3891,21 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
BasicBlock* labelRetry = genCreateTempLabel();
genDefineTempLabel(labelRetry);

instruction insLd = INS_ldaxr;
instruction insSt = INS_stlxr;
if (varTypeIsByte(treeNode->TypeGet()))
{
insLd = INS_ldaxrb;
insSt = INS_stlxrb;
}
else if (varTypeIsShort(treeNode->TypeGet()))
{
insLd = INS_ldaxrh;
insSt = INS_stlxrh;
}

// The following instruction includes a acquire half barrier
GetEmitter()->emitIns_R_R(INS_ldaxr, dataSize, loadReg, addrReg);
GetEmitter()->emitIns_R_R(insLd, dataSize, loadReg, addrReg);

switch (treeNode->OperGet())
{
Expand All @@ -3905,7 +3931,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
}

// The following instruction includes a release half barrier
GetEmitter()->emitIns_R_R_R(INS_stlxr, dataSize, exResultReg, storeDataReg, addrReg);
GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, storeDataReg, addrReg);

GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);

Expand All @@ -3914,8 +3940,14 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
}

if (treeNode->GetRegNum() != REG_NA)
if (targetReg != REG_NA)
{
if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet()))
{
instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb;
GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false);
}

genProduceReg(treeNode);
}
}
Expand Down Expand Up @@ -3943,18 +3975,27 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
genConsumeRegs(data);
genConsumeRegs(comparand);

emitAttr dataSize = emitActualTypeSize(data);

if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
{
emitAttr dataSize = emitActualTypeSize(data);

// casal use the comparand as the target reg
GetEmitter()->emitIns_Mov(INS_mov, dataSize, targetReg, comparandReg, /* canSkip */ true);

// Catch case we destroyed data or address before use
noway_assert((addrReg != targetReg) || (targetReg == comparandReg));
noway_assert((dataReg != targetReg) || (targetReg == comparandReg));

GetEmitter()->emitIns_R_R_R(INS_casal, dataSize, targetReg, dataReg, addrReg);
instruction ins = INS_casal;
if (varTypeIsByte(treeNode->TypeGet()))
{
ins = INS_casalb;
}
else if (varTypeIsShort(treeNode->TypeGet()))
{
ins = INS_casalh;
}
GetEmitter()->emitIns_R_R_R(ins, dataSize, targetReg, dataReg, addrReg);
}
else
{
Expand Down Expand Up @@ -3988,9 +4029,6 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)

gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet());

// TODO-ARM64-CQ Use ARMv8.1 atomics if available
// https://github.com/dotnet/runtime/issues/8225

// Emit code like this:
// retry:
// ldxr targetReg, [addrReg]
Expand All @@ -4005,8 +4043,21 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
BasicBlock* labelCompareFail = genCreateTempLabel();
genDefineTempLabel(labelRetry);

instruction insLd = INS_ldaxr;
instruction insSt = INS_stlxr;
if (varTypeIsByte(treeNode->TypeGet()))
{
insLd = INS_ldaxrb;
insSt = INS_stlxrb;
}
else if (varTypeIsShort(treeNode->TypeGet()))
{
insLd = INS_ldaxrh;
insSt = INS_stlxrh;
}

// The following instruction includes a acquire half barrier
GetEmitter()->emitIns_R_R(INS_ldaxr, emitTypeSize(treeNode), targetReg, addrReg);
GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg);

if (comparand->isContainedIntOrIImmed())
{
Expand All @@ -4028,7 +4079,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
}

// The following instruction includes a release half barrier
GetEmitter()->emitIns_R_R_R(INS_stlxr, emitTypeSize(treeNode), exResultReg, dataReg, addrReg);
GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg);

GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);

Expand All @@ -4039,6 +4090,12 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
}

if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet()))
{
instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb;
GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false);
}

genProduceReg(treeNode);
}

Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2891,6 +2891,8 @@ void CodeGen::genJumpTable(GenTree* treeNode)
//
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
assert(!varTypeIsSmall(treeNode->TypeGet()));

GenTree* data = treeNode->AsOp()->gtOp2;
GenTree* addr = treeNode->AsOp()->gtOp1;
regNumber dataReg = data->GetRegNum();
Expand Down Expand Up @@ -2944,6 +2946,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
{
assert(treeNode->OperIs(GT_CMPXCHG));
assert(!varTypeIsSmall(treeNode->TypeGet()));

GenTree* locOp = treeNode->Addr();
GenTree* valOp = treeNode->Data();
Expand Down
26 changes: 22 additions & 4 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4424,14 +4424,15 @@ void CodeGen::genCodeForLockAdd(GenTreeOp* node)
void CodeGen::genLockedInstructions(GenTreeOp* node)
{
assert(node->OperIs(GT_XADD, GT_XCHG, GT_XORR, GT_XAND));
assert(node->OperIs(GT_XCHG) || !varTypeIsSmall(node->TypeGet()));

GenTree* addr = node->gtGetOp1();
GenTree* data = node->gtGetOp2();
emitAttr size = emitTypeSize(node->TypeGet());

assert(addr->isUsedFromReg());
assert(data->isUsedFromReg());
assert((size == EA_4BYTE) || (size == EA_PTRSIZE) || (size == EA_GCREF));
assert((size <= EA_PTRSIZE) || (size == EA_GCREF));

genConsumeOperands(node);

Expand Down Expand Up @@ -4499,7 +4500,15 @@ void CodeGen::genLockedInstructions(GenTreeOp* node)
instGen(INS_lock);
}

GetEmitter()->emitIns_AR_R(ins, size, node->GetRegNum(), addr->GetRegNum(), 0);
regNumber targetReg = node->GetRegNum();
GetEmitter()->emitIns_AR_R(ins, size, targetReg, addr->GetRegNum(), 0);

if (varTypeIsSmall(node->TypeGet()))
{
instruction mov = varTypeIsSigned(node->TypeGet()) ? INS_movsx : INS_movzx;
GetEmitter()->emitIns_Mov(mov, size, targetReg, targetReg, /* canSkip */ false);
}

genProduceReg(node);
}

Expand All @@ -4515,6 +4524,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* tree)

var_types targetType = tree->TypeGet();
regNumber targetReg = tree->GetRegNum();
emitAttr size = emitTypeSize(tree->TypeGet());

GenTree* location = tree->Addr(); // arg1
GenTree* value = tree->Data(); // arg2
Expand All @@ -4535,10 +4545,18 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* tree)
// location is Rm
instGen(INS_lock);

GetEmitter()->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->GetRegNum(), location->GetRegNum(), 0);
GetEmitter()->emitIns_AR_R(INS_cmpxchg, size, value->GetRegNum(), location->GetRegNum(), 0);

// Result is in RAX
inst_Mov(targetType, targetReg, REG_RAX, /* canSkip */ true);
if (varTypeIsSmall(tree->TypeGet()))
{
instruction mov = varTypeIsSigned(tree->TypeGet()) ? INS_movsx : INS_movzx;
GetEmitter()->emitIns_Mov(mov, size, targetReg, REG_RAX, /* canSkip */ false);
}
else
{
inst_Mov(targetType, targetReg, REG_RAX, /* canSkip */ true);
}

genProduceReg(tree);
}
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/emitfmtsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ IF_DEF(MRW_SHF, IS_GM_RW, DSP_CNS) // r/w
IF_DEF(MRD_RRD, IS_GM_RD|IS_R1_RD, DSP) // read [mem], read reg1
IF_DEF(MWR_RRD, IS_GM_WR|IS_R1_RD, DSP) // write [mem], read reg1
IF_DEF(MRW_RRD, IS_GM_RW|IS_R1_RD, DSP) // r/w [mem], read reg1
IF_DEF(MRW_RRW, IS_GM_RW|IS_R1_RW, DSP) // r/w [mem], r/w reg1 - for XCHG [mem], reg1

IF_DEF(MRD_RRD_CNS, IS_GM_RD|IS_R1_RD, DSP_CNS) // read [mem], read reg1, const
IF_DEF(MWR_RRD_CNS, IS_GM_WR|IS_R1_RD, DSP_CNS) // write [mem], read reg1, const
Expand Down Expand Up @@ -213,6 +214,7 @@ IF_DEF(SRW_SHF, IS_SF_RW, CNS) // r/w
IF_DEF(SRD_RRD, IS_SF_RD|IS_R1_RD, NONE) // read [stk], read reg1
IF_DEF(SWR_RRD, IS_SF_WR|IS_R1_RD, NONE) // write [stk], read reg1
IF_DEF(SRW_RRD, IS_SF_RW|IS_R1_RD, NONE) // r/w [stk], read reg1
IF_DEF(SRW_RRW, IS_SF_RW|IS_R1_RW, NONE) // r/w [stk], read reg1 - for XCHG [stk], reg1

IF_DEF(SRD_RRD_CNS, IS_SF_RD|IS_R1_RD, CNS) // read [stk], read reg1, const
IF_DEF(SWR_RRD_CNS, IS_SF_WR|IS_R1_RD, CNS) // write [stk], read reg1, const
Expand Down Expand Up @@ -257,6 +259,7 @@ IF_DEF(ARW_SHF, IS_AM_RW, AMD_CNS) // r/w
IF_DEF(ARD_RRD, IS_AM_RD|IS_R1_RD, AMD) // read [adr], read reg1
IF_DEF(AWR_RRD, IS_AM_WR|IS_R1_RD, AMD) // write [adr], read reg1
IF_DEF(ARW_RRD, IS_AM_RW|IS_R1_RD, AMD) // r/w [adr], read reg1
IF_DEF(ARW_RRW, IS_AM_RW|IS_R1_RW, AMD) // r/w [adr], r/w reg1 - for XCHG [adr], reg1

IF_DEF(ARD_RRD_CNS, IS_AM_RD|IS_R1_RD, AMD_CNS) // read [adr], read reg1, const
IF_DEF(AWR_RRD_CNS, IS_AM_WR|IS_R1_RD, AMD_CNS) // write [adr], read reg1, const
Expand Down
Loading

0 comments on commit de9d791

Please sign in to comment.