Skip to content

Commit

Permalink
Intrinsify Interlocked.And and Interlocked.Or on XARCH (#96258)
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorBo committed Jan 5, 2024
1 parent 51f6d8d commit a5ff7e6
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 11 deletions.
57 changes: 52 additions & 5 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2047,12 +2047,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

case GT_XCHG:
case GT_XADD:
genLockedInstructions(treeNode->AsOp());
break;

case GT_XORR:
case GT_XAND:
NYI("Interlocked.Or and Interlocked.And aren't implemented for x86 yet.");
genLockedInstructions(treeNode->AsOp());
break;

case GT_MEMORYBARRIER:
Expand Down Expand Up @@ -4413,7 +4410,7 @@ void CodeGen::genCodeForLockAdd(GenTreeOp* node)
//
void CodeGen::genLockedInstructions(GenTreeOp* node)
{
assert(node->OperIs(GT_XADD, GT_XCHG));
assert(node->OperIs(GT_XADD, GT_XCHG, GT_XORR, GT_XAND));

GenTree* addr = node->gtGetOp1();
GenTree* data = node->gtGetOp2();
Expand All @@ -4425,6 +4422,56 @@ void CodeGen::genLockedInstructions(GenTreeOp* node)

genConsumeOperands(node);

if (node->OperIs(GT_XORR, GT_XAND))
{
const instruction ins = node->OperIs(GT_XORR) ? INS_or : INS_and;

if (node->IsUnusedValue())
{
// If value is not used we can emit a short form:
//
// lock
// or/and dword ptr [addrReg], val
//
instGen(INS_lock);
GetEmitter()->emitIns_AR_R(ins, size, data->GetRegNum(), addr->GetRegNum(), 0);
}
else
{
// When the value is used (it's the original value of the memory location)
// we fall back to the cmpxchg-loop idiom.

// for cmpxchg we need to keep the original value in RAX
assert(node->GetRegNum() == REG_RAX);

// mov RAX, dword ptr [addrReg]
//.LOOP:
// mov tmp, RAX
// or/and tmp, val
// lock
// cmpxchg dword ptr [addrReg], tmp
// jne .LOOP
// (no ret is emitted here; on loop exit the node's value is in RAX)

// Extend liveness of addr
gcInfo.gcMarkRegPtrVal(addr->GetRegNum(), addr->TypeGet());

const regNumber tmpReg = node->GetSingleTempReg();
GetEmitter()->emitIns_R_AR(INS_mov, size, REG_RAX, addr->GetRegNum(), 0);
BasicBlock* loop = genCreateTempLabel();
genDefineTempLabel(loop);
GetEmitter()->emitIns_Mov(INS_mov, size, tmpReg, REG_RAX, false);
GetEmitter()->emitIns_R_R(ins, size, tmpReg, data->GetRegNum());
instGen(INS_lock);
GetEmitter()->emitIns_AR_R(INS_cmpxchg, size, tmpReg, addr->GetRegNum(), 0);
inst_JMP(EJ_jne, loop);

gcInfo.gcMarkRegSetNpt(genRegMask(addr->GetRegNum()));
genProduceReg(node);
}
return;
}

// If the destination register is different from the data register then we need
// to first move the data to the target register. Make sure we don't overwrite
// the address, the register allocator should have taken care of this.
Expand Down
14 changes: 10 additions & 4 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3231,14 +3231,20 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
break;
}

#if defined(TARGET_ARM64) || defined(TARGET_RISCV64)
// Intrinsify Interlocked.Or and Interlocked.And only for arm64-v8.1 (and newer) and for RV64A
// TODO-CQ: Implement for XArch (https://github.com/dotnet/runtime/issues/32239).
#if defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_XARCH)
case NI_System_Threading_Interlocked_Or:
case NI_System_Threading_Interlocked_And:
{
ARM64_ONLY(if (compOpportunisticallyDependsOn(InstructionSet_Atomics)))
#if defined(TARGET_ARM64)
if (compOpportunisticallyDependsOn(InstructionSet_Atomics))
#endif
{
#if defined(TARGET_X86)
if (genActualType(callType) == TYP_LONG)
{
break;
}
#endif
assert(sig->numArgs == 2);
GenTree* op2 = impPopStack().val;
GenTree* op1 = impPopStack().val;
Expand Down
2 changes: 0 additions & 2 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -640,8 +640,6 @@ GenTree* Lowering::LowerNode(GenTree* node)
CheckImmedAndMakeContained(node, node->AsOp()->gtOp2);
break;
#elif defined(TARGET_XARCH)
case GT_XORR:
case GT_XAND:
case GT_XADD:
if (node->IsUnusedValue())
{
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,19 @@ int LinearScan::BuildNode(GenTree* tree)

case GT_XORR:
case GT_XAND:
if (!tree->IsUnusedValue())
{
// If the tree's value is used, we'll emit a cmpxchg-loop idiom (requires RAX).
buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX);
BuildUse(tree->gtGetOp1(), availableIntRegs & ~RBM_RAX);
BuildUse(tree->gtGetOp2(), availableIntRegs & ~RBM_RAX);
BuildDef(tree, RBM_RAX);
buildInternalRegisterUses();
srcCount = 2;
assert(dstCount == 1);
break;
}
FALLTHROUGH;
case GT_XADD:
case GT_XCHG:
{
Expand Down

0 comments on commit a5ff7e6

Please sign in to comment.