Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,8 @@ class CodeGen final : public CodeGenInterface
void genAmd64EmitterUnitTestsApx();
void genAmd64EmitterUnitTestsAvx10v2();
void genAmd64EmitterUnitTestsCCMP();
void genAmd64EmitterUnitTestsCFCMOV();
void genAmd64EmitterUnitTestsCTEST();
#endif

#endif // defined(DEBUG)
Expand Down Expand Up @@ -1641,9 +1643,11 @@ class CodeGen final : public CodeGenInterface
static insOpts ShiftOpToInsOpts(genTreeOps op);
#elif defined(TARGET_XARCH)
static instruction JumpKindToCmov(emitJumpKind condition);
#ifdef TARGET_AMD64
static instruction JumpKindToCcmp(emitJumpKind condition);
static insOpts OptsFromCFlags(insCflags flags);
#endif
#endif // TARGET_AMD64
#endif // TARGET_XARCH
void inst_JCC(GenCondition condition, BasicBlock* target);
void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg);

Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2751,6 +2751,14 @@ void CodeGen::genEmitterUnitTests()
{
genAmd64EmitterUnitTestsCCMP();
}
if (unitTestSectionAll || (strstr(unitTestSection, "cfcmov") != nullptr))
{
genAmd64EmitterUnitTestsCFCMOV();
}
if (unitTestSectionAll || (strstr(unitTestSection, "ctest") != nullptr))
{
genAmd64EmitterUnitTestsCTEST();
}

#elif defined(TARGET_ARM64)
if (unitTestSectionAll || (strstr(unitTestSection, "general") != nullptr))
Expand Down
188 changes: 183 additions & 5 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1436,6 +1436,7 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
return s_table[condition];
}

#ifdef TARGET_AMD64
//------------------------------------------------------------------------
// JumpKindToCcmp:
// Convert an emitJumpKind to the corresponding ccmp instruction.
Expand Down Expand Up @@ -1475,6 +1476,7 @@ instruction CodeGen::JumpKindToCcmp(emitJumpKind condition)
assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
return s_table[condition];
}
#endif // TARGET_AMD64

//------------------------------------------------------------------------
// genCodeForCompare: Produce code for a GT_SELECT/GT_SELECTCC node.
Expand Down Expand Up @@ -8631,8 +8633,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
regSet.verifyRegistersUsed(killMask);
}

#ifdef TARGET_AMD64
//-----------------------------------------------------------------------------------------
// OptsFromCFlags - Convert condition flags into approxpriate insOpts.
// OptsFromCFlags - Convert condition flags into appropriate insOpts.
//
// Arguments:
// flags - The condition flags to be converted.
Expand All @@ -8642,7 +8645,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
//
// Notes:
// This function maps the condition flags (e.g., CF, ZF, SF, OF) to the appropriate
// instruction options used for setting the default flag values in extneded EVEX
// instruction options used for setting the default flag values in extended EVEX
// encoding conditional instructions.
//
insOpts CodeGen::OptsFromCFlags(insCflags flags)
Expand All @@ -8659,8 +8662,6 @@ insOpts CodeGen::OptsFromCFlags(insCflags flags)
return (insOpts)opts;
}

#ifdef TARGET_AMD64

//-----------------------------------------------------------------------------------------
// genCodeForCCMP - Generate code for a conditional compare (CCMP) node.
//
Expand Down Expand Up @@ -8699,7 +8700,17 @@ void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp)
if (op2->isContainedIntOrIImmed())
{
GenTreeIntConCommon* intConst = op2->AsIntConCommon();
emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts);
if (intConst->IconValue() == 0)
{
// `ctest reg, reg` has a 1-byte shorter encoding than `ccmp reg, 0`.
assert((FIRST_CTEST_INSTRUCTION - FIRST_APX_INSTRUCTION) == 32);
instruction ctestIns = (instruction)(ccmpIns + FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION);
Copy link

Copilot AI Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deriving ctestIns via instruction-enum arithmetic is brittle (it silently depends on the relative ordering and contiguity of CCMP and CTEST instruction enums). Prefer an explicit mapping helper (e.g., derive insCC once and map to the corresponding CTEST instruction via a small table/switch) so future instruction table edits don’t break this transformation.

Suggested change
instruction ctestIns = (instruction)(ccmpIns + FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION);
// Derive the matching ctest instruction explicitly from the same condition
// instead of relying on the relative enum layout of CCMP/CTEST instructions.
instruction ctestIns = JumpKindToCtest(condDesc.jumpKind1);

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Member Author

@Ruihan-Yin Ruihan-Yin Apr 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added an assert to make sure the offset between FIRST_CCMP_INSTRUCTION and FIRST_CTEST_INSTRUCTION does not change. This is a simple 1:1 mapping, so a switch table is probably not necessary.

But I am open to change the design if other reviewers are inclined to enhance the robustness with the switch table.

Comment on lines +8706 to +8707
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This peephole relies on a hard-coded ‘32’ offset and an assert that references FIRST_APX_INSTRUCTION (even though the conversion uses FIRST_CCMP_INSTRUCTION). To make the mapping more robust/clear, prefer a compile-time check (static_assert) on (FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION) and compute the offset once from those two symbols (removing the magic number and the unrelated FIRST_APX reference).

Suggested change
assert((FIRST_CTEST_INSTRUCTION - FIRST_APX_INSTRUCTION) == 32);
instruction ctestIns = (instruction)(ccmpIns + FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION);
constexpr int ccmpToCtestOffset = FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION;
static_assert(ccmpToCtestOffset == 32);
instruction ctestIns = static_cast<instruction>(ccmpIns + ccmpToCtestOffset);

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix proposed here: #127536 (comment)

emit->emitIns_R_R(ctestIns, cmpSize, srcReg1, srcReg1, opts);
}
else
{
emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts);
}
}
else
{
Expand Down Expand Up @@ -9301,6 +9312,100 @@ void CodeGen::genAmd64EmitterUnitTestsAvx10v2()
theEmitter->emitIns_R_R(INS_vmovw_simd, EA_16BYTE, REG_XMM0, REG_XMM1);
}

/*****************************************************************************
* Unit tests for the CFCMOV instructions.
*/

void CodeGen::genAmd64EmitterUnitTestsCFCMOV()
{
emitter* theEmitter = GetEmitter();
genDefineTempLabel(genCreateTempLabel());

GenTreePhysReg physReg(REG_EDX);
physReg.SetRegNum(REG_EDX);
GenTreeIndir load = indirForm(TYP_INT, &physReg);

// Test all CC codes
for (uint32_t ins = FIRST_CFCMOV_INSTRUCTION; ins <= LAST_CFCMOV_INSTRUCTION; ins++)
{
theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX, INS_OPTS_NONE);
theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_NONE);

theEmitter->emitIns_R_A((instruction)ins, EA_8BYTE, REG_EAX, &load, INS_OPTS_NONE);
theEmitter->emitIns_R_A((instruction)ins, EA_4BYTE, REG_EAX, &load, INS_OPTS_NONE);

theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_EAX, REG_ECX, 4);
theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_EAX, REG_ECX, 4);
Comment thread
Ruihan-Yin marked this conversation as resolved.

theEmitter->emitIns_R_ARX((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 1, 0);
theEmitter->emitIns_R_ARX((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 1, 0);
theEmitter->emitIns_R_ARX((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, 4);
theEmitter->emitIns_R_ARX((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, 4);

theEmitter->emitIns_AR_R((instruction)ins, EA_8BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_nf);
theEmitter->emitIns_AR_R((instruction)ins, EA_4BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_nf);

theEmitter->emitIns_ARX_R((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, 4, INS_OPTS_EVEX_nf);
theEmitter->emitIns_ARX_R((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, 4, INS_OPTS_EVEX_nf);

theEmitter->emitIns_ARX_R((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_NA, 2, 0, INS_OPTS_EVEX_nf);
theEmitter->emitIns_ARX_R((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_NA, 2, 0, INS_OPTS_EVEX_nf);

theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX,
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf));
theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, REG_ECX,
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf));
theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2,
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf));
theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2,
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf));
theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load,
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf));
theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load,
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf));

theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R10, REG_R16, 0, 0,
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf));
theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R10, REG_R16, 0, 0,
(insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf));
}

// Test all CC codes
for (uint32_t ins = INS_cmovo; ins <= INS_cmovg; ins++)
{
theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX);
theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX);
theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_RCX);
theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_RCX);
theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_R16, REG_RCX);
theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_R16, REG_RCX);
theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX, 2);
theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, 2);
theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_R10, REG_RCX, 2);
theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_R10, REG_RCX, 2);
theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_RCX, 2);
theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_RCX, 2);
theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_RAX, 0, 0);
theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_RAX, 0, 0);
theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_R10, 0, 0);
theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_R10, 0, 0);
theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_R16, 0, 0);
theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_R16, 0, 0);
theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2,
(insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2,
(insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R17, REG_R10, 0, 0, (insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R17, REG_R10, 0, 0, (insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R17, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd));
theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R17, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd));
}
}

/*****************************************************************************
* Unit tests for the CCMP instructions.
*/
Expand Down Expand Up @@ -9379,6 +9484,79 @@ void CodeGen::genAmd64EmitterUnitTestsCCMP()
theEmitter->emitIns_R_C(INS_ccmpe, EA_4BYTE, REG_RAX, hnd, 4, INS_OPTS_EVEX_dfv_cf);
}

/*****************************************************************************
* Unit tests for the CTEST instructions.
*/
void CodeGen::genAmd64EmitterUnitTestsCTEST()
{
assert(FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION == 32);
emitter* theEmitter = GetEmitter();
genDefineTempLabel(genCreateTempLabel());
GenTreePhysReg physReg(REG_EDX);
physReg.SetRegNum(REG_EDX);
GenTreeIndir load = indirForm(TYP_INT, &physReg);

// ============
// Test RR form
// ============

// Test all sizes
theEmitter->emitIns_R_R(INS_test, EA_4BYTE, REG_EAX, REG_ECX);
theEmitter->emitIns_R_R(INS_cteste, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_R_R(INS_cteste, EA_8BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_R_R(INS_cteste, EA_2BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_R_R(INS_cteste, EA_1BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);

// Test all CC codes
for (uint32_t ins = FIRST_CTEST_INSTRUCTION; ins <= LAST_CTEST_INSTRUCTION; ins++)
{
theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
}

// Test all dfv
for (int i = 0; i < 16; i++)
{
theEmitter->emitIns_R_R(INS_cteste, EA_4BYTE, REG_RAX, REG_RCX, (insOpts)(i << INS_OPTS_EVEX_dfv_byte_offset));
}

// ============
// Test RI form (test small and large sizes and constants)
// ============

theEmitter->emitIns_R_I(INS_test, EA_8BYTE, REG_RAX, 123);
theEmitter->emitIns_R_I(INS_cteste, EA_8BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_R_I(INS_cteste, EA_8BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf);

theEmitter->emitIns_R_I(INS_test, EA_4BYTE, REG_RAX, 123);
theEmitter->emitIns_R_I(INS_cteste, EA_4BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_R_I(INS_cteste, EA_4BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf);

theEmitter->emitIns_R_I(INS_test, EA_2BYTE, REG_RAX, 123);
theEmitter->emitIns_R_I(INS_cteste, EA_2BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_R_I(INS_cteste, EA_2BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf);

theEmitter->emitIns_R_I(INS_test, EA_1BYTE, REG_RAX, 123);
theEmitter->emitIns_R_I(INS_cteste, EA_1BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_R_I(INS_cteste, EA_1BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf);

// ============
// Test MR form (test small and large sizes)
// ============

theEmitter->emitIns_AR_R(INS_cteste, EA_1BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_AR_R(INS_cteste, EA_2BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_AR_R(INS_cteste, EA_4BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_AR_R(INS_cteste, EA_8BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf);

// ============
// Test MI form
// ============

theEmitter->emitIns_I_AR(INS_cteste, EA_1BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_I_AR(INS_cteste, EA_2BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_I_AR(INS_cteste, EA_4BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf);
theEmitter->emitIns_I_AR(INS_cteste, EA_8BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf);
}
#endif // defined(DEBUG) && defined(TARGET_AMD64)

#ifdef PROFILING_SUPPORTED
Expand Down
Loading
Loading