From d0d4999e620c12fcead7e4e67c15340be2a8535b Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Tue, 28 Apr 2026 15:50:05 -0700 Subject: [PATCH 1/6] introduce the remaining NCI instructions -CTEST, CFCMOV --- src/coreclr/jit/codegen.h | 6 +- src/coreclr/jit/codegenlinear.cpp | 8 ++ src/coreclr/jit/codegenxarch.cpp | 174 +++++++++++++++++++++++++++++- src/coreclr/jit/emitxarch.cpp | 163 +++++++++++++++++++++------- src/coreclr/jit/emitxarch.h | 17 ++- src/coreclr/jit/instrsxarch.h | 38 +++++++ 6 files changed, 362 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 8a9e8f06e4f428..639d6b36b11f57 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -630,6 +630,8 @@ class CodeGen final : public CodeGenInterface void genAmd64EmitterUnitTestsApx(); void genAmd64EmitterUnitTestsAvx10v2(); void genAmd64EmitterUnitTestsCCMP(); + void genAmd64EmitterUnitTestsCFCMOV(); + void genAmd64EmitterUnitTestsCTEST(); #endif #endif // defined(DEBUG) @@ -1641,9 +1643,11 @@ class CodeGen final : public CodeGenInterface static insOpts ShiftOpToInsOpts(genTreeOps op); #elif defined(TARGET_XARCH) static instruction JumpKindToCmov(emitJumpKind condition); +#ifdef TARGET_AMD64 static instruction JumpKindToCcmp(emitJumpKind condition); static insOpts OptsFromCFlags(insCflags flags); -#endif +#endif // TARGET_AMD64 +#endif // TARGET_XARCH void inst_JCC(GenCondition condition, BasicBlock* target); void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg); diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 5386aa65a5d74b..cd107cda815826 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2751,6 +2751,14 @@ void CodeGen::genEmitterUnitTests() { genAmd64EmitterUnitTestsCCMP(); } + if (unitTestSectionAll || (strstr(unitTestSection, "cfcmov") != nullptr)) + { + genAmd64EmitterUnitTestsCFCMOV(); + } + if (unitTestSectionAll || 
(strstr(unitTestSection, "ctest") != nullptr)) + { + genAmd64EmitterUnitTestsCTEST(); + } #elif defined(TARGET_ARM64) if (unitTestSectionAll || (strstr(unitTestSection, "general") != nullptr)) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 892d3f2def304f..a9359510014481 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1436,6 +1436,7 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition) return s_table[condition]; } +#ifdef TARGET_AMD64 //------------------------------------------------------------------------ // JumpKindToCcmp: // Convert an emitJumpKind to the corresponding ccmp instruction. @@ -1475,6 +1476,7 @@ instruction CodeGen::JumpKindToCcmp(emitJumpKind condition) assert((condition >= EJ_NONE) && (condition < EJ_COUNT)); return s_table[condition]; } +#endif // TARGET_AMD64 //------------------------------------------------------------------------ // genCodeForCompare: Produce code for a GT_SELECT/GT_SELECTCC node. @@ -8631,6 +8633,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regSet.verifyRegistersUsed(killMask); } +#ifdef TARGET_AMD64 //----------------------------------------------------------------------------------------- // OptsFromCFlags - Convert condition flags into approxpriate insOpts. // @@ -8659,8 +8662,6 @@ insOpts CodeGen::OptsFromCFlags(insCflags flags) return (insOpts)opts; } -#ifdef TARGET_AMD64 - //----------------------------------------------------------------------------------------- // genCodeForCCMP - Generate code for a conditional compare (CCMP) node. // @@ -8699,7 +8700,16 @@ void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp) if (op2->isContainedIntOrIImmed()) { GenTreeIntConCommon* intConst = op2->AsIntConCommon(); - emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts); + if (intConst->IconValue() == 0) + { + // ctest reg, reg is 1-byte shorter encoding than ccmp reg, 0. 
+ instruction ctestIns = (instruction)(ccmpIns + FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION); + emit->emitIns_R_R(ctestIns, cmpSize, srcReg1, srcReg1, opts); + } + else + { + emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts); + } } else { @@ -9301,6 +9311,91 @@ void CodeGen::genAmd64EmitterUnitTestsAvx10v2() theEmitter->emitIns_R_R(INS_vmovw_simd, EA_16BYTE, REG_XMM0, REG_XMM1); } +/***************************************************************************** + * Unit tests for the CFCMOV instructions. + */ + +void CodeGen::genAmd64EmitterUnitTestsCFCMOV() +{ + emitter* theEmitter = GetEmitter(); + genDefineTempLabel(genCreateTempLabel()); + + GenTreePhysReg physReg(REG_EDX); + physReg.SetRegNum(REG_EDX); + GenTreeIndir load = indirForm(TYP_INT, &physReg); + + // Test all CC codes + for (uint32_t ins = FIRST_CFCMOV_INSTRUCTION; ins <= LAST_CFCMOV_INSTRUCTION; ins++) + { + theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX, INS_OPTS_NONE); + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_NONE); + + theEmitter->emitIns_R_A((instruction)ins, EA_8BYTE, REG_EAX, &load, INS_OPTS_NONE); + theEmitter->emitIns_R_A((instruction)ins, EA_4BYTE, REG_EAX, &load, INS_OPTS_NONE); + + theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_EAX, REG_ECX, 4); + theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_EAX, REG_ECX, 4); + + theEmitter->emitIns_R_ARX((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 1, 0); + theEmitter->emitIns_R_ARX((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 1, 0); + theEmitter->emitIns_R_ARX((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, 4); + theEmitter->emitIns_R_ARX((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, 4); + + theEmitter->emitIns_AR_R((instruction)ins, EA_8BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_nf); + theEmitter->emitIns_AR_R((instruction)ins, EA_4BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_nf); + + 
theEmitter->emitIns_ARX_R((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, 4, INS_OPTS_EVEX_nf); + theEmitter->emitIns_ARX_R((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, 4, INS_OPTS_EVEX_nf); + + theEmitter->emitIns_ARX_R((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_NA, 2, 0, INS_OPTS_EVEX_nf); + theEmitter->emitIns_ARX_R((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_NA, 2, 0, INS_OPTS_EVEX_nf); + + theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + + theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R10, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R10, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + } + + // Test all CC codes + for (uint32_t ins = INS_cmovo; ins <= INS_cmovg; ins++) + { + theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_R16, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, 
REG_R16, REG_RCX); + theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_R10, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_R10, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_RCX, 2); + theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_RAX, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_RAX, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_R10, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_R10, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_R16, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_R16, 0, 0); + theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R17, REG_R10, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R17, REG_R10, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R17, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R17, REG_R16, 0, 0, 
(insOpts)(INS_OPTS_EVEX_nd)); + } + +} + /***************************************************************************** * Unit tests for the CCMP instructions. */ @@ -9379,6 +9474,79 @@ void CodeGen::genAmd64EmitterUnitTestsCCMP() theEmitter->emitIns_R_C(INS_ccmpe, EA_4BYTE, REG_RAX, hnd, 4, INS_OPTS_EVEX_dfv_cf); } +/***************************************************************************** + * Unit tests for the CTEST instructions. + */ +void CodeGen::genAmd64EmitterUnitTestsCTEST() +{ + emitter* theEmitter = GetEmitter(); + genDefineTempLabel(genCreateTempLabel()); + GenTreePhysReg physReg(REG_EDX); + physReg.SetRegNum(REG_EDX); + GenTreeIndir load = indirForm(TYP_INT, &physReg); + + // ============ + // Test RR form + // ============ + + // Test all sizes + theEmitter->emitIns_R_R(INS_test, EA_4BYTE, REG_EAX, REG_ECX); + theEmitter->emitIns_R_R(INS_cteste, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_cteste, EA_8BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_cteste, EA_2BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_cteste, EA_1BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + + // Test all CC codes + for (uint32_t ins = FIRST_CTEST_INSTRUCTION; ins <= LAST_CTEST_INSTRUCTION; ins++) + { + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + } + + // Test all dfv + for (int i = 0; i < 16; i++) + { + theEmitter->emitIns_R_R(INS_cteste, EA_4BYTE, REG_RAX, REG_RCX, (insOpts)(i << INS_OPTS_EVEX_dfv_byte_offset)); + } + + // ============ + // Test RI form (test small and large sizes and constants) + // ============ + + theEmitter->emitIns_R_I(INS_test, EA_8BYTE, REG_RAX, 123); + theEmitter->emitIns_R_I(INS_cteste, EA_8BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_cteste, EA_8BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_test, EA_4BYTE, REG_RAX, 123); + 
theEmitter->emitIns_R_I(INS_cteste, EA_4BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_cteste, EA_4BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_test, EA_2BYTE, REG_RAX, 123); + theEmitter->emitIns_R_I(INS_cteste, EA_2BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_cteste, EA_2BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_test, EA_1BYTE, REG_RAX, 123); + theEmitter->emitIns_R_I(INS_cteste, EA_1BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_cteste, EA_1BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + + // ============ + // Test MR form (test small and large sizes) + // ============ + + theEmitter->emitIns_AR_R(INS_cteste, EA_1BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_AR_R(INS_cteste, EA_2BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_AR_R(INS_cteste, EA_4BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_AR_R(INS_cteste, EA_8BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf); + + // ============ + // Test MI form + // ============ + + theEmitter->emitIns_I_AR(INS_cteste, EA_1BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_I_AR(INS_cteste, EA_2BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_I_AR(INS_cteste, EA_4BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_I_AR(INS_cteste, EA_8BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf); +} #endif // defined(DEBUG) && defined(TARGET_AMD64) #ifdef PROFILING_SUPPORTED diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 43d74e91d53ea7..c6676f6760c0c9 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -942,60 +942,121 @@ bool emitter::DoJitUseApxNDD(instruction ins) const #endif } +inline bool emitter::IsApxConditionalInstruction(instruction ins) +{ +#ifdef TARGET_AMD64 + return (IsCCMP(ins) || IsCFCMOV(ins) || 
IsCTEST(ins)); +#endif + return false; +} + inline bool emitter::IsCCMP(instruction ins) { +#ifdef TARGET_AMD64 return (ins >= FIRST_CCMP_INSTRUCTION && ins <= LAST_CCMP_INSTRUCTION); +#endif + return false; +} + +inline bool emitter::IsCTEST(instruction ins) +{ +#ifdef TARGET_AMD64 + return (ins >= FIRST_CTEST_INSTRUCTION && ins <= LAST_CTEST_INSTRUCTION); +#endif + return false; +} + +inline bool emitter::IsCFCMOV(instruction ins) +{ +#ifdef TARGET_AMD64 + return (ins >= FIRST_CFCMOV_INSTRUCTION && ins <= LAST_CFCMOV_INSTRUCTION); +#endif + return false; } //------------------------------------------------------------------------ -// GetCCFromCCMP: Get a condition code from a ccmp instruction +// GetCCFromIns: Get a condition code from a conditional instruction // // Arguments: // ins - The instruction to check. // // Returns: -// `insCC` representing the condition code for a ccmp instruction. -// ccmpx instructions share the same instruction encoding unlike +// `insCC` representing the condition code for a ccmp / cfcmov instruction. +// ccmpx / cfcmovx instructions share the same instruction encoding unlike // other x86 status bit instructions and instead have a CC coded into // the EVEX prefix. 
// -inline insCC emitter::GetCCFromCCMP(instruction ins) +inline insCC emitter::GetCCFromIns(instruction ins) { - assert(IsCCMP(ins)); + assert(IsApxConditionalInstruction(ins)); switch (ins) { +#ifdef TARGET_AMD64 case INS_ccmpo: + case INS_cfcmovo: + case INS_ctesto: return INS_CC_O; case INS_ccmpno: + case INS_cfcmovno: + case INS_ctestno: return INS_CC_NO; case INS_ccmpb: + case INS_cfcmovb: + case INS_ctestb: return INS_CC_B; case INS_ccmpae: + case INS_cfcmovae: + case INS_ctestae: return INS_CC_AE; case INS_ccmpe: + case INS_cfcmove: + case INS_cteste: return INS_CC_E; case INS_ccmpne: + case INS_cfcmovne: + case INS_ctestne: return INS_CC_NE; case INS_ccmpbe: + case INS_cfcmovbe: + case INS_ctestbe: return INS_CC_BE; case INS_ccmpa: + case INS_cfcmova: + case INS_ctesta: return INS_CC_A; case INS_ccmps: + case INS_cfcmovs: + case INS_ctests: return INS_CC_S; case INS_ccmpns: + case INS_cfcmovns: + case INS_ctestns: return INS_CC_NS; case INS_ccmpt: + case INS_cfcmovp: + case INS_ctestt: return INS_CC_TRUE; case INS_ccmpf: + case INS_cfcmovnp: + case INS_ctestf: return INS_CC_FALSE; case INS_ccmpl: + case INS_cfcmovl: + case INS_ctestl: return INS_CC_L; case INS_ccmpge: + case INS_cfcmovge: + case INS_ctestge: return INS_CC_GE; case INS_ccmple: + case INS_cfcmovle: + case INS_ctestle: return INS_CC_LE; case INS_ccmpg: + case INS_cfcmovg: + case INS_ctestg: return INS_CC_G; +#endif default: unreached(); } @@ -2043,7 +2104,8 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const #if defined(DEBUG) if (m_compiler->DoJitStressPromotedEvexEncoding()) { - return true; + // EVEX prefixed CMOV has different semantics from non-EVEX CMOV, so we should not promote CMOV in stress mode. 
+ return !insIsCMOV(ins); } #endif // DEBUG if (IsApxOnlyInstruction(ins)) @@ -2137,7 +2199,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt { if (!IsEvexEncodableInstruction(ins)) { - // Legacy-promoted insutrcions are not labeled with Encoding_EVEX. + // Legacy-promoted instructions are not labeled with Encoding_EVEX. code |= MAP4_IN_BYTE_EVEX_PREFIX; } @@ -2170,18 +2232,21 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt code &= 0xFF7FFFFFFFFFFFFFULL; } #ifdef TARGET_AMD64 - if (IsCCMP(ins)) + if (IsCCMP(ins) || IsCTEST(ins)) { + // CCMP and CTEST have 2 special fields in the EVEX prefix: + // 1. Source condition code (SCC): EVEX.[3:0] — the instruction executes only when this condition is satisfied. + // 2. Default flags value (DFV): EVEX.[15:12] — the value written to EFLAGS when the SCC condition is not met. code &= 0xFFFF87F0FFFFFFFF; - code |= ((size_t)id->idGetEvexDFV()) << 43; - code |= ((size_t)GetCCFromCCMP(ins)) << 32; + code |= ((size_t)GetCCFromIns(ins)) << 32; // SCC goes to EVEX.[3:0] + code |= ((size_t)id->idGetEvexDFV()) << 43; // DFV goes to EVEX.[15:12] } #endif return code; } - // No APX-NDD instructions should reach code below. + // All APX-promoted-EVEX instructions should be handled above, no APX extended EVEX instruction should reach here. 
assert(!IsApxExtendedEvexInstruction(ins)); if (attr == EA_32BYTE) @@ -2286,7 +2351,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt default: { #ifdef TARGET_AMD64 - if (IsCCMP(id->idIns())) // Special case for conditional ins such as CCMP, CCMOV + if (IsCCMP(id->idIns()) || IsCTEST(id->idIns())) { break; } @@ -5401,7 +5466,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) // kmov instructions reach this path with EA_8BYTE size, even on x86 || IsKMOVInstruction(ins) // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded - || isPrefetch(ins)); + || isPrefetch(ins) + // cmov instructions reach this path with EA_2BYTE + || insIsCMOV(ins)); size = (attrSize == EA_2BYTE) && (ins == INS_cmpxchg) ? 4 : 3; } @@ -5580,7 +5647,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); - bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test) && (!IsCTEST(ins)); // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful // but it requires special handling of the immediate value (it is always encoded in a byte). @@ -7044,7 +7111,7 @@ void emitter::emitIns_R_I(instruction ins, UNATIVE_OFFSET sz; instrDesc* id; insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS); - bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test) && !IsCTEST(ins); // BT reg,imm might be useful but it requires special handling of the immediate value // (it is always encoded in a byte). Let's not complicate things until this is needed. 
@@ -8253,8 +8320,8 @@ void emitter::emitIns_R_S_I( void emitter::emitIns_R_R_A( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insOpts instOptions) { - assert(IsSimdInstruction(ins)); - assert(IsThreeOperandAVXInstruction(ins)); + assert(IsSimdInstruction(ins) || IsApxExtendedEvexInstruction(ins)); + assert(IsThreeOperandAVXInstruction(ins) || IsApxExtendedEvexInstruction(ins)); ssize_t offs = indir->Offset(); instrDesc* id = emitNewInstrAmd(attr, offs); @@ -8267,6 +8334,8 @@ void emitter::emitIns_R_R_A( SetEvexBroadcastIfNeeded(id, instOptions); SetEvexEmbMaskIfNeeded(id, instOptions); + SetEvexNdIfNeeded(id, instOptions); + SetEvexNfIfNeeded(id, instOptions); UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); @@ -8275,10 +8344,14 @@ void emitter::emitIns_R_R_A( emitCurIGsize += sz; } -void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs) +void emitter::emitIns_R_R_AR + (instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, insOpts instOptions) { - assert(IsSimdInstruction(ins)); - assert(IsThreeOperandAVXInstruction(ins)); + assert(IsSimdInstruction(ins) || IsApxExtendedEvexInstruction(ins)); + assert(IsThreeOperandAVXInstruction(ins) || IsApxExtendedEvexInstruction(ins)); + + // Checking EVEX.ND and NDD compatibility together in case the ND slot is overridden by other features. 
+ bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNddEncodableInstruction(ins); instrDesc* id = emitNewInstrAmd(attr, offs); @@ -8290,6 +8363,9 @@ void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, reg id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; + SetEvexNdIfNeeded(id, instOptions); + SetEvexNfIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); @@ -8966,7 +9042,7 @@ void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f * Add an instruction with a static member + constant. */ -void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val) +void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val, insOpts instOptions) { // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) @@ -9179,9 +9255,14 @@ void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber re assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly + sz = emitInsSizeAM(id, insCodeMI(ins), val); + id->idCodeSize(sz); + SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); + dispIns(id); emitCurIGsize += sz; } @@ -9600,6 +9681,9 @@ void emitter::emitIns_ARX_R(instruction ins, id->idIns(ins); id->idInsFmt(fmt); + SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); + id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(scale); @@ -12793,7 +12877,7 @@ void emitter::emitDispIns( printf(" %-9s", sstr); #ifdef TARGET_AMD64 - if (IsCCMP(id->idIns())) + if (IsCCMP(id->idIns()) || IsCTEST(id->idIns())) { // print finite set notation for DFV unsigned dfv = id->idGetEvexDFV(); @@ -14450,7 +14534,8 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) 
// Does the constant fit in a byte? // SSE/AVX do not need to modify opcode - if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test) + + if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test && !IsCTEST(ins)) { if (id->idInsFmt() != IF_ARW_SHF && !IsSimdInstruction(ins)) { @@ -14709,6 +14794,12 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { dst += emitOutputByte(dst, 0x66); } + + if (IsCFCMOV(ins)) + { + // XArch-APX-TODO: JIT does not emit sub-32b CMOV; whether to use 16b operands in CFCMOV is to be determined. + break; + } FALLTHROUGH; } @@ -16796,7 +16887,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins))) { - assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins)); + assert(hasCodeRM(ins) && !hasCodeMI(ins) && (!hasCodeMR(ins))); code = insCodeRM(ins); code = AddX86PrefixIfNeeded(id, code, size); code = insEncodeRMreg(id, code) | (int)(size == EA_2BYTE); @@ -16890,7 +16981,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) code = AddX86PrefixIfNeeded(id, code, size); code = insEncodeMRreg(id, code); - if (ins != INS_test && !IsShiftInstruction(ins)) + if (ins != INS_test && !IsShiftInstruction(ins) && !IsCFCMOV(ins) && !IsCTEST(ins)) { code |= 2; } @@ -16915,7 +17006,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) dst += emitOutputByte(dst, 0x66); } - code |= 0x1; + if (!IsCFCMOV(ins)) + code |= 0x1; break; case EA_4BYTE: @@ -16929,7 +17021,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) assert((code & EXTENDED_EVEX_PP_BITS) == 0); } #endif - code |= 0x1; + if (!IsCFCMOV(ins)) + code |= 0x1; break; #ifdef TARGET_AMD64 @@ -16946,7 +17039,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } // Set the 'w' bit to get the large version - code |= 0x1; + if (!IsCFCMOV(ins)) + code |= 0x1; break; #endif // TARGET_AMD64 @@ 
-16987,11 +17081,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) regCode = insEncodeReg012(id, reg2, size, &code); } -#ifdef TARGET_AMD64 - if (TakesSimdPrefix(id) && !IsCCMP(ins)) -#else - if (TakesSimdPrefix(id)) -#endif + // ToDo: Handle CFCMOV 3 operand instruction properly + if (TakesSimdPrefix(id) && !IsApxConditionalInstruction(ins)) { // In case of AVX instructions that take 3 operands, we generally want to encode reg1 // as first source. In this case, reg1 is both a source and a destination. @@ -17142,7 +17233,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) case EA_2BYTE: case EA_4BYTE: // Set the 'w' bit to get the large version - code = insIsCMOV(ins) ? code : (code | (0x01)); + code = (insIsCMOV(ins) || IsCFCMOV(ins)) ? code : (code | (0x01)); break; #ifdef TARGET_AMD64 @@ -17152,7 +17243,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) code = AddRexWPrefix(id, code); // TODO-APX : Revisit. does xor or other cases need to be handled // differently? see emitOutputRR // Set the 'w' bit to get the large version - code = insIsCMOV(ins) ? code : (code | (0x01)); + code = (insIsCMOV(ins) || IsCFCMOV(ins)) ? 
code : (code | (0x01)); break; #endif // TARGET_AMD64 @@ -17254,7 +17345,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) instruction ins = id->idIns(); regNumber reg = id->idReg1(); ssize_t val = emitGetInsSC(id); - bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test) && !IsCTEST(ins); assert(!id->idHasReg2()); diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 126d1a67a3593b..e134e128bfb0bc 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -659,7 +659,7 @@ void SetEvexDFVIfNeeded(instrDesc* id, insOpts instOptions) if ((instOptions & INS_OPTS_EVEX_dfv_MASK) != 0) { assert(UsePromotedEVEXEncoding()); - assert(IsCCMP(id->idIns())); + assert(IsCCMP(id->idIns()) || IsCTEST(id->idIns())); id->idSetEvexDFV(instOptions); } #endif @@ -777,8 +777,11 @@ static bool IsRexW1Instruction(instruction ins); static bool IsRexWXInstruction(instruction ins); static bool IsRexW1EvexInstruction(instruction ins); +static bool IsApxConditionalInstruction(instruction ins); static bool IsCCMP(instruction ins); -static insCC GetCCFromCCMP(instruction ins); +static bool IsCTEST(instruction ins); +static bool IsCFCMOV(instruction ins); +static insCC GetCCFromIns(instruction ins); bool isAvx512Blendv(instruction ins) { @@ -967,7 +970,13 @@ void emitIns_R_R_A(instruction ins, GenTreeIndir* indir, insOpts instOptions = INS_OPTS_NONE); -void emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs); +void emitIns_R_R_AR(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber base, + int offs, + insOpts instOptions = INS_OPTS_NONE); void emitIns_R_AR_R(instruction ins, emitAttr attr, @@ -1095,7 +1104,7 @@ void emitIns_R_C(instruction ins, void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE 
fldHnd, regNumber reg, int offs); -void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs, int val); +void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs, int val, insOpts instOptions = INS_OPTS_NONE); void emitIns_IJ(emitAttr attr, regNumber reg, unsigned base); diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index fdadb2b2f52afe..4dbae113b9c44b 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -1141,6 +1141,7 @@ INST3(vucomxss, "vucomxss", IUM_RD, BAD_CODE, BAD_ // id nm um mr mi rm lat tp tt flags +#ifdef TARGET_AMD64 #define FIRST_APX_INSTRUCTION INS_ccmpo #define FIRST_CCMP_INSTRUCTION INS_ccmpo INST3(ccmpo, "ccmpo", IUM_RD, 0x000038, 0x0003880, 0x00003A, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) @@ -1160,6 +1161,42 @@ INST3(ccmpge, "ccmpge", IUM_RD, 0x000038, 0x0003880, 0x INST3(ccmple, "ccmple", IUM_RD, 0x000038, 0x0003880, 0x00003A, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) INST3(ccmpg, "ccmpg", IUM_RD, 0x000038, 0x0003880, 0x00003A, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) #define LAST_CCMP_INSTRUCTION INS_ccmpg +#define FIRST_CFCMOV_INSTRUCTION INS_cfcmovo +INST3(cfcmovo, "cfcmovo", IUM_WR, 0x000040, BAD_CODE, 0x000040, 1C, 2X, INS_TT_NONE, Reads_OF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovno, "cfcmovno", IUM_WR, 0x000041, BAD_CODE, 0x000041, 1C, 2X, INS_TT_NONE, Reads_OF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovb, "cfcmovb", IUM_WR, 0x000042, BAD_CODE, 0x000042, 1C, 2X, INS_TT_NONE, Reads_CF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovae, "cfcmovae", IUM_WR, 0x000043, BAD_CODE, 0x000043, 1C, 2X, INS_TT_NONE, Reads_CF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmove, "cfcmove", IUM_WR, 0x000044, BAD_CODE, 0x000044, 1C, 2X, INS_TT_NONE, Reads_ZF | 
INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovne, "cfcmovne", IUM_WR, 0x000045, BAD_CODE, 0x000045, 1C, 2X, INS_TT_NONE, Reads_ZF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovbe, "cfcmovbe", IUM_WR, 0x000046, BAD_CODE, 0x000046, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmova, "cfcmova", IUM_WR, 0x000047, BAD_CODE, 0x000047, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovs, "cfcmovs", IUM_WR, 0x000048, BAD_CODE, 0x000048, 1C, 2X, INS_TT_NONE, Reads_SF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovns, "cfcmovns", IUM_WR, 0x000049, BAD_CODE, 0x000049, 1C, 2X, INS_TT_NONE, Reads_SF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovp, "cfcmovp", IUM_WR, 0x00004A, BAD_CODE, 0x00004A, 1C, 2X, INS_TT_NONE, Reads_PF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovnp, "cfcmovnp", IUM_WR, 0x00004B, BAD_CODE, 0x00004B, 1C, 2X, INS_TT_NONE, Reads_PF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovl, "cfcmovl", IUM_WR, 0x00004C, BAD_CODE, 0x00004C, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovge, "cfcmovge", IUM_WR, 0x00004D, BAD_CODE, 0x00004D, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovle, "cfcmovle", IUM_WR, 0x00004E, BAD_CODE, 0x00004E, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovg, "cfcmovg", IUM_WR, 0x00004F, BAD_CODE, 0x00004F, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +#define LAST_CFCMOV_INSTRUCTION INS_cfcmovg +#define FIRST_CTEST_INSTRUCTION INS_ctesto +INST3(ctesto, "ctesto", IUM_RD, 0x000084, 0x00008F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestno, "ctestno", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | 
INS_FLAGS_Has_Sbit) +INST3(ctestb, "ctestb", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestae, "ctestae", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(cteste, "cteste", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestne, "ctestne", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestbe, "ctestbe", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctesta, "ctesta", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctests, "ctests", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestns, "ctestns", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestt, "ctestt", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestf, "ctestf", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestl, "ctestl", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestge, "ctestge", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestle, "ctestle", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF 
| INS_FLAGS_Has_Sbit) +INST3(ctestg, "ctestg", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +#define LAST_CTEST_INSTRUCTION INS_ctestg INST3(crc32_apx, "crc32", IUM_RW, BAD_CODE, BAD_CODE, 0x0000F0, 3C, 1C, INS_TT_NONE, INS_FLAGS_None) INST3(movbe_apx, "movbe", IUM_WR, 0x000061, BAD_CODE, 0x000060, 1C, 2X, INS_TT_NONE, INS_FLAGS_None) @@ -1180,6 +1217,7 @@ INST3(setge_apx, "setzuge", IUM_WR, SSEDBLMAP(4, 0x4D), BAD_COD INST3(setle_apx, "setzule", IUM_WR, SSEDBLMAP(4, 0x4E), BAD_CODE, BAD_CODE, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF) INST3(setg_apx, "setzug", IUM_WR, SSEDBLMAP(4, 0x4F), BAD_CODE, BAD_CODE, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF) #define LAST_APX_INSTRUCTION INS_setg_apx +#endif // Scalar instructions in SSE4.2 INST3(crc32, "crc32", IUM_RW, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF0), 3C, 1C, INS_TT_NONE, INS_FLAGS_None) From 849f26e3362749a2505b19e2ab7f160b51b330ff Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 29 Apr 2026 12:04:20 -0700 Subject: [PATCH 2/6] Formatting. 
--- src/coreclr/jit/codegenxarch.cpp | 34 +++++++++++++++++++------------ src/coreclr/jit/emitxarch.cpp | 35 ++++++++++++++++++-------------- src/coreclr/jit/emitxarch.h | 19 ++++++++++------- 3 files changed, 53 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index a9359510014481..5af43f3b9398c3 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -9350,15 +9350,23 @@ void CodeGen::genAmd64EmitterUnitTestsCFCMOV() theEmitter->emitIns_ARX_R((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_NA, 2, 0, INS_OPTS_EVEX_nf); theEmitter->emitIns_ARX_R((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_NA, 2, 0, INS_OPTS_EVEX_nf); - theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); - theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); - theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); - theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); - theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); - theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); - - theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R10, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); - theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R10, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, 
REG_ECX, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + + theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R10, REG_R16, 0, 0, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R10, REG_R16, 0, 0, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); } // Test all CC codes @@ -9384,8 +9392,10 @@ void CodeGen::genAmd64EmitterUnitTestsCFCMOV() theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_R16, 0, 0); theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd)); theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd)); - theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, (insOpts)(INS_OPTS_EVEX_nd)); - theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, + (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, + (insOpts)(INS_OPTS_EVEX_nd)); theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd)); theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd)); theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R17, 
REG_R10, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); @@ -9393,7 +9403,6 @@ void CodeGen::genAmd64EmitterUnitTestsCFCMOV() theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R17, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R17, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); } - } /***************************************************************************** @@ -9528,7 +9537,6 @@ void CodeGen::genAmd64EmitterUnitTestsCTEST() theEmitter->emitIns_R_I(INS_cteste, EA_1BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); theEmitter->emitIns_R_I(INS_cteste, EA_1BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); - // ============ // Test MR form (test small and large sizes) // ============ diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index c6676f6760c0c9..194c8536589f33 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -2235,8 +2235,10 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt if (IsCCMP(ins) || IsCTEST(ins)) { // CCMP and CTEST have 2 special fields in the EVEX prefix: - // 1. Source condition code (SCC): EVEX.[3:0] — the instruction executes only when this condition is satisfied. - // 2. Default flags value (DFV): EVEX.[15:12] — the value written to EFLAGS when the SCC condition is not met. + // 1. Source condition code (SCC): EVEX.[3:0] — the instruction executes only when this condition is + // satisfied. + // 2. Default flags value (DFV): EVEX.[15:12] — the value written to EFLAGS when the SCC condition is not + // met. 
code &= 0xFFFF87F0FFFFFFFF; code |= ((size_t)GetCCFromIns(ins)) << 32; // SCC goes to EVEX.[3:0] code |= ((size_t)id->idGetEvexDFV()) << 43; // DFV goes to EVEX.[15:12] @@ -7110,8 +7112,9 @@ void emitter::emitIns_R_I(instruction ins, UNATIVE_OFFSET sz; instrDesc* id; - insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS); - bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test) && !IsCTEST(ins); + insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS); + bool valInByte = + ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test) && !IsCTEST(ins); // BT reg,imm might be useful but it requires special handling of the immediate value // (it is always encoded in a byte). Let's not complicate things until this is needed. @@ -8344,8 +8347,8 @@ void emitter::emitIns_R_R_A( emitCurIGsize += sz; } -void emitter::emitIns_R_R_AR - (instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, insOpts instOptions) +void emitter::emitIns_R_R_AR( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, insOpts instOptions) { assert(IsSimdInstruction(ins) || IsApxExtendedEvexInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins) || IsApxExtendedEvexInstruction(ins)); @@ -9042,7 +9045,8 @@ void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f * Add an instruction with a static member + constant. 
*/ -void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val, insOpts instOptions) +void emitter::emitIns_C_I( + instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val, insOpts instOptions) { // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) @@ -9255,9 +9259,8 @@ void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber re assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly - sz = emitInsSizeAM(id, insCodeMI(ins), val); - + id->idCodeSize(sz); SetEvexNfIfNeeded(id, instOptions); @@ -14797,7 +14800,8 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) if (IsCFCMOV(ins)) { - // XArch-APX-TODO: JIT does not emitt sub-32b CMOV, whether to use 16b operands in CFCMOV is to be determined. + // XArch-APX-TODO: JIT does not emitt sub-32b CMOV, whether to use 16b operands in CFCMOV is to be + // determined. break; } FALLTHROUGH; @@ -17341,11 +17345,12 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) { code_t code; - emitAttr size = id->idOpSize(); - instruction ins = id->idIns(); - regNumber reg = id->idReg1(); - ssize_t val = emitGetInsSC(id); - bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test) && !IsCTEST(ins); + emitAttr size = id->idOpSize(); + instruction ins = id->idIns(); + regNumber reg = id->idReg1(); + ssize_t val = emitGetInsSC(id); + bool valInByte = + ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test) && !IsCTEST(ins); assert(!id->idHasReg2()); diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index e134e128bfb0bc..265781937146b3 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -971,12 +971,12 @@ void emitIns_R_R_A(instruction ins, insOpts instOptions = INS_OPTS_NONE); void emitIns_R_R_AR(instruction ins, - 
emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber base, - int offs, - insOpts instOptions = INS_OPTS_NONE); + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber base, + int offs, + insOpts instOptions = INS_OPTS_NONE); void emitIns_R_AR_R(instruction ins, emitAttr attr, @@ -1104,7 +1104,12 @@ void emitIns_R_C(instruction ins, void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs); -void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs, int val, insOpts instOptions = INS_OPTS_NONE); +void emitIns_C_I(instruction ins, + emitAttr attr, + CORINFO_FIELD_HANDLE fdlHnd, + int offs, + int val, + insOpts instOptions = INS_OPTS_NONE); void emitIns_IJ(emitAttr attr, regNumber reg, unsigned base); From c1815562e0b7552564f3060d005e6dd57bcba98a Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Wed, 29 Apr 2026 12:34:27 -0700 Subject: [PATCH 3/6] resolving comments. Co-authored-by: Copilot --- src/coreclr/jit/codegenxarch.cpp | 6 ++++-- src/coreclr/jit/emitxarch.cpp | 3 +-- src/coreclr/jit/instrsxarch.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 5af43f3b9398c3..c85cce4c6a3d3c 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -8635,7 +8635,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, #ifdef TARGET_AMD64 //----------------------------------------------------------------------------------------- -// OptsFromCFlags - Convert condition flags into approxpriate insOpts. +// OptsFromCFlags - Convert condition flags into appropriate insOpts. // // Arguments: // flags - The condition flags to be converted. 
@@ -8645,7 +8645,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, // // Notes: // This function maps the condition flags (e.g., CF, ZF, SF, OF) to the appropriate -// instruction options used for setting the default flag values in extneded EVEX +// instruction options used for setting the default flag values in extended EVEX // encoding conditional instructions. // insOpts CodeGen::OptsFromCFlags(insCflags flags) @@ -8703,6 +8703,7 @@ void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp) if (intConst->IconValue() == 0) { // ctest reg, reg is 1-byte shorter encoding than ccmp reg, 0. + assert((FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION) == 32); instruction ctestIns = (instruction)(ccmpIns + FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION); emit->emitIns_R_R(ctestIns, cmpSize, srcReg1, srcReg1, opts); } @@ -9488,6 +9489,7 @@ void CodeGen::genAmd64EmitterUnitTestsCCMP() */ void CodeGen::genAmd64EmitterUnitTestsCTEST() { + assert(FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION == 32); emitter* theEmitter = GetEmitter(); genDefineTempLabel(genCreateTempLabel()); GenTreePhysReg physReg(REG_EDX); diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 194c8536589f33..413db63c6d2ff4 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -5469,7 +5469,7 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) || IsKMOVInstruction(ins) // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded || isPrefetch(ins) - // cmov intructions reach this path with EA_2BYTE + // cmov instructions reach this path with EA_2BYTE || insIsCMOV(ins)); size = (attrSize == EA_2BYTE) && (ins == INS_cmpxchg) ?
4 : 3; @@ -17085,7 +17085,6 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) regCode = insEncodeReg012(id, reg2, size, &code); } - // ToDo: Handle CFCMOV 3 operand instruction properly if (TakesSimdPrefix(id) && !IsApxConditionalInstruction(ins)) { // In case of AVX instructions that take 3 operands, we generally want to encode reg1 diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 4dbae113b9c44b..5a1edcce43fd96 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -1180,7 +1180,7 @@ INST3(cfcmovle, "cfcmovle", IUM_WR, 0x00004E, BAD_CODE, 0x INST3(cfcmovg, "cfcmovg", IUM_WR, 0x00004F, BAD_CODE, 0x00004F, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | INS_Flags_Has_NDD | INS_Flags_Has_NF) #define LAST_CFCMOV_INSTRUCTION INS_cfcmovg #define FIRST_CTEST_INSTRUCTION INS_ctesto -INST3(ctesto, "ctesto", IUM_RD, 0x000084, 0x00008F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctesto, "ctesto", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) INST3(ctestno, "ctestno", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) INST3(ctestb, "ctestb", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) INST3(ctestae, "ctestae", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) From c9585350bb87fa7bb2fc628fd8b6d4d00ed953a5 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Thu, 30 Apr 2026 11:40:59 -0700 Subject: [PATCH 4/6] resolve comments Co-authored-by: Copilot --- src/coreclr/jit/emitxarch.cpp | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp 
b/src/coreclr/jit/emitxarch.cpp index 413db63c6d2ff4..1c06569dc1ea11 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -946,32 +946,36 @@ inline bool emitter::IsApxConditionalInstruction(instruction ins) { #ifdef TARGET_AMD64 return (IsCCMP(ins) || IsCFCMOV(ins) || IsCTEST(ins)); -#endif +#else return false; +#endif } inline bool emitter::IsCCMP(instruction ins) { #ifdef TARGET_AMD64 return (ins >= FIRST_CCMP_INSTRUCTION && ins <= LAST_CCMP_INSTRUCTION); -#endif +#else return false; +#endif } inline bool emitter::IsCTEST(instruction ins) { #ifdef TARGET_AMD64 return (ins >= FIRST_CTEST_INSTRUCTION && ins <= LAST_CTEST_INSTRUCTION); -#endif +#else return false; +#endif } inline bool emitter::IsCFCMOV(instruction ins) { #ifdef TARGET_AMD64 return (ins >= FIRST_CFCMOV_INSTRUCTION && ins <= LAST_CFCMOV_INSTRUCTION); -#endif +#else return false; +#endif } //------------------------------------------------------------------------ @@ -988,72 +992,56 @@ inline bool emitter::IsCFCMOV(instruction ins) // inline insCC emitter::GetCCFromIns(instruction ins) { - assert(IsApxConditionalInstruction(ins)); + assert(IsCTEST(ins) || IsCCMP(ins)); switch (ins) { #ifdef TARGET_AMD64 case INS_ccmpo: - case INS_cfcmovo: case INS_ctesto: return INS_CC_O; case INS_ccmpno: - case INS_cfcmovno: case INS_ctestno: return INS_CC_NO; case INS_ccmpb: - case INS_cfcmovb: case INS_ctestb: return INS_CC_B; case INS_ccmpae: - case INS_cfcmovae: case INS_ctestae: return INS_CC_AE; case INS_ccmpe: - case INS_cfcmove: case INS_cteste: return INS_CC_E; case INS_ccmpne: - case INS_cfcmovne: case INS_ctestne: return INS_CC_NE; case INS_ccmpbe: - case INS_cfcmovbe: case INS_ctestbe: return INS_CC_BE; case INS_ccmpa: - case INS_cfcmova: case INS_ctesta: return INS_CC_A; case INS_ccmps: - case INS_cfcmovs: case INS_ctests: return INS_CC_S; case INS_ccmpns: - case INS_cfcmovns: case INS_ctestns: return INS_CC_NS; case INS_ccmpt: - case INS_cfcmovp: case INS_ctestt: return 
INS_CC_TRUE; case INS_ccmpf: - case INS_cfcmovnp: case INS_ctestf: return INS_CC_FALSE; case INS_ccmpl: - case INS_cfcmovl: case INS_ctestl: return INS_CC_L; case INS_ccmpge: - case INS_cfcmovge: case INS_ctestge: return INS_CC_GE; case INS_ccmple: - case INS_cfcmovle: case INS_ctestle: return INS_CC_LE; case INS_ccmpg: - case INS_cfcmovg: case INS_ctestg: return INS_CC_G; #endif @@ -8353,9 +8341,6 @@ void emitter::emitIns_R_R_AR( assert(IsSimdInstruction(ins) || IsApxExtendedEvexInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins) || IsApxExtendedEvexInstruction(ins)); - // Checking EVEX.ND and NDD compatibility together in case the ND slot is overridden by other features. - bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNddEncodableInstruction(ins); - instrDesc* id = emitNewInstrAmd(attr, offs); id->idIns(ins); @@ -14800,7 +14785,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) if (IsCFCMOV(ins)) { - // XArch-APX-TODO: JIT does not emitt sub-32b CMOV, whether to use 16b operands in CFCMOV is to be + // XArch-APX-TODO: JIT does not emit sub-32b CMOV, whether to use 16b operands in CFCMOV is to be // determined. 
break; } From e45ac570b50f7b4d4ed0780646b6ff9d0ecad7d9 Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Thu, 30 Apr 2026 12:31:09 -0700 Subject: [PATCH 5/6] formatting --- src/coreclr/jit/emitxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 1c06569dc1ea11..bd3a3f6fef60cf 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -964,7 +964,7 @@ inline bool emitter::IsCTEST(instruction ins) { #ifdef TARGET_AMD64 return (ins >= FIRST_CTEST_INSTRUCTION && ins <= LAST_CTEST_INSTRUCTION); -#else +#else return false; #endif } @@ -973,7 +973,7 @@ inline bool emitter::IsCFCMOV(instruction ins) { #ifdef TARGET_AMD64 return (ins >= FIRST_CFCMOV_INSTRUCTION && ins <= LAST_CFCMOV_INSTRUCTION); -#else +#else return false; #endif } From ffa47787cff93db50df1a859e2477db71369102b Mon Sep 17 00:00:00 2001 From: Ruihan-Yin Date: Thu, 30 Apr 2026 12:59:29 -0700 Subject: [PATCH 6/6] resolve comments. Co-authored-by: Copilot --- src/coreclr/jit/emitxarch.cpp | 7 +++---- src/coreclr/jit/emitxarch.h | 2 +- src/coreclr/jit/instrsxarch.h | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index bd3a3f6fef60cf..6d44d7eaa90743 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -985,8 +985,8 @@ inline bool emitter::IsCFCMOV(instruction ins) // ins - The instruction to check. // // Returns: -// `insCC` representing the condition code for a ccmp / cfcmov instruction. -// ccmpx / cfcmovx instructions share the same instruction encoding unlike +// `insCC` representing the condition code for a ccmp / ctest instruction. +// ccmpcc / ctestcc instructions share the same instruction encoding unlike // other x86 status bit instructions and instead have a CC coded into // the EVEX prefix. 
// @@ -14785,8 +14785,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) if (IsCFCMOV(ins)) { - // XArch-APX-TODO: JIT does not emit sub-32b CMOV, whether to use 16b operands in CFCMOV is to be - // determined. + // The opcode for CFCMOV does not follow the rule of "|= 0x01" to handle 16b/32b/64b operands. break; } FALLTHROUGH; diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 265781937146b3..3f9d22823d9add 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -1106,7 +1106,7 @@ void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, re void emitIns_C_I(instruction ins, emitAttr attr, - CORINFO_FIELD_HANDLE fdlHnd, + CORINFO_FIELD_HANDLE fldHnd, int offs, int val, insOpts instOptions = INS_OPTS_NONE); diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 5a1edcce43fd96..f0ad61b63d7fac 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -1217,7 +1217,7 @@ INST3(setge_apx, "setzuge", IUM_WR, SSEDBLMAP(4, 0x4D), BAD_COD INST3(setle_apx, "setzule", IUM_WR, SSEDBLMAP(4, 0x4E), BAD_CODE, BAD_CODE, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF) INST3(setg_apx, "setzug", IUM_WR, SSEDBLMAP(4, 0x4F), BAD_CODE, BAD_CODE, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF) #define LAST_APX_INSTRUCTION INS_setg_apx -#endif +#endif // TARGET_AMD64 // Scalar instructions in SSE4.2 INST3(crc32, "crc32", IUM_RW, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF0), 3C, 1C, INS_TT_NONE, INS_FLAGS_None)