Skip to content

Commit

Permalink
Fix problem with fgExpandQmarkForCastInstOf (#97493)
Browse files Browse the repository at this point in the history
This function expands a nested qmark representing a cast/isinst
into multiple blocks of flow.

I found a problem, with JitOptRepeat, that exists in non-JitOptRepeat
compiles but apparently doesn't lead to problems. The result of the
qmark is assigned to a variable. This variable is multiply used during
the expansion of the qmarks. This causes problems with subsequent
optimizations if the variable already has a known exact type in the
importer due to lvaSetClass/lvaUpdateClass. While the variable might
have that known type after the full qmark evaluation, it is not
guaranteed to have that type in the middle of the evaluation. In particular,
the qmark expansion for isinst converts types that don't match the
type being tested into null, so the "known type" isn't valid until the
value is null.

The solution is to create a new temp for the intermediate results,
and assign the final result at the end of the expansion.

This leads to quite a lot of downstream churn and diffs. E.g.,
the `fgOptimizeUncondBranchToSimpleCond` optimization doesn't kick
in any more on these expansions.
  • Loading branch information
BruceForstall committed Feb 15, 2024
1 parent 4d37918 commit f2d5b2f
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 28 deletions.
4 changes: 2 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19005,7 +19005,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetArrayElementClassHandle(GenTree* array)

CORINFO_CLASS_HANDLE Compiler::gtGetFieldClassHandle(CORINFO_FIELD_HANDLE fieldHnd, bool* pIsExact, bool* pIsNonNull)
{
CORINFO_CLASS_HANDLE fieldClass = nullptr;
CORINFO_CLASS_HANDLE fieldClass = NO_CLASS_HANDLE;
CorInfoType fieldCorType = info.compCompHnd->getFieldType(fieldHnd, &fieldClass);

if (fieldCorType == CORINFO_TYPE_CLASS)
Expand All @@ -19019,7 +19019,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetFieldClassHandle(CORINFO_FIELD_HANDLE fieldH
#if DEBUG
char fieldNameBuffer[128];
char classNameBuffer[128];
JITDUMP("Querying runtime about current class of field %s (declared as %s)\n",
JITDUMP("\nQuerying runtime about current class of field %s (declared as %s)\n",
eeGetFieldName(fieldHnd, true, fieldNameBuffer, sizeof(fieldNameBuffer)),
eeGetClassName(fieldClass, classNameBuffer, sizeof(classNameBuffer)));
#endif // DEBUG
Expand Down
64 changes: 38 additions & 26 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14517,12 +14517,11 @@ GenTreeQmark* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = N
//
// Notes:
//
// For a castclass helper call,
// Importer creates the following tree:
// For a castclass helper call, importer creates the following tree:
// tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
//
// This method splits the qmark expression created by the importer into the
// following blocks: (block, asg, cond1, cond2, helper, remainder)
// following blocks: (block, asg, cond1, cond2, helper, remainder).
// Notice that op1 is the result for both the conditions. So we coalesce these
// assignments into a single block instead of two blocks resulting a nested diamond.
//
Expand All @@ -14533,17 +14532,24 @@ GenTreeQmark* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = N
// block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
//
// We expect to achieve the following codegen:
// mov rsi, rdx tmp = op1 // asgBlock
// test rsi, rsi goto skip if tmp == null ? // cond1Block
// mov rsi, rdx tmp2 = op1 // asgBlock
// test rsi, rsi goto skip if tmp2 == null ? // cond1Block
// je SKIP
// mov rcx, 0x76543210 cns = op2 // cond2Block
// cmp qword ptr [rsi], rcx goto skip if *tmp == op2
// mov rcx, 0x76543210 cns = op2 // cond2Block
// cmp qword ptr [rsi], rcx goto skip if *tmp2 == op2
// je SKIP
// call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
// call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp2 = helper(cns, tmp2) // helperBlock
// mov rsi, rax
// SKIP: // remainderBlock
// SKIP: // remainderBlock
// mov rdi, rsi tmp = tmp2
// tmp has the result.
//
// Note that we can't use `tmp` during the computation of the result: we must create a new temp,
// and only assign `tmp` to the final value. This is because `tmp` may already have been annotated
// via lvaSetClass/lvaUpdateClass as having a known type. This is only true after the full expansion,
// where any other type gets converted to null. If we used `tmp` during the expansion, then it would
// appear to subsequent optimizations that cond2Block (where the type is checked) is unnecessary.
//
bool Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt)
{
#ifdef DEBUG
Expand Down Expand Up @@ -14585,10 +14591,10 @@ bool Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt)
}
else
{
// This is a rare case that arises when we are doing minopts and encounter isinst of null
// gtFoldExpr was still is able to optimize away part of the tree (but not all).
// This is a rare case that arises when we are doing minopts and encounter isinst of null.
// gtFoldExpr was still able to optimize away part of the tree (but not all).
// That means it does not match our pattern.

//
// Rather than write code to handle this case, just fake up some nodes to make it match the common
// case. Synthesize a comparison that is always true, and for the result-on-true, use the
// entire subtree we expected to be the nested question op.
Expand All @@ -14604,7 +14610,7 @@ bool Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt)
// block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
//
// We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
// if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this only
// if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this
// for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
// remainderBlock will still be GC safe.
BasicBlockFlags propagateFlags = block->GetFlagsRaw() & BBF_GC_SAFE_POINT;
Expand Down Expand Up @@ -14669,6 +14675,7 @@ bool Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt)
// {
// [weight 0.5 * <100 - likelihood of FastType>]
// }
// }
//
cond2Block->inheritWeightPercentage(cond1Block, 50);
helperBlock->inheritWeightPercentage(cond2Block, nestedQmarkElseLikelihood);
Expand All @@ -14683,14 +14690,12 @@ bool Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt)
jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo());
fgInsertStmtAtEnd(cond2Block, jmpStmt);

unsigned dstLclNum = dst->AsLclVarCommon()->GetLclNum();
unsigned tmp2 = lvaGrabTemp(false DEBUGARG("CastInstOf QMark result"));
lvaGetDesc(tmp2)->lvType = dst->TypeGet();

// AsgBlock should get tmp = op1.
GenTree* trueExprStore =
dst->OperIs(GT_STORE_LCL_FLD)
? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(), dst->AsLclFld()->GetLclOffs(), trueExpr)
: gtNewStoreLclVarNode(dstLclNum, trueExpr)->AsLclVarCommon();
Statement* trueStmt = fgNewStmtFromTree(trueExprStore, stmt->GetDebugInfo());
// AsgBlock should get tmp2 = op1.
GenTree* trueExprStore = gtNewStoreLclVarNode(tmp2, trueExpr)->AsLclVarCommon();
Statement* trueStmt = fgNewStmtFromTree(trueExprStore, stmt->GetDebugInfo());
fgInsertStmtAtEnd(asgBlock, trueStmt);

// Since we are adding helper in the JTRUE false path, reverse the cond2 and add the helper.
Expand All @@ -14706,14 +14711,21 @@ bool Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, Statement* stmt)
}
else
{
GenTree* helperExprStore =
dst->OperIs(GT_STORE_LCL_FLD)
? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(), dst->AsLclFld()->GetLclOffs(), true2Expr)
: gtNewStoreLclVarNode(dstLclNum, true2Expr)->AsLclVarCommon();
Statement* helperStmt = fgNewStmtFromTree(helperExprStore, stmt->GetDebugInfo());
GenTree* helperExprStore = gtNewStoreLclVarNode(tmp2, true2Expr)->AsLclVarCommon();
Statement* helperStmt = fgNewStmtFromTree(helperExprStore, stmt->GetDebugInfo());
fgInsertStmtAtEnd(helperBlock, helperStmt);
}

// RemainderBlock should get tmp = tmp2.
GenTree* tmp2CopyLcl = gtNewLclvNode(tmp2, dst->TypeGet());
unsigned dstLclNum = dst->AsLclVarCommon()->GetLclNum();
GenTree* resultCopy =
dst->OperIs(GT_STORE_LCL_FLD)
? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(), dst->AsLclFld()->GetLclOffs(), tmp2CopyLcl)
: gtNewStoreLclVarNode(dstLclNum, tmp2CopyLcl)->AsLclVarCommon();
Statement* resultCopyStmt = fgNewStmtFromTree(resultCopy, stmt->GetDebugInfo());
fgInsertStmtAtBeg(remainderBlock, resultCopyStmt);

// Finally remove the nested qmark stmt.
fgRemoveStmt(block, stmt);

Expand Down Expand Up @@ -15425,7 +15437,7 @@ PhaseStatus Compiler::fgRetypeImplicitByRefArgs()
(nonCallAppearances <= varDsc->lvFieldCnt));

#ifdef DEBUG
// Above is a profitability heurisic; either value of
// Above is a profitability heuristic; either value of
// undoPromotion should lead to correct code. So,
// under stress, make different decisions at times.
if (compStressCompile(STRESS_BYREF_PROMOTION, 25))
Expand Down

0 comments on commit f2d5b2f

Please sign in to comment.