diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index dc436ed0ff9e82..d27f9f6139df06 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4349,6 +4349,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl return; } + DoPhase(this, PHASE_EARLY_QMARK_EXPANSION, [this]() { + return fgExpandQmarkNodes(/*early*/ true); + }); + // If instrumenting, add block and class probes. // if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR)) @@ -4546,7 +4550,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Decide the kind of code we want to generate fgSetOptions(); - fgExpandQmarkNodes(); + fgExpandQmarkNodes(/*early*/ false); #ifdef DEBUG compCurBB = nullptr; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index abd9666e5114bf..0ff34295c27ff7 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4600,6 +4600,7 @@ class Compiler const BYTE* codeEndp, BoxPatterns opts); void impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken); + bool impImportAndPushBoxForNullable(CORINFO_RESOLVED_TOKEN* pResolvedToken); void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo); @@ -5571,8 +5572,8 @@ class Compiler Statement* fgNewStmtFromTree(GenTree* tree, const DebugInfo& di); GenTreeQmark* fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst = nullptr); - bool fgExpandQmarkStmt(BasicBlock* block, Statement* stmt); - void fgExpandQmarkNodes(); + bool fgExpandQmarkStmt(BasicBlock* block, Statement* stmt, bool onlyEarlyQmarks); + PhaseStatus fgExpandQmarkNodes(bool early); bool fgSimpleLowerCastOfSmpOp(LIR::Range& range, GenTreeCast* cast); bool fgSimpleLowerBswap16(LIR::Range& range, GenTree* op); @@ -7439,6 +7440,7 @@ class Compiler #define OMF_HAS_EXPANDABLE_CAST 0x00080000 // Method contains casts eligible for late expansion #define OMF_HAS_STACK_ARRAY 0x00100000 // Method contains stack allocated arrays #define OMF_HAS_BOUNDS_CHECKS 0x00200000 // Method contains bounds checks +#define OMF_HAS_EARLY_QMARKS 0x00400000 // Method contains early expandable QMARKs // clang-format on diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index c284ba905f4542..191ed978601c69 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -63,6 +63,7 @@ CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", CompPhaseNameMacro(PHASE_COMPUTE_BLOCK_WEIGHTS, "Compute block weights", false, -1, false) CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", false, -1, false) CompPhaseNameMacro(PHASE_HEAD_TAIL_MERGE, "Head and tail merge", false, -1, false) +CompPhaseNameMacro(PHASE_EARLY_QMARK_EXPANSION, "Early QMARK expansion", false, -1, false) CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", false, -1, false) CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", false, -1, false) CompPhaseNameMacro(PHASE_HEAD_TAIL_MERGE2, "Post-morph head and tail merge", false, -1, false) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 0726b9b9f8846f..e7ebe43a7bc2a4 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -502,6 +502,8 @@ enum GenTreeFlags : unsigned GTF_BOX_CLONED = 0x40000000, // GT_BOX -- this box and its operand has been cloned, cannot assume it to be single-use anymore GTF_BOX_VALUE = 0x80000000, // GT_BOX -- "box" is on a value type + GTF_QMARK_EARLY_EXPAND = 0x01000000, // GT_QMARK -- early expansion of the QMARK node is required + GTF_ARR_ADDR_NONNULL = 0x80000000, // GT_ARR_ADDR -- this array's address is not null #define HANDLE_KIND_INDEX_SHIFT 24 @@ -5937,6 +5939,16 @@ struct GenTreeQmark : public GenTreeOp gtThenLikelihood = thenLikelihood; } + bool IsEarlyExpandableQmark() const + { + return gtFlags & GTF_QMARK_EARLY_EXPAND; + } + + void SetEarlyExpandableQmark() + { + gtFlags |= GTF_QMARK_EARLY_EXPAND; + } + #if DEBUGGABLE_GENTREE GenTreeQmark() : GenTreeOp() diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e9e4bc20ee5931..749309e8d7c09b 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3355,6 +3355,107 @@ int Compiler::impBoxPatternMatch(CORINFO_RESOLVED_TOKEN* pResolvedToken, return -1; } +//------------------------------------------------------------------------ +// impImportAndPushBoxForNullable: import a "box Nullable<>" as an inlined sequence: +// arg._hasValue == 0 ? +// null : +// (lcl = allocobj; *(lcl + sizeof(void*)) = arg._value; lcl) +// +// Arguments: +// pResolvedToken - resolved token from the box operation +// +// Return Value: +// true if inlined, false otherwise. +// +bool Compiler::impImportAndPushBoxForNullable(CORINFO_RESOLVED_TOKEN* pResolvedToken) +{ + CORINFO_CLASS_HANDLE nullableCls = pResolvedToken->hClass; + assert(info.compCompHnd->getBoxHelper(nullableCls) == CORINFO_HELP_BOX_NULLABLE); + + if (opts.OptimizationDisabled() || compCurBB->isRunRarely()) + { + return false; + } + + if (eeIsSharedInst(nullableCls) || IsReadyToRun()) + { + // TODO-CQ: Enable the optimization for shared generics and R2R scenarios. + // The current machinery requires a ResolvedToken (basically, 'newobj underlyingType' + // that we don't have). + return false; + } + + GenTree* nullableObj = impPopStack().val; + + // Decompose the Nullable<> arg into _hasValue and _value fields + // and calculate the type and layout of the 'value' field + // + GenTreeFlags indirFlags = GTF_EMPTY; + nullableObj = impGetNodeAddr(nullableObj, CHECK_SPILL_ALL, &indirFlags); + GenTree* nullableObjClone; + nullableObj = impCloneExpr(nullableObj, &nullableObjClone, CHECK_SPILL_ALL, nullptr DEBUGARG("nullable obj clone")); + + CORINFO_FIELD_HANDLE valueFldHnd = info.compCompHnd->getFieldInClass(nullableCls, 1); + CORINFO_CLASS_HANDLE valueStructCls = NO_CLASS_HANDLE; + static_assert(OFFSETOF__CORINFO_NullableOfT__hasValue == 0); + unsigned cnsValueOffset = info.compCompHnd->getFieldOffset(valueFldHnd); + ClassLayout* layout = nullptr; + CorInfoType corFldType = info.compCompHnd->getFieldType(valueFldHnd, &valueStructCls); + var_types valueType = TypeHandleToVarType(corFldType, valueStructCls, &layout); + GenTree* valueOffset = gtNewIconNode(cnsValueOffset, TYP_I_IMPL); + GenTree* valueAddr = gtNewOperNode(GT_ADD, TYP_BYREF, nullableObjClone, valueOffset); + GenTree* value = gtNewLoadValueNode(valueType, layout, valueAddr); + GenTree* hasValue = gtNewLoadValueNode(TYP_UBYTE, nullptr, nullableObj); + + // Create the allocation node for the box + // + CORINFO_CLASS_HANDLE typeToBox = info.compCompHnd->getTypeForBox(nullableCls); + bool hasSideEffects; + CorInfoHelpFunc helperTemp = info.compCompHnd->getNewHelper(typeToBox, &hasSideEffects); + GenTree* typeToBoxHnd = gtNewIconEmbClsHndNode(typeToBox); + GenTreeAllocObj* allocObj = gtNewAllocObjNode(helperTemp, hasSideEffects, typeToBox, TYP_REF, typeToBoxHnd); + + // Now we need to copy value into the allocated box + // + unsigned objLclNum = lvaGrabTemp(true DEBUGARG("obj nullable box")); + GenTree* storeAlloc = gtNewTempStore(objLclNum, allocObj); + GenTree* objLcl = gtNewLclvNode(objLclNum, TYP_REF); + GenTree* pOffset = gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL); + GenTreeOp* dataPtr = gtNewOperNode(GT_ADD, TYP_BYREF, gtCloneExpr(objLcl), pOffset); + GenTree* storeData = gtNewStoreValueNode(valueType, layout, dataPtr, gtCloneExpr(value), GTF_IND_NONFAULTING); + + // Wrap it all in two commas, it will look like: + // lcl = allocobj + // *(lcl + sizeof(void*)) = value + // lcl + // + GenTreeOp* copyData = gtNewOperNode(GT_COMMA, TYP_REF, storeData, gtCloneExpr(objLcl)); + GenTreeOp* allocRoot = gtNewOperNode(GT_COMMA, TYP_REF, storeAlloc, copyData); + + // QMARK expansion will propagate block flags properly. + compCurBB->SetFlags(BBF_HAS_NEWOBJ); + optMethodFlags |= OMF_HAS_NEWOBJ | OMF_HAS_EARLY_QMARKS; + + GenTreeOp* cond = gtNewOperNode(GT_EQ, TYP_INT, hasValue, gtNewIconNode(0)); + GenTreeColon* colon = gtNewColonNode(TYP_REF, gtNewNull(), allocRoot); + GenTreeQmark* qmark = gtNewQmarkNode(TYP_REF, cond, colon); + + // We have to expand early since GT_ALLOCOBJ must be a top-level statement + qmark->SetEarlyExpandableQmark(); + + // QMARK has to be a top-level statement + const unsigned result = lvaGrabTemp(true DEBUGARG("spilling qmarkNullableBox")); + impStoreToTemp(result, qmark, CHECK_SPILL_ALL); + lvaSetClass(result, typeToBox, true); + lvaSetClass(objLclNum, typeToBox, true); + impPushOnStack(gtNewLclvNode(result, TYP_REF), typeInfo(typeToBox)); + + JITDUMP(" inlined BOX(%s) as QMARK allocating box and copying fields:\n", eeGetClassName(nullableCls)); + DISPTREE(qmark); + JITDUMP("\n"); + return true; +} + //------------------------------------------------------------------------ // impImportAndPushBox: build and import a value-type box // @@ -3405,15 +3506,25 @@ void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken) impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("async box with call")); } + // Look at what helper we should use. + CorInfoHelpFunc boxHelper = info.compCompHnd->getBoxHelper(pResolvedToken->hClass); + + // Expand nullable boxes inline in optimized code in hot paths, it's slightly different from + // other expansions since we have to use QMARK to return null for nullables with no value. + // + // obj = nullable._hasValue == 0 ? null : (boxed underlying value) + // + if ((boxHelper == CORINFO_HELP_BOX_NULLABLE) && impImportAndPushBoxForNullable(pResolvedToken)) + { + return; + } + // Get get the expression to box from the stack. GenTree* op1 = nullptr; GenTree* op2 = nullptr; StackEntry se = impPopStack(); GenTree* exprToBox = se.val; - // Look at what helper we should use. - CorInfoHelpFunc boxHelper = info.compCompHnd->getBoxHelper(pResolvedToken->hClass); - // Determine what expansion to prefer. // // In size/time/debuggable constrained modes, the helper call diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index e2293dacb4d939..7f843cbd69c951 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14396,8 +14396,9 @@ GenTreeQmark* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = N // fgExpandQmarkStmt: expand a qmark into control flow // // Arguments: -// block - block containing the qmark -// stmt - statement containing the qmark +// block - block containing the qmark +// stmt - statement containing the qmark +// onlyEarlyQmarks - if true, only expand qmarks that are marked for early expansion // // Returns: // true if the expansion introduced a throwing block @@ -14452,7 +14453,7 @@ GenTreeQmark* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = N // If the qmark assigns to a variable, then create tmps for "then" // and "else" results and assign the temp to the variable as a writeback step. // -bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) +bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt, bool onlyEarlyQmarks) { bool introducedThrow = false; GenTree* expr = stmt->GetRootNode(); @@ -14465,6 +14466,11 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) return false; } + if (onlyEarlyQmarks && !qmark->IsEarlyExpandableQmark()) + { + return false; + } + #ifdef DEBUG if (verbose) { @@ -14642,6 +14648,14 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) } else { + // There is no good reason to keep root COMMAs when we can split them into separate statements. + while (trueExpr->OperIs(GT_COMMA)) + { + Statement* trueStmt = fgNewStmtFromTree(trueExpr->gtGetOp1(), stmt->GetDebugInfo()); + trueExpr = trueExpr->gtGetOp2(); + fgInsertStmtAtEnd(thenBlock, trueStmt); + } + if (dst != nullptr) { trueExpr = dst->OperIs(GT_STORE_LCL_FLD) ? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(), @@ -14665,6 +14679,14 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) } else { + // There is no good reason to keep root COMMAs when we can split them into separate statements. + while (falseExpr->OperIs(GT_COMMA)) + { + Statement* falseStmt = fgNewStmtFromTree(falseExpr->gtGetOp1(), stmt->GetDebugInfo()); + falseExpr = falseExpr->gtGetOp2(); + fgInsertStmtAtEnd(elseBlock, falseStmt); + } + if (dst != nullptr) { falseExpr = @@ -14688,33 +14710,47 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) return introducedThrow; } -/***************************************************************************** - * - * Expand GT_QMARK nodes from the flow graph into basic blocks. - * - */ - -void Compiler::fgExpandQmarkNodes() +//------------------------------------------------------------------------ +// fgExpandQmarkNodes: expand all QMARK nodes into control flow. +// +// Arguments: +// early - whether this is the early expansion phase. Late expansion +// happens too late for some optimizations, e.g. object stack-alloc. +// +// Returns: +// Suitable phase status. +// +PhaseStatus Compiler::fgExpandQmarkNodes(bool early) { + if (early && ((optMethodFlags & OMF_HAS_EARLY_QMARKS) == 0)) + { + return PhaseStatus::MODIFIED_NOTHING; + } + + if (!compQmarkUsed) + { + return PhaseStatus::MODIFIED_NOTHING; + } + bool introducedThrows = false; - if (compQmarkUsed) + for (BasicBlock* const block : Blocks()) { - for (BasicBlock* const block : Blocks()) + for (Statement* const stmt : block->Statements()) { - for (Statement* const stmt : block->Statements()) - { - GenTree* expr = stmt->GetRootNode(); -#ifdef DEBUG - fgPreExpandQmarkChecks(expr); -#endif - introducedThrows |= fgExpandQmarkStmt(block, stmt); - } + GenTree* expr = stmt->GetRootNode(); + INDEBUG(fgPreExpandQmarkChecks(expr)); + introducedThrows |= fgExpandQmarkStmt(block, stmt, early); } + } + #ifdef DEBUG + if (!early) + { fgPostExpandQmarkChecks(); -#endif } +#endif + compQmarkRationalized = true; // TODO: if qmark expansion created throw blocks, try and merge them @@ -14723,6 +14759,8 @@ void Compiler::fgExpandQmarkNodes() { JITDUMP("Qmark expansion created new throw blocks\n"); } + + return PhaseStatus::MODIFIED_EVERYTHING; } //------------------------------------------------------------------------