diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 5a933a511f1d4a..421f3455fe4ac7 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -217,6 +217,7 @@ class CodeGen final : public CodeGenInterface ArrayStack* wasmControlFlowStack = nullptr; unsigned wasmCursor = 0; unsigned wasmExtraControlFlowDepth = 0; + unsigned wasmSpillRefIndex = 0; unsigned findTargetDepth(BasicBlock* target); void WasmProduceReg(GenTree* node); regNumber GetMultiUseOperandReg(GenTree* operand); diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 00b5ccf2740ac2..f69d63db476afa 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -459,6 +459,9 @@ void CodeGen::genCodeForBlock(BasicBlock* block) #endif #ifdef TARGET_WASM + // Reset spill counter at block boundaries. + wasmSpillRefIndex = 0; + // genHomeRegisterParams can generate arbitrary amounts of code on Wasm, so // we have moved it out of the prolog to the first basic block in order to // work around the restriction that the prolog can only be one insGroup. 
diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 0bf6b32b214fa5..4fd271c46ad8fe 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -858,6 +858,7 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; case GT_CALL: + wasmSpillRefIndex = 0; genCall(treeNode->AsCall()); break; @@ -911,6 +912,36 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) GetEmitter()->emitIns(INS_unreachable); break; + case GT_WASM_SPILL_REF: + { + const unsigned splashZoneVar = m_compiler->m_wasmSpillSlots->at(0); + noway_assert(wasmSpillRefIndex + 1 < m_compiler->m_wasmSpillSlots->size()); + const unsigned spillTargetVar = m_compiler->m_wasmSpillSlots->at(wasmSpillRefIndex + 1); + unsigned splashZoneLclIndex; + bool FPBased; + + { + LclVarDsc* varDsc = m_compiler->lvaGetDesc(splashZoneVar); + assert(genIsValidReg(varDsc->GetRegNum())); + splashZoneLclIndex = WasmRegToIndex(varDsc->GetRegNum()); + + GetEmitter()->emitIns_I(INS_local_tee, EA_PTRSIZE, splashZoneLclIndex); + } + + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, GetFramePointerRegIndex()); + m_compiler->lvaFrameAddress(spillTargetVar, &FPBased); + GetEmitter()->emitIns_S(INS_I_const, EA_PTRSIZE, spillTargetVar, 0); + GetEmitter()->emitIns(INS_I_add); + + GetEmitter()->emitIns_I(INS_local_get, EA_PTRSIZE, splashZoneLclIndex); + + instruction ins = ins_Store(TYP_BYREF); + GetEmitter()->emitIns_I(ins, EA_PTRSIZE, 0); + + wasmSpillRefIndex++; + break; + } + case GT_CATCH_ARG: genCatchArg(treeNode); break; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a786daa966f13b..9dabdf18b98e36 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5053,6 +5053,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // keep the Virtual IP updated. 
// DoPhase(this, PHASE_WASM_VIRTUAL_IP, &Compiler::fgWasmVirtualIP); + + // Ensure that any refs or byrefs live at call sites are spilled + // to pinned stack slots so the objects aren't moved. + // + DoPhase(this, PHASE_WASM_SPILL_REFS, &Compiler::WasmSpillRefs); #endif FinalizeEH(); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b25ce125243fa1..1fa701c9f600c3 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4244,6 +4244,7 @@ class Compiler unsigned lvaWasmVirtualIP = BAD_VAR_NUM; // Wasm virtual IP slot unsigned lvaWasmFunctionIndex = BAD_VAR_NUM; // Wasm function index slot unsigned lvaWasmResumeIP = BAD_VAR_NUM; // Wasm catch resumption IP slot + jitstd::vector* m_wasmSpillSlots = nullptr; #endif // defined(TARGET_WASM) unsigned lvaInlinedPInvokeFrameVar = BAD_VAR_NUM; // variable representing the InlinedCallFrame @@ -4646,6 +4647,7 @@ class Compiler unsigned lvaTrackedIndexToLclNum(unsigned trackedIndex) { assert(trackedIndex < lvaTrackedCount); + assert(trackedIndex < lvaTrackedToVarNumSize); unsigned lclNum = lvaTrackedToVarNum[trackedIndex]; assert(lclNum < lvaCount); return lclNum; @@ -6757,6 +6759,7 @@ class Compiler PhaseStatus fgWasmControlFlow(); PhaseStatus fgWasmTransformSccs(); PhaseStatus fgWasmVirtualIP(); + PhaseStatus WasmSpillRefs(); #ifdef DEBUG void fgDumpWasmControlFlow(); void fgDumpWasmControlFlowDot(); diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index b6cc1a7cfc251b..f35cf6ce0e0a64 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -73,6 +73,7 @@ CompMemKindMacro(RangeCheckCloning) CompMemKindMacro(WasmSccTransform) CompMemKindMacro(WasmCfgLowering) CompMemKindMacro(WasmEH) +CompMemKindMacro(WasmSpillRefs) //clang-format on #undef CompMemKindMacro diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index d49a6cafbb063d..8c7b83b60974d6 100644 --- a/src/coreclr/jit/compphases.h +++ 
b/src/coreclr/jit/compphases.h
@@ -130,6 +130,7 @@ CompPhaseNameMacro(PHASE_DFS_BLOCKS_WASM, "Wasm remove unreachable bl
 CompPhaseNameMacro(PHASE_WASM_EH_FLOW, "Wasm eh control flow", false, -1, false)
 CompPhaseNameMacro(PHASE_WASM_TRANSFORM_SCCS, "Wasm transform sccs", false, -1, false)
 CompPhaseNameMacro(PHASE_WASM_CONTROL_FLOW, "Wasm control flow", false, -1, false)
+CompPhaseNameMacro(PHASE_WASM_SPILL_REFS, "Wasm spill refs", false, -1, false)
 CompPhaseNameMacro(PHASE_WASM_VIRTUAL_IP, "Wasm virtual IP", false, -1, false)
 
 CompPhaseNameMacro(PHASE_ASYNC, "Transform async", false, -1, true)
diff --git a/src/coreclr/jit/fgwasm.cpp b/src/coreclr/jit/fgwasm.cpp
index b417a598e934f1..fe2c69a6e47ead 100644
--- a/src/coreclr/jit/fgwasm.cpp
+++ b/src/coreclr/jit/fgwasm.cpp
@@ -1671,6 +1671,146 @@ PhaseStatus Compiler::fgWasmControlFlow()
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
 
+//------------------------------------------------------------------------
+// WasmSpillRefs: wrap TYP_REF/TYP_BYREF values that are pending at a call in
+//    GT_WASM_SPILL_REF nodes, and allocate the locals those spills use.
+//
+// Return Value:
+//    MODIFIED_NOTHING if no call had a pending ref/byref value (nothing was
+//    inserted); MODIFIED_EVERYTHING otherwise.
+//
+// Notes:
+//    Each block's LIR is walked in order while tracking ref/byref values that
+//    have been defined but not yet consumed. At every call, each tracked value
+//    gets a GT_WASM_SPILL_REF inserted directly after its def, and tracking
+//    restarts. m_wasmSpillSlots[0] holds the scratch ("splash zone") local;
+//    entries [1..highWaterMark] hold the pinned byref spill slot locals.
+//
+//    NOTE(review): a value spilled for one call that is still pending at a
+//    later call is not tracked again; confirm that codegen's slot numbering
+//    cannot reuse its slot while it is still live.
+//
+PhaseStatus Compiler::WasmSpillRefs()
+{
+    // Largest number of values pending at any one call, i.e. the number of spill slots needed.
+    // It is non-zero exactly when at least one spill node was inserted.
+    size_t                   highWaterMark = 0;
+    jitstd::vector<GenTree*> defs(getAllocator(CMK_WasmSpillRefs));
+
+    for (BasicBlock* const block : Blocks())
+    {
+        // LIR edges cannot span blocks, so we can safely clear the list of live values per-block
+        defs.clear();
+
+        LIR::Range& range = LIR::AsRange(block);
+
+        for (GenTree* tree : range)
+        {
+            if (tree->IsCall())
+            {
+                highWaterMark = std::max(highWaterMark, defs.size());
+
+                if (!defs.empty())
+                {
+                    JITDUMP("Spilling %u live ref(s) for call\n", static_cast<unsigned>(defs.size()));
+                    DISPNODE(tree);
+                    for (GenTree* def : defs)
+                    {
+                        JITDUMP(" ");
+                        DISPNODE(def);
+                        GenTreeUnOp* spill = gtNewOperNode(GT_WASM_SPILL_REF, def->TypeGet(), def);
+
+                        // Look the use up outside the assert so the call that fills 'use' is not
+                        // buried inside an assertion expression.
+                        LIR::Use   use;
+                        const bool foundUse = range.TryGetUse(def, &use);
+                        noway_assert(foundUse);
+
+                        // Redirect the consumer to the spill node, then place it right after the def.
+                        use.ReplaceWith(spill);
+                        range.InsertAfter(def, spill);
+                    }
+
+                    defs.clear();
+                }
+            }
+
+            // FIXME: Should this happen before the spilling of the live defs list?
+            // I think the answer is no, because live defs being passed as arguments to the current call
+            // are not guaranteed to ever end up in memory where the GC can see them unless we spill
+            // them. If we can somehow guarantee that all callees will spill their ref parameters
+            // immediately, we could do this before the block above.
+            // Remove used nodes from defs list, they're no longer meaningfully 'live'.
+            tree->VisitOperands([&defs](GenTree* op) {
+                if (!op->IsValue() || !op->TypeIs(TYP_REF, TYP_BYREF))
+                {
+                    return GenTree::VisitResult::Continue;
+                }
+
+                for (size_t i = defs.size(); i > 0; i--)
+                {
+                    if (op == defs[i - 1])
+                    {
+                        // Order is irrelevant, so remove by overwriting with the last entry.
+                        defs[i - 1] = defs.back();
+                        defs.pop_back();
+                        break;
+                    }
+                }
+
+                return GenTree::VisitResult::Continue;
+            });
+
+            // An unused value has no consumer, so there would be no use to redirect to a spill
+            // node at the next call; do not track it.
+            if (tree->IsValue() && !tree->IsUnusedValue() && tree->TypeIs(TYP_REF, TYP_BYREF) &&
+                !tree->OperIs(GT_WASM_SPILL_REF))
+            {
+                // TODO: Can we skip this for GT_LCL_VAR when it lives in memory? Or is it possible
+                // that the LCL_VAR has been modified since it was loaded onto the Wasm stack?
+                defs.push_back(tree);
+            }
+        }
+    }
+
+    JITDUMP("High water mark for refs was %u\n", static_cast<unsigned>(highWaterMark));
+    if (highWaterMark == 0)
+    {
+        return PhaseStatus::MODIFIED_NOTHING;
+    }
+
+    m_wasmSpillSlots = new (this, CMK_WasmSpillRefs)
+        jitstd::vector<unsigned>(highWaterMark + 1, 0, getAllocator(CMK_WasmSpillRefs));
+
+    // Allocate a temporary wasm local to use as a scratch slot during spills
+    {
+        const unsigned   varNum = lvaGrabTemp(false DEBUGARG("WasmSpillRefs splash zone"));
+        LclVarDsc* const varDsc = lvaGetDesc(varNum);
+        // HACK: Make this TYP_I_IMPL because if we make it a REF or BYREF that may block enregistration
+        varDsc->lvType = TYP_I_IMPL;
+        varDsc->lvHasExplicitInit = true;
+        varDsc->lvImplicitlyReferenced = true;
+        // HACK: If we don't make this var tracked, regalloc will crash when allocating a register for it
+        varDsc->lvTracked = true;
+        m_wasmSpillSlots->at(0) = varNum;
+    }
+
+    // Allocate N temporary refs to act as GC-visible storage for all spills that occur during execution
+    for (size_t i = 0; i < highWaterMark; i++)
+    {
+        const unsigned   varNum = lvaGrabTemp(false DEBUGARG("WasmSpillRefs spill slot"));
+        LclVarDsc* const varDsc = lvaGetDesc(varNum);
+        varDsc->lvType = TYP_BYREF;
+        varDsc->lvPinned = true;
+        varDsc->lvImplicitlyReferenced = true;
+        varDsc->lvMustInit = true;
+        lvaSetVarDoNotEnregister(varNum, DoNotEnregisterReason::WasmGCVisibility);
+        m_wasmSpillSlots->at(i + 1) = varNum;
+    }
+
+    return PhaseStatus::MODIFIED_EVERYTHING;
+}
+
 #ifdef DEBUG
 
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 3af72048706405..9c35c9b077a637 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -11767,6 +11767,9 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
         case GT_RETURN_SUSPEND:
         case GT_PATCHPOINT_FORCED:
         case GT_NONLOCAL_JMP:
+#ifdef TARGET_WASM
+        case GT_WASM_SPILL_REF:
+#endif
             m_edge = &m_node->AsUnOp()->gtOp1;
             assert(*m_edge !=
nullptr); m_advance = &GenTreeUseEdgeIterator::Terminate; diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index b293df525ed695..b6c5e498113623 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -357,6 +357,7 @@ GTNODE(SWIFT_ERROR_RET , GenTreeOp ,0,1,GTK_BINOP|GTK_NOVALUE) // Retu GTNODE(WASM_JEXCEPT , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Special jump for Wasm exception handling GTNODE(WASM_THROW_REF , GenTree ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR) // Wasm rethrow host exception (exception is an implicit operand) +GTNODE(WASM_SPILL_REF , GenTreeOp ,0,0,GTK_UNOP|DBK_NOTHIR) //----------------------------------------------------------------------------- // Nodes used by Lower to generate a closer CPU representation of other nodes diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index e6e4a0969f3ba0..f9d1d353901800 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -190,6 +190,12 @@ void WasmRegAlloc::IdentifyCandidates() varIsRegCandidate = false; } + // HACK: Ensure that we always enregister the splash zone, even if we are not enregistering other locals + if (m_compiler->m_wasmSpillSlots && m_compiler->m_wasmSpillSlots->size() && m_compiler->m_wasmSpillSlots->at(0) == lclNum) + { + varIsRegCandidate = true; + } + if (varIsRegCandidate) { JITDUMP("RA candidate: V%02u\n", lclNum);