From b73ef36fd07cca608a1cb4aafb491541b671b871 Mon Sep 17 00:00:00 2001 From: xuliangyu Date: Tue, 3 Mar 2026 19:11:25 +0800 Subject: [PATCH] [LoongArch64] Enable Runtime Async. (issue#124935) * Enable Runtime Async for LoongArch64 https://github.com/dotnet/runtime/issues/124935 * Fix up hijacking on loongarch64 (preserve the async continuation register). * Revert the `lvaAsyncExecutionContextVar` and `lvaAsyncSynchronizationContextVar` related offset adjustments, in accordance with LA64's frame layout. --- docs/design/coreclr/botr/clr-abi.md | 1 + src/coreclr/jit/codegenloongarch64.cpp | 42 ------------------- src/coreclr/jit/lclvars.cpp | 14 ------- .../nativeaot/Runtime/loongarch64/GcProbe.S | 37 +++++++++------- .../Runtime/unix/unixasmmacrosloongarch64.inc | 1 + src/coreclr/vm/loongarch64/asmhelpers.S | 12 ++++-- src/coreclr/vm/loongarch64/cgencpu.h | 5 +++ src/coreclr/vm/loongarch64/stubs.cpp | 2 + 8 files changed, 38 insertions(+), 76 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 889a4c9cde656b..3f1cf8dd3fa090 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -116,6 +116,7 @@ To return `Continuation` we use a volatile/calee-trash register that cannot be u | arm | r2 | | arm64 | x2 | | risc-v | a2 | +| loongarch64 | a2 | ### Passing `Continuation` argument The `Continuation` parameter is passed at the same position as generic instantiation parameter or immediately after, if both present. For x86 the argument order is reversed. 
diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 8cb00e60920b59..8df4e02f64e766 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -557,14 +557,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() { delta_PSP -= TARGET_POINTER_SIZE; } - if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - delta_PSP -= TARGET_POINTER_SIZE; - } - if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - delta_PSP -= TARGET_POINTER_SIZE; - } funcletFrameSize = funcletFrameSize - delta_PSP; funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); @@ -3711,14 +3703,6 @@ int CodeGenInterface::genSPtoFPdelta() const { delta -= TARGET_POINTER_SIZE; } - if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - delta -= TARGET_POINTER_SIZE; - } - if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - delta -= TARGET_POINTER_SIZE; - } assert(delta >= 0); return delta; @@ -6133,16 +6117,6 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, preservedAreaSize += 1; // bool for synchronized methods } - if (m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) - { - preservedAreaSize += TARGET_POINTER_SIZE; - } - - if (m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) - { - preservedAreaSize += TARGET_POINTER_SIZE; - } - // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the // frame gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize); @@ -6788,14 +6762,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe { localFrameSize -= TARGET_POINTER_SIZE; } - if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - localFrameSize -= TARGET_POINTER_SIZE; - } 
- if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - localFrameSize -= TARGET_POINTER_SIZE; - } #ifdef DEBUG if (m_compiler->opts.disAsm) @@ -6862,14 +6828,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) { localFrameSize -= TARGET_POINTER_SIZE; } - if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - localFrameSize -= TARGET_POINTER_SIZE; - } - if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR()) - { - localFrameSize -= TARGET_POINTER_SIZE; - } JITDUMP("Frame type. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; " "localloc? %s\n", diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 6238a4883335aa..9f0c0ce10ea8b1 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4348,20 +4348,6 @@ void Compiler::lvaFixVirtualFrameOffsets() delta += lvaLclStackHomeSize(lvaMonAcquired); } - if ((lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !opts.IsOSR()) - { - int offset = lvaTable[lvaAsyncExecutionContextVar].GetStackOffset() + delta; - lvaTable[lvaAsyncExecutionContextVar].SetStackOffset(offset); - delta += lvaLclStackHomeSize(lvaAsyncExecutionContextVar); - } - - if ((lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !opts.IsOSR()) - { - int offset = lvaTable[lvaAsyncSynchronizationContextVar].GetStackOffset() + delta; - lvaTable[lvaAsyncSynchronizationContextVar].SetStackOffset(offset); - delta += lvaLclStackHomeSize(lvaAsyncSynchronizationContextVar); - } - JITDUMP("--- delta bump %d for FP frame\n", delta); } #elif defined(TARGET_WASM) diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S index 118af7c53ca1ab..e022a907caa1c0 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S @@ -4,10 +4,11 @@ #include #include "AsmOffsets.inc" 
-#define PROBE_FRAME_SIZE 0x90 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + +#define PROBE_FRAME_SIZE 0xA0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + // 9 * 8 for callee saved registers + // 1 * 8 for caller SP + - // 2 * 8 for int returns + + // 3 * 8 for int returns (a0, a1, a2) + + // 1 * 8 for alignment padding + // 2 * 8 for FP returns // See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers @@ -37,13 +38,15 @@ // Slot at $sp+0x68 is reserved for caller sp - // Save the integer return registers + // Save the integer return registers, a2 might contain an objectref (async continuation) st.d $a0, $sp, 0x70 st.d $a1, $sp, 0x78 + st.d $a2, $sp, 0x80 + // Slot at $sp+0x88 is alignment padding // Save the FP return registers - fst.d $f0, $sp, 0x80 - fst.d $f1, $sp, 0x88 + fst.d $f0, $sp, 0x90 + fst.d $f1, $sp, 0x98 // Perform the rest of the PInvokeTransitionFrame initialization. st.d \threadReg, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread // Thread * (unused by stackwalker) @@ -66,10 +69,11 @@ // Restore the integer return registers ld.d $a0, $sp, 0x70 ld.d $a1, $sp, 0x78 + ld.d $a2, $sp, 0x80 // Restore the FP return registers - fld.d $f0, $sp, 0x80 - fld.d $f1, $sp, 0x88 + fld.d $f0, $sp, 0x90 + fld.d $f1, $sp, 0x98 // Restore callee saved registers EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 @@ -89,25 +93,26 @@ // All registers correct for return to the original return address. 
// // Register state on exit: -// a2: thread pointer +// a4: thread pointer +// a0, a1, a2: preserved // .macro FixupHijackedCallstack - // a2 <- GetThread() - INLINE_GETTHREAD $a2 + // a4 <- GetThread() + INLINE_GETTHREAD $a4 // // Fix the stack by restoring the original return address // // Load m_pvHijackedReturnAddress - ld.d $ra, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress + ld.d $ra, $a4, OFFSETOF__Thread__m_pvHijackedReturnAddress // // Clear hijack state // // Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress - st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation - st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8 + st.d $zero, $a4, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + st.d $zero, $a4, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8 .endm // @@ -122,16 +127,16 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler jirl $r0, $ra, 0 LOCAL_LABEL(WaitForGC): - li.d $t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + PTFF_THREAD_HIJACK) + li.d $t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + PTFF_SAVE_R6 + PTFF_THREAD_HIJACK) b C_FUNC(RhpWaitForGC) NESTED_END RhpGcProbeHijack .global C_FUNC(RhpThrowHwEx) NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler - PUSH_PROBE_FRAME $a2, $a3, $t3 + PUSH_PROBE_FRAME $a4, $a3, $t3 - ld.d $a0, $a2, OFFSETOF__Thread__m_pDeferredTransitionFrame + ld.d $a0, $a4, OFFSETOF__Thread__m_pDeferredTransitionFrame bl C_FUNC(RhpWaitForGC2) POP_PROBE_FRAME diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc index 265a188f82eb4a..cf3583aae5ba87 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc @@ -198,6 +198,7 @@ C_FUNC(\Name): #define PTFF_SAVE_SP 0x00000200 #define PTFF_SAVE_R4 0x00000800 #define PTFF_SAVE_R5 0x00001000 +#define PTFF_SAVE_R6 
0x00002000 #define PTFF_SAVE_ALL_PRESERVED 0x000001FF // NOTE: r23-r31 #define PTFF_THREAD_HIJACK 0x80000000 diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 9f424c39dd30f5..bd6b7beec16007 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -637,10 +637,12 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler // save any integral return value(s) st.d $a0, $sp, 88 st.d $a1, $sp, 96 + // save async continuation return value + st.d $a2, $sp, 104 // save any FP return value(s) - fst.d $f0, $sp, 104 - fst.d $f1, $sp, 112 + fst.d $f0, $sp, 112 + fst.d $f1, $sp, 120 ori $a0, $sp, 0 bl C_FUNC(OnHijackWorker) @@ -650,10 +652,12 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler // restore any integral return value(s) ld.d $a0, $sp, 88 ld.d $a1, $sp, 96 + // restore async continuation return value + ld.d $a2, $sp, 104 // restore any FP return value(s) - fld.d $f0, $sp, 104 - fld.d $f1, $sp, 112 + fld.d $f0, $sp, 112 + fld.d $f1, $sp, 120 EPILOG_RESTORE_REG_PAIR 23, 24, 16 EPILOG_RESTORE_REG_PAIR 25, 26, 32 diff --git a/src/coreclr/vm/loongarch64/cgencpu.h b/src/coreclr/vm/loongarch64/cgencpu.h index eb12a56d8dfd19..039a85d1782c02 100644 --- a/src/coreclr/vm/loongarch64/cgencpu.h +++ b/src/coreclr/vm/loongarch64/cgencpu.h @@ -429,6 +429,11 @@ struct HijackArgs size_t ReturnValue[2]; }; union + { + DWORD64 A2; + size_t AsyncRet; + }; + union { struct { DWORD64 F0; diff --git a/src/coreclr/vm/loongarch64/stubs.cpp b/src/coreclr/vm/loongarch64/stubs.cpp index b974c9511f3a63..8f69312046cb30 100644 --- a/src/coreclr/vm/loongarch64/stubs.cpp +++ b/src/coreclr/vm/loongarch64/stubs.cpp @@ -474,9 +474,11 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats pRD->pCurrentContext->A0 = m_Args->A0; pRD->pCurrentContext->A1 = m_Args->A1; + pRD->pCurrentContext->A2 = m_Args->A2; pRD->volatileCurrContextPointers.A0 = &m_Args->A0; 
pRD->volatileCurrContextPointers.A1 = &m_Args->A1; + pRD->volatileCurrContextPointers.A2 = &m_Args->A2; pRD->pCurrentContext->S0 = m_Args->S0; pRD->pCurrentContext->S1 = m_Args->S1;