Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/design/coreclr/botr/clr-abi.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ To return `Continuation` we use a volatile/callee-trash register that cannot be u
| arm | r2 |
| arm64 | x2 |
| risc-v | a2 |
| loongarch64 | a2 |

### Passing `Continuation` argument
The `Continuation` parameter is passed at the same position as generic instantiation parameter or immediately after, if both present. For x86 the argument order is reversed.
Expand Down
42 changes: 0 additions & 42 deletions src/coreclr/jit/codegenloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,14 +557,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
{
delta_PSP -= TARGET_POINTER_SIZE;
}
if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
{
delta_PSP -= TARGET_POINTER_SIZE;
}
if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
{
delta_PSP -= TARGET_POINTER_SIZE;
}

funcletFrameSize = funcletFrameSize - delta_PSP;
funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN);
Expand Down Expand Up @@ -3711,14 +3703,6 @@ int CodeGenInterface::genSPtoFPdelta() const
{
delta -= TARGET_POINTER_SIZE;
}
if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
{
delta -= TARGET_POINTER_SIZE;
}
if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
{
delta -= TARGET_POINTER_SIZE;
}

assert(delta >= 0);
return delta;
Expand Down Expand Up @@ -6133,16 +6117,6 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
preservedAreaSize += 1; // bool for synchronized methods
}

if (m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM)
{
preservedAreaSize += TARGET_POINTER_SIZE;
}

if (m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM)
{
preservedAreaSize += TARGET_POINTER_SIZE;
}

// Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
// frame
gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
Expand Down Expand Up @@ -6788,14 +6762,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe
{
localFrameSize -= TARGET_POINTER_SIZE;
}
if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
{
localFrameSize -= TARGET_POINTER_SIZE;
}
if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
{
localFrameSize -= TARGET_POINTER_SIZE;
}

#ifdef DEBUG
if (m_compiler->opts.disAsm)
Expand Down Expand Up @@ -6862,14 +6828,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
{
localFrameSize -= TARGET_POINTER_SIZE;
}
if ((m_compiler->lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
{
localFrameSize -= TARGET_POINTER_SIZE;
}
if ((m_compiler->lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !m_compiler->opts.IsOSR())
{
localFrameSize -= TARGET_POINTER_SIZE;
}

JITDUMP("Frame type. #outsz=%d; #framesz=%d; #calleeSaveRegsPushed:%d; "
"localloc? %s\n",
Expand Down
14 changes: 0 additions & 14 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4348,20 +4348,6 @@ void Compiler::lvaFixVirtualFrameOffsets()
delta += lvaLclStackHomeSize(lvaMonAcquired);
}

if ((lvaAsyncExecutionContextVar != BAD_VAR_NUM) && !opts.IsOSR())
{
int offset = lvaTable[lvaAsyncExecutionContextVar].GetStackOffset() + delta;
lvaTable[lvaAsyncExecutionContextVar].SetStackOffset(offset);
delta += lvaLclStackHomeSize(lvaAsyncExecutionContextVar);
}

if ((lvaAsyncSynchronizationContextVar != BAD_VAR_NUM) && !opts.IsOSR())
{
int offset = lvaTable[lvaAsyncSynchronizationContextVar].GetStackOffset() + delta;
lvaTable[lvaAsyncSynchronizationContextVar].SetStackOffset(offset);
delta += lvaLclStackHomeSize(lvaAsyncSynchronizationContextVar);
}

Comment on lines -4351 to -4364
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For EnC to work it is necessary that this is allocated in the top of the stack frame and included as part of the EnC frame header (the code you deleted under the m_compiler->opts.compDbgEnC in the PR). This is similar to how lvaMonAcquired is handled.
I am ok with it, but just know that EnC will not work correctly without this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you very much for pointing this out!

I have some questions about this; could you please give me some suggestions?

  1. Does EnC require FEATURE_REMAP_FUNCTION to be enabled? If so, I think LA64 will not hit the EnC scenarios for now. (The code under m_compiler->opts.compDbgEnC in CodeGen::genCreateAndStoreGCInfo looks improper for LA64 at the moment; I will delete it and add an assert there.)

  2. Are there any tests related to EnC? We can add EnC support for LA64 later if necessary. (I once ran a hot reload demo on .NET 6 and it worked on LA64 at that time: https://github.com/jsuarezruiz/AvaloniaSkiaSharpFiddle ).

For EnC to work it is necessary that this is allocated in the top of the stack frame and included as part of the EnC frame header (the code you deleted under the m_compiler->opts.compDbgEnC in the PR). This is similar to how lvaMonAcquired is handled.

Thank you! If I understand correctly, lvaAsyncExecutionContextVar and lvaAsyncSynchronizationContextVar are temp locals. On loongarch64 (and maybe on risc-v, which has a similar frame layout), if they are placed after lvaMonAcquired, the temp locals area will be split by the callee-saved registers and fp/ra. I think we should handle this case when zero-initializing the frame in CodeGen::genZeroInitFrameUsingBlockInit() to avoid clearing the callee-saved registers/fp/ra on the stack.

Here I think that, in int offset = lvaTable[lvaAsyncExecutionContextVar].GetStackOffset() + delta; , delta should be (compCalleeRegsPushed << 3), and likewise for lvaMonAcquired and lvaAsyncSynchronizationContextVar.

JITDUMP("--- delta bump %d for FP frame\n", delta);
}
#elif defined(TARGET_WASM)
Expand Down
37 changes: 21 additions & 16 deletions src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
#include <unixasmmacros.inc>
#include "AsmOffsets.inc"

#define PROBE_FRAME_SIZE 0x90 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) +
#define PROBE_FRAME_SIZE 0xA0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) +
// 9 * 8 for callee saved registers +
// 1 * 8 for caller SP +
// 2 * 8 for int returns +
// 3 * 8 for int returns (a0, a1, a2) +
// 1 * 8 for alignment padding +
// 2 * 8 for FP returns

// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers
Expand Down Expand Up @@ -37,13 +38,15 @@

// Slot at $sp+0x68 is reserved for caller sp

// Save the integer return registers
// Save the integer return registers, a2 might contain an objectref (async continuation)
st.d $a0, $sp, 0x70
st.d $a1, $sp, 0x78
st.d $a2, $sp, 0x80
// Slot at $sp+0x88 is alignment padding

// Save the FP return registers
fst.d $f0, $sp, 0x80
fst.d $f1, $sp, 0x88
fst.d $f0, $sp, 0x90
fst.d $f1, $sp, 0x98

// Perform the rest of the PInvokeTransitionFrame initialization.
st.d \threadReg, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread // Thread * (unused by stackwalker)
Expand All @@ -66,10 +69,11 @@
// Restore the integer return registers
ld.d $a0, $sp, 0x70
ld.d $a1, $sp, 0x78
ld.d $a2, $sp, 0x80

// Restore the FP return registers
fld.d $f0, $sp, 0x80
fld.d $f1, $sp, 0x88
fld.d $f0, $sp, 0x90
fld.d $f1, $sp, 0x98

// Restore callee saved registers
EPILOG_RESTORE_REG_PAIR 23, 24, 0x20
Expand All @@ -89,25 +93,26 @@
// All registers correct for return to the original return address.
//
// Register state on exit:
// a2: thread pointer
// a4: thread pointer
// a0, a1, a2: preserved
//
.macro FixupHijackedCallstack

// a2 <- GetThread()
INLINE_GETTHREAD $a2
// a4 <- GetThread()
INLINE_GETTHREAD $a4

//
// Fix the stack by restoring the original return address
//
// Load m_pvHijackedReturnAddress
ld.d $ra, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress
ld.d $ra, $a4, OFFSETOF__Thread__m_pvHijackedReturnAddress

//
// Clear hijack state
//
// Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress
st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8
st.d $zero, $a4, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation
st.d $zero, $a4, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8
.endm

//
Expand All @@ -122,16 +127,16 @@ NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler
jirl $r0, $ra, 0

LOCAL_LABEL(WaitForGC):
li.d $t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + PTFF_THREAD_HIJACK)
li.d $t3, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5 + PTFF_SAVE_R6 + PTFF_THREAD_HIJACK)
b C_FUNC(RhpWaitForGC)
NESTED_END RhpGcProbeHijack

.global C_FUNC(RhpThrowHwEx)

NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler
PUSH_PROBE_FRAME $a2, $a3, $t3
PUSH_PROBE_FRAME $a4, $a3, $t3

ld.d $a0, $a2, OFFSETOF__Thread__m_pDeferredTransitionFrame
ld.d $a0, $a4, OFFSETOF__Thread__m_pDeferredTransitionFrame
bl C_FUNC(RhpWaitForGC2)

POP_PROBE_FRAME
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ C_FUNC(\Name):
#define PTFF_SAVE_SP 0x00000200
#define PTFF_SAVE_R4 0x00000800
#define PTFF_SAVE_R5 0x00001000
#define PTFF_SAVE_R6 0x00002000
#define PTFF_SAVE_ALL_PRESERVED 0x000001FF // NOTE: r23-r31
#define PTFF_THREAD_HIJACK 0x80000000

Expand Down
12 changes: 8 additions & 4 deletions src/coreclr/vm/loongarch64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -637,10 +637,12 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
// save any integral return value(s)
st.d $a0, $sp, 88
st.d $a1, $sp, 96
// save async continuation return value
st.d $a2, $sp, 104

// save any FP return value(s)
fst.d $f0, $sp, 104
fst.d $f1, $sp, 112
fst.d $f0, $sp, 112
fst.d $f1, $sp, 120

ori $a0, $sp, 0
bl C_FUNC(OnHijackWorker)
Expand All @@ -650,10 +652,12 @@ NESTED_ENTRY OnHijackTripThread, _TEXT, NoHandler
// restore any integral return value(s)
ld.d $a0, $sp, 88
ld.d $a1, $sp, 96
// restore async continuation return value
ld.d $a2, $sp, 104

// restore any FP return value(s)
fld.d $f0, $sp, 104
fld.d $f1, $sp, 112
fld.d $f0, $sp, 112
fld.d $f1, $sp, 120

EPILOG_RESTORE_REG_PAIR 23, 24, 16
EPILOG_RESTORE_REG_PAIR 25, 26, 32
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/vm/loongarch64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,11 @@ struct HijackArgs
size_t ReturnValue[2];
};
union
{
DWORD64 A2;
size_t AsyncRet;
};
union
{
struct {
DWORD64 F0;
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/vm/loongarch64/stubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,9 +474,11 @@ void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats

pRD->pCurrentContext->A0 = m_Args->A0;
pRD->pCurrentContext->A1 = m_Args->A1;
pRD->pCurrentContext->A2 = m_Args->A2;

pRD->volatileCurrContextPointers.A0 = &m_Args->A0;
pRD->volatileCurrContextPointers.A1 = &m_Args->A1;
pRD->volatileCurrContextPointers.A2 = &m_Args->A2;

pRD->pCurrentContext->S0 = m_Args->S0;
pRD->pCurrentContext->S1 = m_Args->S1;
Expand Down
Loading