Skip to content
Permalink
4895a06c0d
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
2136 lines (1678 sloc) 72.3 KB
; Licensed to the .NET Foundation under one or more agreements.
; The .NET Foundation licenses this file to you under the MIT license.
; See the LICENSE file in the project root for more information.
;; ==++==
;;
;;
;; ==--==
#include "ksarm.h"
#include "asmconstants.h"
#include "asmmacros.h"
SETALIAS CTPMethodTable__s_pThunkTable, ?s_pThunkTable@CTPMethodTable@@0PAVMethodTable@@A
SETALIAS g_pObjectClass, ?g_pObjectClass@@3PAVMethodTable@@A
IMPORT JIT_InternalThrow
IMPORT JIT_WriteBarrier
IMPORT TheUMEntryPrestubWorker
IMPORT CreateThreadBlockThrow
IMPORT UMThunkStubRareDisableWorker
IMPORT PreStubWorker
IMPORT PreStubGetMethodDescForCompactEntryPoint
IMPORT NDirectImportWorker
IMPORT ObjIsInstanceOfNoGC
IMPORT ArrayStoreCheck
IMPORT VSD_ResolveWorker
IMPORT $g_pObjectClass
#ifdef WRITE_BARRIER_CHECK
SETALIAS g_GCShadow, ?g_GCShadow@@3PAEA
SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PAEA
IMPORT g_lowest_address
IMPORT $g_GCShadow
IMPORT $g_GCShadowEnd
#endif // WRITE_BARRIER_CHECK
#ifdef FEATURE_COMINTEROP
IMPORT CLRToCOMWorker
IMPORT ComPreStubWorker
IMPORT COMToCLRWorker
#endif
IMPORT CallDescrWorkerUnwindFrameChainHandler
IMPORT UMEntryPrestubUnwindFrameChainHandler
IMPORT UMThunkStubUnwindFrameChainHandler
#ifdef FEATURE_COMINTEROP
IMPORT ReverseComUnwindFrameChainHandler
#endif
#ifdef FEATURE_HIJACK
IMPORT OnHijackWorker
#endif ;FEATURE_HIJACK
IMPORT GetCurrentSavedRedirectContext
;; Imports to support virtual import fixup for ngen images
IMPORT VirtualMethodFixupWorker
;; Import to support cross-moodule external method invocation in ngen images
IMPORT ExternalMethodFixupWorker
IMPORT StubDispatchFixupWorker
#ifdef FEATURE_READYTORUN
IMPORT DynamicHelperWorker
#endif
IMPORT JIT_RareDisableHelperWorker
IMPORT DoJITFailFast
IMPORT s_gsCookie
IMPORT g_TrapReturningThreads
;; Imports for singleDomain statics helpers
IMPORT JIT_GetSharedNonGCStaticBase_Helper
IMPORT JIT_GetSharedGCStaticBase_Helper
TEXTAREA
;; LPVOID __stdcall GetCurrentIP(void);
LEAF_ENTRY GetCurrentIP
mov r0, lr
bx lr
LEAF_END
;; LPVOID __stdcall GetCurrentSP(void);
LEAF_ENTRY GetCurrentSP
mov r0, sp
bx lr
LEAF_END
;;-----------------------------------------------------------------------------
;; This helper routine enregisters the appropriate arguments and makes the
;; actual call.
;;-----------------------------------------------------------------------------
;;void CallDescrWorkerInternal(CallDescrData * pCallDescrData);
NESTED_ENTRY CallDescrWorkerInternal,,CallDescrWorkerUnwindFrameChainHandler
PROLOG_PUSH {r4,r5,r7,lr}
PROLOG_STACK_SAVE r7
mov r5,r0 ; save pCallDescrData in r5
ldr r1, [r5,#CallDescrData__numStackSlots]
cbz r1, Ldonestack
;; Add frame padding to ensure frame size is a multiple of 8 (a requirement of the OS ABI).
;; We push four registers (above) and numStackSlots arguments (below). If this comes to an odd number
;; of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set,
;; extend the stack another four bytes".
lsls r2, r1, #2
and r3, r2, #4
sub sp, sp, r3
;; This loop copies numStackSlots words
;; from [pSrcEnd-4,pSrcEnd-8,...] to [sp-4,sp-8,...]
ldr r0, [r5,#CallDescrData__pSrc]
add r0,r0,r2
Lstackloop
ldr r2, [r0,#-4]!
str r2, [sp,#-4]!
subs r1, r1, #1
bne Lstackloop
Ldonestack
;; If FP arguments are supplied in registers (r3 != NULL) then initialize all of them from the pointer
;; given in r3. Do not use "it" since it faults in floating point even when the instruction is not executed.
ldr r3, [r5,#CallDescrData__pFloatArgumentRegisters]
cbz r3, LNoFloatingPoint
vldm r3, {s0-s15}
LNoFloatingPoint
;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 12]
;; into r0, ..., r3
ldr r4, [r5,#CallDescrData__pArgumentRegisters]
ldm r4, {r0-r3}
CHECK_STACK_ALIGNMENT
;; call pTarget
;; Note that remoting expect target in r4.
ldr r4, [r5,#CallDescrData__pTarget]
blx r4
ldr r3, [r5,#CallDescrData__fpReturnSize]
;; Save FP return value if appropriate
cbz r3, LFloatingPointReturnDone
;; Float return case
;; Do not use "it" since it faults in floating point even when the instruction is not executed.
cmp r3, #4
bne LNoFloatReturn
vmov r0, s0
b LFloatingPointReturnDone
LNoFloatReturn
;; Double return case
;; Do not use "it" since it faults in floating point even when the instruction is not executed.
cmp r3, #8
bne LNoDoubleReturn
vmov r0, r1, s0, s1
b LFloatingPointReturnDone
LNoDoubleReturn
add r2, r5, #CallDescrData__returnValue
cmp r3, #16
bne LNoFloatHFAReturn
vstm r2, {s0-s3}
b LReturnDone
LNoFloatHFAReturn
cmp r3, #32
bne LNoDoubleHFAReturn
vstm r2, {d0-d3}
b LReturnDone
LNoDoubleHFAReturn
EMIT_BREAKPOINT ; Unreachable
LFloatingPointReturnDone
;; Save return value into retbuf
str r0, [r5, #(CallDescrData__returnValue + 0)]
str r1, [r5, #(CallDescrData__returnValue + 4)]
LReturnDone
#ifdef _DEBUG
;; trash the floating point registers to ensure that the HFA return values
;; won't survive by accident
vldm sp, {d0-d3}
#endif
EPILOG_STACK_RESTORE r7
EPILOG_POP {r4,r5,r7,pc}
NESTED_END
;;-----------------------------------------------------------------------------
;; This helper routine is where returns for irregular tail calls end up
:: so they can dynamically pop their stack arguments.
;;-----------------------------------------------------------------------------
;
; Stack Layout (stack grows up, 0 at the top, offsets relative to frame pointer, r7):
;
; sp -> callee stack arguments
; :
; :
; -0Ch gsCookie
; TailCallHelperFrame ->
; -08h __VFN_table
; -04h m_Next
; r7 ->
; +00h m_calleeSavedRgisters.r4
; +04h .r5
; +08h .r6
; +0Ch .r7
; +10h .r8
; +14h .r9
; +18h .r10
; r11->
; +1Ch .r11
; +20h .r14 -or- m_ReturnAddress
;
; r6 -> GetThread()
; r5 -> r6->m_pFrame (old Frame chain head)
; r11 is used to preserve the ETW call stack
NESTED_ENTRY TailCallHelperStub
;
; This prolog is never executed, but we keep it here for reference
; and for the unwind data it generates
;
; Spill callee saved registers and return address.
PROLOG_PUSH {r4-r11,lr}
PROLOG_STACK_SAVE r7
;
; This is the code that would have to run to setup this frame
; like the C++ helper does before calling RtlRestoreContext
;
; Allocate space for the rest of the frame and GSCookie.
; PROLOG_STACK_ALLOC 0x0C
;
; Set r11 for frame chain
;add r11, r7, 0x1C
;
; Set the vtable for TailCallFrame
;bl TCF_GETMETHODFRAMEVPTR
;str r0, [r7, #-8]
;
; Initialize the GSCookie within the Frame
;ldr r0, =s_gsCookie
;str r0, [r7, #-0x0C]
;
; Link the TailCallFrameinto the Frame chain
; and initialize r5 & r6 for unlinking later
;CALL_GETTHREAD
;mov r6, r0
;ldr r5, [r6, #Thread__m_pFrame]
;str r5, [r7, #-4]
;sub r0, r7, 8
;str r0, [r6, #Thread__m_pFrame]
;
; None of the previous stuff is ever executed,
; but we keep it here for reference
;
;
; Here's the pretend call (make it real so the unwinder
; doesn't think we're in the prolog)
;
bl TailCallHelperStub
;
; with the real return address pointing to this real epilog
;
JIT_TailCallHelperStub_ReturnAddress
EXPORT JIT_TailCallHelperStub_ReturnAddress
;
; Our epilog (which also unlinks the StubHelperFrame)
; Be careful not to trash the return registers
;
#ifdef _DEBUG
ldr r3, =s_gsCookie
ldr r3, [r3]
ldr r2, [r7, #-0x0C]
cmp r2, r3
beq GoodGSCookie
bl DoJITFailFast
GoodGSCookie
#endif ; _DEBUG
;
; unlink the TailCallFrame
;
str r5, [r6, #Thread__m_pFrame]
;
; epilog
;
EPILOG_STACK_RESTORE r7
EPILOG_POP {r4-r11,lr}
EPILOG_RETURN
NESTED_END
; ------------------------------------------------------------------
; void LazyMachStateCaptureState(struct LazyMachState *pState);
LEAF_ENTRY LazyMachStateCaptureState
;; marks that this is not yet valid
mov r1, #0
str r1, [r0, #MachState__isValid]
str lr, [r0, #LazyMachState_captureIp]
str sp, [r0, #LazyMachState_captureSp]
add r1, r0, #LazyMachState_captureR4_R11
stm r1, {r4-r11}
mov pc, lr
LEAF_END
; void SinglecastDelegateInvokeStub(Delegate *pThis)
LEAF_ENTRY SinglecastDelegateInvokeStub
cmp r0, #0
beq LNullThis
ldr r12, [r0, #DelegateObject___methodPtr]
ldr r0, [r0, #DelegateObject___target]
bx r12
LNullThis
mov r0, #CORINFO_NullReferenceException_ASM
b JIT_InternalThrow
LEAF_END
;
; r12 = UMEntryThunk*
;
NESTED_ENTRY TheUMEntryPrestub,,UMEntryPrestubUnwindFrameChainHandler
PROLOG_PUSH {r0-r4,lr}
PROLOG_VPUSH {d0-d7}
CHECK_STACK_ALIGNMENT
mov r0, r12
bl TheUMEntryPrestubWorker
; Record real target address in r12.
mov r12, r0
; Epilog
EPILOG_VPOP {d0-d7}
EPILOG_POP {r0-r4,lr}
EPILOG_BRANCH_REG r12
NESTED_END
;
; r12 = UMEntryThunk*
;
NESTED_ENTRY UMThunkStub,,UMThunkStubUnwindFrameChainHandler
PROLOG_PUSH {r4,r5,r7,r11,lr}
PROLOG_PUSH {r0-r3,r12}
PROLOG_STACK_SAVE r7
GBLA UMThunkStub_HiddenArg ; offset of saved UMEntryThunk *
GBLA UMThunkStub_StackArgs ; offset of original stack args (total size of UMThunkStub frame)
UMThunkStub_HiddenArg SETA 4*4
UMThunkStub_StackArgs SETA 10*4
CHECK_STACK_ALIGNMENT
; r0 = GetThread(). Trashes r5
INLINE_GETTHREAD r0, r5
cbz r0, UMThunkStub_DoThreadSetup
UMThunkStub_HaveThread
mov r5, r0 ; r5 = Thread *
ldr r2, =g_TrapReturningThreads
mov r4, 1
str r4, [r5, #Thread__m_fPreemptiveGCDisabled]
ldr r3, [r2]
cbnz r3, UMThunkStub_DoTrapReturningThreads
UMThunkStub_InCooperativeMode
ldr r12, [r7, #UMThunkStub_HiddenArg]
ldr r3, [r12, #UMEntryThunk__m_pUMThunkMarshInfo]
ldr r2, [r3, #UMThunkMarshInfo__m_cbActualArgSize]
cbz r2, UMThunkStub_ArgumentsSetup
add r0, r7, #UMThunkStub_StackArgs ; Source pointer
add r0, r0, r2
lsr r1, r2, #2 ; Count of stack slots to copy
and r2, r2, #4 ; Align the stack
sub sp, sp, r2
UMThunkStub_StackLoop
ldr r2, [r0,#-4]!
str r2, [sp,#-4]!
subs r1, r1, #1
bne UMThunkStub_StackLoop
UMThunkStub_ArgumentsSetup
ldr r4, [r3, #UMThunkMarshInfo__m_pILStub]
; reload argument registers
ldm r7, {r0-r3}
CHECK_STACK_ALIGNMENT
blx r4
UMThunkStub_PostCall
mov r4, 0
str r4, [r5, #Thread__m_fPreemptiveGCDisabled]
EPILOG_STACK_RESTORE r7
EPILOG_STACK_FREE 4 * 5
EPILOG_POP {r4,r5,r7,r11,pc}
UMThunkStub_DoThreadSetup
sub sp, #SIZEOF__FloatArgumentRegisters
vstm sp, {d0-d7}
bl CreateThreadBlockThrow
vldm sp, {d0-d7}
add sp, #SIZEOF__FloatArgumentRegisters
b UMThunkStub_HaveThread
UMThunkStub_DoTrapReturningThreads
sub sp, #SIZEOF__FloatArgumentRegisters
vstm sp, {d0-d7}
mov r0, r5 ; Thread* pThread
ldr r1, [r7, #UMThunkStub_HiddenArg] ; UMEntryThunk* pUMEntry
bl UMThunkStubRareDisableWorker
vldm sp, {d0-d7}
add sp, #SIZEOF__FloatArgumentRegisters
b UMThunkStub_InCooperativeMode
NESTED_END
INLINE_GETTHREAD_CONSTANT_POOL
; ------------------------------------------------------------------
NESTED_ENTRY ThePreStub
PROLOG_WITH_TRANSITION_BLOCK
add r0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
mov r1, r12 ; pMethodDesc
bl PreStubWorker
mov r12, r0
EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
EPILOG_BRANCH_REG r12
NESTED_END
; ------------------------------------------------------------------
NESTED_ENTRY ThePreStubCompactARM
; r12 - address of compact entry point + PC_REG_RELATIVE_OFFSET
PROLOG_WITH_TRANSITION_BLOCK
mov r0, r12
bl PreStubGetMethodDescForCompactEntryPoint
mov r12, r0 ; pMethodDesc
EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
b ThePreStub
NESTED_END
; ------------------------------------------------------------------
; This method does nothing. It's just a fixed function for the debugger to put a breakpoint on.
LEAF_ENTRY ThePreStubPatch
nop
ThePreStubPatchLabel
EXPORT ThePreStubPatchLabel
bx lr
LEAF_END
; ------------------------------------------------------------------
; The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk
PROLOG_PUSH {r0-r4,lr} ; Spill general argument registers, return address and
; arbitrary register to keep stack aligned
PROLOG_VPUSH {d0-d7} ; Spill floating point argument registers
CHECK_STACK_ALIGNMENT
mov r0, r12
bl NDirectImportWorker
mov r12, r0
EPILOG_VPOP {d0-d7}
EPILOG_POP {r0-r4,lr}
; If we got back from NDirectImportWorker, the MD has been successfully
; linked. Proceed to execute the original DLL call.
EPILOG_BRANCH_REG r12
NESTED_END
; ------------------------------------------------------------------
; The call in fixup precode initally points to this function.
; The pupose of this function is to load the MethodDesc and forward the call the prestub.
NESTED_ENTRY PrecodeFixupThunk
; r12 = FixupPrecode *
PROLOG_PUSH {r0-r1}
; Inline computation done by FixupPrecode::GetMethodDesc()
ldrb r0, [r12, #3] ; m_PrecodeChunkIndex
ldrb r1, [r12, #2] ; m_MethodDescChunkIndex
add r12,r12,r0,lsl #3
add r0,r12,r0,lsl #2
ldr r0, [r0,#8]
add r12,r0,r1,lsl #2
EPILOG_POP {r0-r1}
EPILOG_BRANCH ThePreStub
NESTED_END
; ------------------------------------------------------------------
; void ResolveWorkerAsmStub(r0, r1, r2, r3, r4:IndirectionCellAndFlags, r12:DispatchToken)
;
; The stub dispatch thunk which transfers control to VSD_ResolveWorker.
NESTED_ENTRY ResolveWorkerAsmStub
PROLOG_WITH_TRANSITION_BLOCK
add r0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
mov r2, r12 ; token
; indirection cell in r4 - should be consistent with REG_ARM_STUB_SPECIAL
bic r1, r4, #3 ; indirection cell
and r3, r4, #3 ; flags
bl VSD_ResolveWorker
mov r12, r0
EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
EPILOG_BRANCH_REG r12
NESTED_END
; ------------------------------------------------------------------
; void ResolveWorkerChainLookupAsmStub(r0, r1, r2, r3, r4:IndirectionCellAndFlags, r12:DispatchToken)
NESTED_ENTRY ResolveWorkerChainLookupAsmStub
; ARMSTUB TODO: implement chained lookup
b ResolveWorkerAsmStub
NESTED_END
#if defined(FEATURE_COMINTEROP)
; ------------------------------------------------------------------
; setStubReturnValue
; r0 - size of floating point return value (MetaSig::GetFPReturnSize())
; r1 - pointer to the return buffer in the stub frame
LEAF_ENTRY setStubReturnValue
cbz r0, NoFloatingPointRetVal
;; Float return case
;; Do not use "it" since it faults in floating point even when the instruction is not executed.
cmp r0, #4
bne LNoFloatRetVal
vldr s0, [r1]
bx lr
LNoFloatRetVal
;; Double return case
;; Do not use "it" since it faults in floating point even when the instruction is not executed.
cmp r0, #8
bne LNoDoubleRetVal
vldr d0, [r1]
bx lr
LNoDoubleRetVal
cmp r0, #16
bne LNoFloatHFARetVal
vldm r1, {s0-s3}
bx lr
LNoFloatHFARetVal
cmp r0, #32
bne LNoDoubleHFARetVal
vldm r1, {d0-d3}
bx lr
LNoDoubleHFARetVal
EMIT_BREAKPOINT ; Unreachable
NoFloatingPointRetVal
;; Restore the return value from retbuf
ldr r0, [r1]
ldr r1, [r1, #4]
bx lr
LEAF_END
#endif // FEATURE_COMINTEROP
#if defined(FEATURE_COMINTEROP)
; ------------------------------------------------------------------
; Function used by remoting/COM interop to get floating point return value (since it's not in the same
; register(s) as non-floating point values).
;
; On entry;
; r0 : size of the FP result (4 or 8 bytes)
; r1 : pointer to 64-bit buffer to receive result
;
; On exit:
; buffer pointed to by r1 on entry contains the float or double argument as appropriate
;
LEAF_ENTRY getFPReturn
cmp r0, #4
bne LgetFP8
vmov r2, s0
str r2, [r1]
bx lr
LgetFP8
vmov r2, r3, d0
strd r2, r3, [r1]
bx lr
LEAF_END
; ------------------------------------------------------------------
; Function used by remoting/COM interop to set floating point return value (since it's not in the same
; register(s) as non-floating point values).
;
; On entry:
; r0 : size of the FP result (4 or 8 bytes)
; r2/r3 : 32-bit or 64-bit FP result
;
; On exit:
; s0 : float result if r0 == 4
; d0 : double result if r0 == 8
;
LEAF_ENTRY setFPReturn
cmp r0, #4
bne LsetFP8
vmov s0, r2
bx lr
LsetFP8
vmov d0, r2, r3
bx lr
LEAF_END
#endif defined(FEATURE_COMINTEROP)
#ifdef FEATURE_COMINTEROP
; ------------------------------------------------------------------
; GenericComPlusCallStub that erects a ComPlusMethodFrame and calls into the runtime
; (CLRToCOMWorker) to dispatch rare cases of the interface call.
;
; On entry:
; r0 : 'this' object
; r12 : Interface MethodDesc*
; plus user arguments in registers and on the stack
;
; On exit:
; r0/r1/s0/d0 set to return value of the call as appropriate
;
NESTED_ENTRY GenericComPlusCallStub
PROLOG_WITH_TRANSITION_BLOCK 0x20
add r0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
mov r1, r12 ; pMethodDesc
; Call CLRToCOMWorker(pFrame). This call will set up the rest of the frame (including the vfptr,
; the GS cookie and linking to the thread), make the client call and return with correct registers set
; (r0/r1/s0-s3/d0-d3 as appropriate).
bl CLRToCOMWorker
; r0 = fpRetSize
; return value is stored before float argument registers
add r1, sp, #(__PWTB_FloatArgumentRegisters - 0x20)
bl setStubReturnValue
EPILOG_WITH_TRANSITION_BLOCK_RETURN
NESTED_END
; ------------------------------------------------------------------
; COM to CLR stub called the first time a particular method is invoked.
;
; On entry:
; r12 : (MethodDesc* - ComCallMethodDesc_Offset_FromR12) provided by prepad thunk
; plus user arguments in registers and on the stack
;
; On exit:
; tail calls to real method
;
NESTED_ENTRY ComCallPreStub
GBLA ComCallPreStub_FrameSize
GBLA ComCallPreStub_FramePad
GBLA ComCallPreStub_StackAlloc
GBLA ComCallPreStub_Frame
GBLA ComCallPreStub_ErrorReturn
; Set the defaults
ComCallPreStub_FramePad SETA 8 ; error return
ComCallPreStub_FrameSize SETA (ComCallPreStub_FramePad + SIZEOF__GSCookie + SIZEOF__ComMethodFrame)
IF ComCallPreStub_FrameSize:MOD:8 != 0
ComCallPreStub_FramePad SETA ComCallPreStub_FramePad + 4
ComCallPreStub_FrameSize SETA ComCallPreStub_FrameSize + 4
ENDIF
ComCallPreStub_StackAlloc SETA ComCallPreStub_FrameSize - SIZEOF__ArgumentRegisters - 2 * 4
ComCallPreStub_Frame SETA SIZEOF__FloatArgumentRegisters + ComCallPreStub_FramePad + SIZEOF__GSCookie
ComCallPreStub_ErrorReturn SETA SIZEOF__FloatArgumentRegisters
PROLOG_PUSH {r0-r3} ; Spill general argument registers
PROLOG_PUSH {r11,lr} ; Save return address
PROLOG_STACK_ALLOC ComCallPreStub_StackAlloc ; Alloc non-spill portion of stack frame
PROLOG_VPUSH {d0-d7} ; Spill floating point argument registers
CHECK_STACK_ALIGNMENT
; Finish initializing the frame. The C++ helper will fill in the GS cookie and vfptr and link us to
; the Thread frame chain (see ComPrestubMethodFrame::Push). That leaves us with m_pFuncDesc.
; The prepad thunk passes us a value which is the MethodDesc* - ComCallMethodDesc_Offset_FromR12 (due to encoding limitations in the
; thunk). So we must correct this by adding 4 before storing the pointer.
add r12, #(ComCallMethodDesc_Offset_FromR12)
str r12, [sp, #(ComCallPreStub_Frame + UnmanagedToManagedFrame__m_pvDatum)]
; Call the C++ worker: ComPreStubWorker(&Frame)
add r0, sp, #(ComCallPreStub_Frame)
add r1, sp, #(ComCallPreStub_ErrorReturn)
bl ComPreStubWorker
; Handle failure case.
cbz r0, ErrorExit
; Stash real target address where it won't be overwritten by restoring the calling state.
mov r12, r0
EPILOG_VPOP {d0-d7} ; Restore floating point argument registers
EPILOG_STACK_FREE ComCallPreStub_StackAlloc
EPILOG_POP {r11,lr}
EPILOG_POP {r0-r3} ; Restore argument registers
; Tail call the real target. Actually ComPreStubWorker returns the address of the prepad thunk on ARM,
; that way we don't run out of volatile registers trying to remember both the new target address and
; the hidden MethodDesc* argument. ComPreStubWorker patched the prepad though so the second time
; through we won't end up here again.
EPILOG_BRANCH_REG r12
ErrorExit
; Failed to find a stub to call. Retrieve the return value ComPreStubWorker set for us.
ldr r0, [sp, #(ComCallPreStub_ErrorReturn)]
ldr r1, [sp, #(ComCallPreStub_ErrorReturn+4)]
EPILOG_STACK_FREE ComCallPreStub_StackAlloc + SIZEOF__FloatArgumentRegisters
EPILOG_POP {r11,lr}
EPILOG_STACK_FREE SIZEOF__ArgumentRegisters
EPILOG_RETURN
NESTED_END
; ------------------------------------------------------------------
; COM to CLR stub which sets up a ComMethodFrame and calls COMToCLRWorker.
;
; On entry:
; r12 : (MethodDesc* - ComCallMethodDesc_Offset_FromR12) provided by prepad thunk
; plus user arguments in registers and on the stack
;
; On exit:
; Result in r0/r1/s0/d0 as per the real method being called
;
NESTED_ENTRY GenericComCallStub,,ReverseComUnwindFrameChainHandler
; Calculate space needed on stack for alignment padding, a GS cookie and a ComMethodFrame (minus the last
; field, m_ReturnAddress, which we'll push explicitly).
GBLA GenericComCallStub_FrameSize
GBLA GenericComCallStub_FramePad
GBLA GenericComCallStub_StackAlloc
GBLA GenericComCallStub_Frame
; Set the defaults
GenericComCallStub_FramePad SETA 0
GenericComCallStub_FrameSize SETA (GenericComCallStub_FramePad + SIZEOF__GSCookie + SIZEOF__ComMethodFrame)
IF GenericComCallStub_FrameSize:MOD:8 != 0
GenericComCallStub_FramePad SETA 4
GenericComCallStub_FrameSize SETA GenericComCallStub_FrameSize + GenericComCallStub_FramePad
ENDIF
GenericComCallStub_StackAlloc SETA GenericComCallStub_FrameSize - SIZEOF__ArgumentRegisters - 2 * 4
GenericComCallStub_Frame SETA SIZEOF__FloatArgumentRegisters + GenericComCallStub_FramePad + SIZEOF__GSCookie
PROLOG_PUSH {r0-r3} ; Spill general argument registers
PROLOG_PUSH {r11,lr} ; Save return address
PROLOG_STACK_ALLOC GenericComCallStub_StackAlloc ; Alloc non-spill portion of stack frame
PROLOG_VPUSH {d0-d7} ; Spill floating point argument registers
CHECK_STACK_ALIGNMENT
; Store MethodDesc* in frame. Due to a limitation of the prepad, r12 actually contains a value
; "ComCallMethodDesc_Offset_FromR12" less than the pointer we want, so fix that up.
add r12, r12, #(ComCallMethodDesc_Offset_FromR12)
str r12, [sp, #(GenericComCallStub_Frame + UnmanagedToManagedFrame__m_pvDatum)]
; Call COMToCLRWorker(pThread, pFrame). Note that pThread is computed inside the method so we don't
; need to set it up here.
;
; Setup R1 to point to the start of the explicit frame. We account for alignment padding and
; space for GSCookie.
add r1, sp, #(GenericComCallStub_Frame)
bl COMToCLRWorker
EPILOG_STACK_FREE GenericComCallStub_StackAlloc + SIZEOF__FloatArgumentRegisters
EPILOG_POP {r11,lr}
EPILOG_STACK_FREE SIZEOF__ArgumentRegisters
EPILOG_RETURN
NESTED_END
; ------------------------------------------------------------------
; COM to CLR stub called from COMToCLRWorker that actually dispatches to the real managed method.
;
; On entry:
; r0 : dwStackSlots, count of argument stack slots to copy
; r1 : pFrame, ComMethodFrame pushed by GenericComCallStub above
; r2 : pTarget, address of code to call
; r3 : pSecretArg, hidden argument passed to target above in r12
; [sp, #0] : pDangerousThis, managed 'this' reference
;
; On exit:
; Result in r0/r1/s0/d0 as per the real method being called
;
NESTED_ENTRY COMToCLRDispatchHelper,,CallDescrWorkerUnwindFrameChainHandler
PROLOG_PUSH {r4-r5,r7,lr}
PROLOG_STACK_SAVE r7
; Copy stack-based arguments. Make sure the eventual SP ends up 8-byte aligned. Note that the
; following calculations assume that the prolog has left the stack already aligned.
CHECK_STACK_ALIGNMENT
cbz r0, COMToCLRDispatchHelper_ArgumentsSetup
lsl r4, r0, #2 ; r4 = (dwStackSlots * 4)
and r5, r4, #4 ; Align the stack
sub sp, sp, r5
add r5, r1, #SIZEOF__ComMethodFrame
add r5, r5, r4
COMToCLRDispatchHelper_StackLoop
ldr r4, [r5,#-4]!
str r4, [sp,#-4]!
subs r0, r0, #1
bne COMToCLRDispatchHelper_StackLoop
CHECK_STACK_ALIGNMENT
COMToCLRDispatchHelper_ArgumentsSetup
; Load floating point argument registers.
sub r4, r1, #(GenericComCallStub_Frame)
vldm r4, {d0-d7}
; Prepare the call target and hidden argument prior to overwriting r0-r3.
mov r12, r3 ; r12 = hidden argument
mov lr, r2 ; lr = target code
; Load general argument registers except r0.
add r4, r1, #(SIZEOF__ComMethodFrame - SIZEOF__ArgumentRegisters + 4)
ldm r4, {r1-r3}
; Load r0 from the managed this, not the original incoming IUnknown*.
ldr r0, [r7, #(4 * 4)]
; Make the call.
blx lr
EPILOG_STACK_RESTORE r7
EPILOG_POP {r4-r5,r7,pc}
NESTED_END
#endif // FEATURE_COMINTEROP
#ifdef PROFILING_SUPPORTED
PROFILE_ENTER equ 1
PROFILE_LEAVE equ 2
PROFILE_TAILCALL equ 4
; Define the layout of the PROFILE_PLATFORM_SPECIFIC_DATA we push on the stack for all profiler
; helpers.
map 0
field 4 ; r0
field 4 ; r1
field 4 ; r11
field 4 ; Pc (caller's PC, i.e. LR)
field SIZEOF__FloatArgumentRegisters ; spilled floating point argument registers
functionId field 4
probeSp field 4
profiledSp field 4
hiddenArg field 4
flags field 4
SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA field 0
; ------------------------------------------------------------------
; Macro used to generate profiler helpers. In all cases we push a partially initialized
; PROFILE_PLATFORM_SPECIFIC_DATA structure on the stack and call into a C++ helper to continue processing.
;
; On entry:
; r0 : clientInfo
; r1/r2 : return values (in case of leave)
; frame pointer(r11) must be set (in case of enter)
; all arguments are on stack at frame pointer (r11) + 8bytes (save lr & prev r11).
;
; On exit:
; All register values are preserved including volatile registers
;
MACRO
DefineProfilerHelper $HelperName, $Flags
GBLS __ProfilerHelperFunc
__ProfilerHelperFunc SETS "$HelperName":CC:"Naked"
NESTED_ENTRY $__ProfilerHelperFunc
IMPORT $HelperName ; The C++ helper which does most of the work
PROLOG_PUSH {r0,r3,r9,r12} ; save volatile general purpose registers. remaining r1 & r2 are saved below...saving r9 as it is required for virtualunwinding
PROLOG_STACK_ALLOC (6*4) ; Reserve space for tail end of structure (5*4 bytes) and extra 4 bytes is for aligning the stack at 8-byte boundary
PROLOG_VPUSH {d0-d7} ; Spill floting point argument registers
PROLOG_PUSH {r1,r11,lr} ; Save possible return value in r1, frame pointer and return address
PROLOG_PUSH {r2} ; Save possible return value in r0. Before calling Leave Hook Jit moves contents of r0 to r2
; so pushing r2 instead of r0. This push statement cannot be combined with the above push
; as r2 gets pushed before r1.
CHECK_STACK_ALIGNMENT
; Zero r1 for use clearing fields in the PROFILE_PLATFORM_SPECIFIC_DATA.
eor r1, r1
; Clear functionId.
str r1, [sp, #functionId]
; Save caller's SP (at the point this helper was called).
add r2, sp, #(SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA + 20)
str r2, [sp, #probeSp]
; Save caller's SP (at the point where only argument registers have been spilled).
ldr r2, [r11]
add r2, r2, #8 ; location of arguments is at frame pointer(r11) + 8 (lr & prev frame ptr is saved before changing
str r2, [sp, #profiledSp]
; Clear hiddenArg.
str r1, [sp, #hiddenArg]
; Set flags to indicate type of helper called.
mov r1, #($Flags)
str r1, [sp, #flags]
; Call C++ portion of helper (<$HelperName>(clientInfo, &profilePlatformSpecificData)).
mov r1, sp
bl $HelperName
EPILOG_POP {r2}
EPILOG_POP {r1,r11,lr}
EPILOG_VPOP {d0-d7}
EPILOG_STACK_FREE (6*4)
EPILOG_POP {r0,r3,r9,r12}
EPILOG_RETURN
NESTED_END
MEND
DefineProfilerHelper ProfileEnter, PROFILE_ENTER
DefineProfilerHelper ProfileLeave, PROFILE_LEAVE
DefineProfilerHelper ProfileTailcall, PROFILE_TAILCALL
#endif // PROFILING_SUPPORTED
;
; If a preserved register were pushed onto the stack between
; the managed caller and the H_M_F, _R4_R11 will point to its
; location on the stack and it would have been updated on the
; stack by the GC already and it will be popped back into the
; appropriate register when the appropriate epilog is run.
;
; Otherwise, the register is preserved across all the code
; in this HCALL or FCALL, so we need to update those registers
; here because the GC will have updated our copies in the
; frame.
;
; So, if _R4_R11 points into the MachState, we need to update
; the register here. That's what this macro does.
;
MACRO
RestoreRegMS $regIndex, $reg
; Incoming:
;
; R0 = address of MachState
;
; $regIndex: Index of the register (R4-R11). For R4, index is 4.
; For R5, index is 5, and so on.
;
; $reg: Register name (e.g. R4, R5, etc)
;
; Get the address of the specified captured register from machine state
add r2, r0, #(MachState__captureR4_R11 + (($regIndex-4)*4))
; Get the address of the specified preserved register from machine state
ldr r3, [r0, #(MachState___R4_R11 + (($regIndex-4)*4))]
cmp r2, r3
bne %FT0
ldr $reg, [r2]
0
MEND
; EXTERN_C int __fastcall HelperMethodFrameRestoreState(
; INDEBUG_COMMA(HelperMethodFrame *pFrame)
; MachState *pState
; )
LEAF_ENTRY HelperMethodFrameRestoreState
#ifdef _DEBUG
mov r0, r1
#endif
; If machine state is invalid, then simply exit
ldr r1, [r0, #MachState__isValid]
cmp r1, #0
beq Done
RestoreRegMS 4, R4
RestoreRegMS 5, R5
RestoreRegMS 6, R6
RestoreRegMS 7, R7
RestoreRegMS 8, R8
RestoreRegMS 9, R9
RestoreRegMS 10, R10
RestoreRegMS 11, R11
Done
; Its imperative that the return value of HelperMethodFrameRestoreState is zero
; as it is used in the state machine to loop until it becomes zero.
; Refer to HELPER_METHOD_FRAME_END macro for details.
mov r0,#0
bx lr
LEAF_END
#ifdef FEATURE_HIJACK
; ------------------------------------------------------------------
; Hijack function for functions which return a value type
NESTED_ENTRY OnHijackTripThread
PROLOG_PUSH {r0,r4-r11,lr}
PROLOG_VPUSH {d0-d3} ; saving as d0-d3 can have the floating point return value
PROLOG_PUSH {r1} ; saving as r1 can have partial return value when return is > 32 bits
PROLOG_STACK_ALLOC 4 ; 8 byte align
CHECK_STACK_ALIGNMENT
add r0, sp, #40
bl OnHijackWorker
EPILOG_STACK_FREE 4
EPILOG_POP {r1}
EPILOG_VPOP {d0-d3}
EPILOG_POP {r0,r4-r11,pc}
NESTED_END
#endif ; FEATURE_HIJACK
; ------------------------------------------------------------------
; Macro to generate Redirection Stubs
;
; $reason : reason for redirection
; Eg. GCThreadControl
; NOTE: If you edit this macro, make sure you update GetCONTEXTFromRedirectedStubStackFrame.
; This function is used by both the personality routine and the debugger to retrieve the original CONTEXT.
MACRO
GenerateRedirectedHandledJITCaseStub $reason
GBLS __RedirectionStubFuncName
GBLS __RedirectionStubEndFuncName
GBLS __RedirectionFuncName
__RedirectionStubFuncName SETS "RedirectedHandledJITCaseFor":CC:"$reason":CC:"_Stub"
__RedirectionStubEndFuncName SETS "RedirectedHandledJITCaseFor":CC:"$reason":CC:"_StubEnd"
__RedirectionFuncName SETS "|?RedirectedHandledJITCaseFor":CC:"$reason":CC:"@Thread@@CAXXZ|"
IMPORT $__RedirectionFuncName
NESTED_ENTRY $__RedirectionStubFuncName
PROLOG_PUSH {r7,lr} ; return address
PROLOG_STACK_ALLOC 4 ; stack slot to save the CONTEXT *
PROLOG_STACK_SAVE r7
;REDIRECTSTUB_SP_OFFSET_CONTEXT is defined in asmconstants.h
;If CONTEXT is not saved at 0 offset from SP it must be changed as well.
ASSERT REDIRECTSTUB_SP_OFFSET_CONTEXT == 0
; Runtime check for 8-byte alignment. This check is necessary as this function can be
; entered before complete execution of the prolog of another function.
and r0, r7, #4
sub sp, sp, r0
; stack must be 8 byte aligned
CHECK_STACK_ALIGNMENT
;
; Save a copy of the redirect CONTEXT*.
; This is needed for the debugger to unwind the stack.
;
bl GetCurrentSavedRedirectContext
str r0, [r7]
;
; Fetch the interrupted pc and save it as our return address.
;
ldr r1, [r0, #CONTEXT_Pc]
str r1, [r7, #8]
;
; Call target, which will do whatever we needed to do in the context
; of the target thread, and will RtlRestoreContext when it is done.
;
bl $__RedirectionFuncName
EMIT_BREAKPOINT ; Unreachable
; Put a label here to tell the debugger where the end of this function is.
$__RedirectionStubEndFuncName
EXPORT $__RedirectionStubEndFuncName
NESTED_END
MEND
; ------------------------------------------------------------------
; Redirection Stub for GC in fully interruptible method
GenerateRedirectedHandledJITCaseStub GCThreadControl
; ------------------------------------------------------------------
GenerateRedirectedHandledJITCaseStub DbgThreadControl
; ------------------------------------------------------------------
GenerateRedirectedHandledJITCaseStub UserSuspend
#ifdef _DEBUG
; ------------------------------------------------------------------
; Redirection Stub for GC Stress
GenerateRedirectedHandledJITCaseStub GCStress
#endif
; ------------------------------------------------------------------
; Functions to probe for stack space
; Input reg r4 = amount of stack to probe for
; value of reg r4 is preserved on exit from function
; r12 is trashed
; The below two functions were copied from vctools\crt\crtw32\startup\arm\chkstk.asm
NESTED_ENTRY checkStack
subs r12,sp,r4
mrc p15,#0,r4,c13,c0,#2 ; get TEB *
ldr r4,[r4,#8] ; get Stack limit
bcc checkStack_neg ; if r12 is less then 0 set it to 0
checkStack_label1
cmp r12, r4
bcc stackProbe ; must probe to extend guardpage if r12 is beyond stackLimit
sub r4, sp, r12 ; restore value of r4
EPILOG_RETURN
checkStack_neg
mov r12, #0
b checkStack_label1
NESTED_END
NESTED_ENTRY stackProbe
PROLOG_PUSH {r5,r6}
mov r6, r12
bfc r6, #0, #0xc ; align down (4K)
stackProbe_loop
sub r4,r4,#0x1000 ; dec stack Limit by 4K as page size is 4K
ldr r5,[r4] ; try to read ... this should move the guard page
cmp r4,r6
bne stackProbe_loop
EPILOG_POP {r5,r6}
EPILOG_NOP sub r4,sp,r12
EPILOG_RETURN
NESTED_END
;------------------------------------------------
; VirtualMethodFixupStub
;
; In NGEN images, virtual slots inherited from cross-module dependencies
; point to a jump thunk that calls into the following function that will
; call into a VM helper. The VM helper is responsible for patching up
; thunk, upon executing the precode, so that all subsequent calls go directly
; to the actual method body.
;
; This is done lazily for performance reasons.
;
; On entry:
;
; R0 = "this" pointer
; R12 = Address of thunk + 4
NESTED_ENTRY VirtualMethodFixupStub
; Save arguments and return address
PROLOG_PUSH {r0-r3, lr}
; Align stack
PROLOG_STACK_ALLOC SIZEOF__FloatArgumentRegisters + 4
vstm sp, {d0-d7}
CHECK_STACK_ALIGNMENT
; R12 contains an address that is 4 bytes ahead of
; where the thunk starts. Refer to ZapImportVirtualThunk::Save
; for details on this.
;
; Move the correct thunk start address in R1
sub r1, r12, #4
; Call the helper in the VM to perform the actual fixup
; and tell us where to tail call. R0 already contains
; the this pointer.
bl VirtualMethodFixupWorker
; On return, R0 contains the target to tailcall to
mov r12, r0
; pop the stack and restore original register state
vldm sp, {d0-d7}
EPILOG_STACK_FREE SIZEOF__FloatArgumentRegisters + 4
EPILOG_POP {r0-r3, lr}
PATCH_LABEL VirtualMethodFixupPatchLabel
; and tailcall to the actual method
EPILOG_BRANCH_REG r12
NESTED_END
;------------------------------------------------
; ExternalMethodFixupStub
;
; In NGEN images, calls to cross-module external methods initially
; point to a jump thunk that calls into the following function that will
; call into a VM helper. The VM helper is responsible for patching up the
; thunk, upon executing the precode, so that all subsequent calls go directly
; to the actual method body.
;
; This is done lazily for performance reasons.
;
; On entry:
;
; R12 = Address of thunk + 4
NESTED_ENTRY ExternalMethodFixupStub
PROLOG_WITH_TRANSITION_BLOCK
add r0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
; Adjust (read comment above for details) and pass the address of the thunk
sub r1, r12, #4 ; pThunk
mov r2, #0 ; sectionIndex
mov r3, #0 ; pModule
bl ExternalMethodFixupWorker
; mov the address we patched to in R12 so that we can tail call to it
mov r12, r0
EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
PATCH_LABEL ExternalMethodFixupPatchLabel
EPILOG_BRANCH_REG r12
NESTED_END
;------------------------------------------------
; StubDispatchFixupStub
;
; In NGEN images, calls to interface methods initially
; point to a jump thunk that calls into the following function that will
; call into a VM helper. The VM helper is responsible for patching up the
; thunk with actual stub dispatch stub.
;
; On entry:
;
; R4 = Address of indirection cell
NESTED_ENTRY StubDispatchFixupStub
PROLOG_WITH_TRANSITION_BLOCK
; address of StubDispatchFrame
add r0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
mov r1, r4 ; siteAddrForRegisterIndirect
mov r2, #0 ; sectionIndex
mov r3, #0 ; pModule
bl StubDispatchFixupWorker
; mov the address we patched to in R12 so that we can tail call to it
mov r12, r0
EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
PATCH_LABEL StubDispatchFixupPatchLabel
EPILOG_BRANCH_REG r12
NESTED_END
;------------------------------------------------
; JIT_RareDisableHelper
;
; The JIT expects this helper to preserve registers used for return values
;
NESTED_ENTRY JIT_RareDisableHelper
PROLOG_PUSH {r0-r1, r11, lr} ; save integer return value
PROLOG_VPUSH {d0-d3} ; floating point return value
CHECK_STACK_ALIGNMENT
bl JIT_RareDisableHelperWorker
EPILOG_VPOP {d0-d3}
EPILOG_POP {r0-r1, r11, pc}
NESTED_END
;
; JIT Static access helpers for single appdomain case
;
; ------------------------------------------------------------------
; void* JIT_GetSharedNonGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID)
LEAF_ENTRY JIT_GetSharedNonGCStaticBase_SingleAppDomain
; If class is not initialized, bail to C++ helper
add r2, r0, #DomainLocalModule__m_pDataBlob
ldrb r2, [r2, r1]
tst r2, #1
beq CallCppHelper1
bx lr
CallCppHelper1
; Tail call JIT_GetSharedNonGCStaticBase_Helper
b JIT_GetSharedNonGCStaticBase_Helper
LEAF_END
; ------------------------------------------------------------------
; void* JIT_GetSharedNonGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID)
LEAF_ENTRY JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain
bx lr
LEAF_END
; ------------------------------------------------------------------
; void* JIT_GetSharedGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID)
LEAF_ENTRY JIT_GetSharedGCStaticBase_SingleAppDomain
; If class is not initialized, bail to C++ helper
add r2, r0, #DomainLocalModule__m_pDataBlob
ldrb r2, [r2, r1]
tst r2, #1
beq CallCppHelper3
ldr r0, [r0, #DomainLocalModule__m_pGCStatics]
bx lr
CallCppHelper3
; Tail call Jit_GetSharedGCStaticBase_Helper
b JIT_GetSharedGCStaticBase_Helper
LEAF_END
; ------------------------------------------------------------------
; void* JIT_GetSharedGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID)
LEAF_ENTRY JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain
ldr r0, [r0, #DomainLocalModule__m_pGCStatics]
bx lr
LEAF_END
; ------------------------------------------------------------------
; __declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(PtrArray* array, unsigned idx, Object* val)
LEAF_ENTRY JIT_Stelem_Ref
; We retain arguments as they were passed and use r0 == array; r1 == idx; r2 == val
; check for null array
cbz r0, ThrowNullReferenceException
; idx bounds check
ldr r3,[r0,#ArrayBase__m_NumComponents]
cmp r3,r1
bls ThrowIndexOutOfRangeException
; fast path to null assignment (doesn't need any write-barriers)
cbz r2, AssigningNull
; Verify the array-type and val-type matches before writing
ldr r12, [r0] ; r12 = array MT
ldr r3, [r2] ; r3 = val->GetMethodTable()
ldr r12, [r12, #MethodTable__m_ElementType] ; array->GetArrayElementTypeHandle()
cmp r3, r12
beq JIT_Stelem_DoWrite
; Types didnt match but allow writing into an array of objects
ldr r3, =$g_pObjectClass
ldr r3, [r3] ; r3 = *g_pObjectClass
cmp r3, r12 ; array type matches with Object*
beq JIT_Stelem_DoWrite
; array type and val type do not exactly match. Raise frame and do detailed match
b JIT_Stelem_Ref_NotExactMatch
AssigningNull
; Assigning null doesn't need write barrier
adds r0, r1, LSL #2 ; r0 = r0 + (r1 x 4) = array->m_array[idx]
str r2, [r0, #PtrArray__m_Array] ; array->m_array[idx] = val
bx lr
ThrowNullReferenceException
; Tail call JIT_InternalThrow(NullReferenceException)
ldr r0, =CORINFO_NullReferenceException_ASM
b JIT_InternalThrow
ThrowIndexOutOfRangeException
; Tail call JIT_InternalThrow(NullReferenceException)
ldr r0, =CORINFO_IndexOutOfRangeException_ASM
b JIT_InternalThrow
LEAF_END
; ------------------------------------------------------------------
; __declspec(naked) void F_CALL_CONV JIT_Stelem_Ref_NotExactMatch(PtrArray* array,
; unsigned idx, Object* val)
; r12 = array->GetArrayElementTypeHandle()
;
NESTED_ENTRY JIT_Stelem_Ref_NotExactMatch
PROLOG_PUSH {lr}
PROLOG_PUSH {r0-r2}
CHECK_STACK_ALIGNMENT
; allow in case val can be casted to array element type
; call ObjIsInstanceOfNoGC(val, array->GetArrayElementTypeHandle())
mov r1, r12 ; array->GetArrayElementTypeHandle()
mov r0, r2
bl ObjIsInstanceOfNoGC
cmp r0, TypeHandle_CanCast
beq DoWrite ; ObjIsInstance returned TypeHandle::CanCast
; check via raising frame
NeedFrame
mov r1, sp ; r1 = &array
adds r0, sp, #8 ; r0 = &val
bl ArrayStoreCheck ; ArrayStoreCheck(&val, &array)
DoWrite
EPILOG_POP {r0-r2}
EPILOG_POP {lr}
EPILOG_BRANCH JIT_Stelem_DoWrite
NESTED_END
; ------------------------------------------------------------------
; __declspec(naked) void F_CALL_CONV JIT_Stelem_DoWrite(PtrArray* array, unsigned idx, Object* val)
LEAF_ENTRY JIT_Stelem_DoWrite
; Setup args for JIT_WriteBarrier. r0 = &array->m_array[idx]; r1 = val
adds r0, #PtrArray__m_Array ; r0 = &array->m_array
adds r0, r1, LSL #2
mov r1, r2 ; r1 = val
; Branch to the write barrier (which is already correctly overwritten with
; single or multi-proc code based on the current CPU
b JIT_WriteBarrier
LEAF_END
; ------------------------------------------------------------------
; GC write barrier support.
;
; There's some complexity here for a couple of reasons:
;
; Firstly, there are a few variations of barrier types (input registers, checked vs unchecked, UP vs MP etc.).
; So first we define a number of helper macros that perform fundamental pieces of a barrier and then we define
; the final barrier functions by assembling these macros in various combinations.
;
; Secondly, for performance reasons we believe it's advantageous to be able to modify the barrier functions
; over the lifetime of the CLR. Specifically ARM has real problems reading the values of external globals (we
; need two memory indirections to do this) so we'd like to be able to directly set the current values of
; various GC globals (e.g. g_lowest_address and g_card_table) into the barrier code itself and then reset them
; every time they change (the GC already calls the VM to inform it of these changes). To handle this without
; creating too much fragility such as hardcoding instruction offsets in the VM update code, we wrap write
; barrier creation and GC globals access in a set of macros that create a table of descriptors describing each
; offset that must be patched.
;
; Many of the following macros need a scratch register. Define a name for it here so it's easy to modify this
; in the future.
GBLS __wbscratch
__wbscratch SETS "r3"
;
; First define the meta-macros used to support dynamically patching write barriers.
;
; WRITEBARRIERAREA
;
; As we assemble each write barrier function we build a descriptor for the offsets within that function
; that need to be patched at runtime. We write these descriptors into a read-only portion of memory. Use a
; specially-named linker section for this to ensure all the descriptors are contiguous and form a table.
; During the final link of the CLR this section should be merged into the regular read-only data section.
;
; This macro handles switching assembler output to the above section (similar to the TEXTAREA or
; RODATAAREA macros defined by kxarm.h).
;
MACRO
WRITEBARRIERAREA
AREA |.clrwb|,DATA,READONLY
MEND
; BEGIN_WRITE_BARRIERS
;
; This macro must be invoked before any write barriers are defined. It sets up and exports a symbol,
; g_rgWriteBarrierDescriptors, used by the VM to locate the start of the table describing the offsets in
; each write barrier that need to be modified dynamically.
;
MACRO
BEGIN_WRITE_BARRIERS
; Define a global boolean to track whether we're currently in a BEGIN_WRITE_BARRIERS section. This is
; used purely to catch incorrect attempts to define a write barrier outside the section.
GBLL __defining_write_barriers
__defining_write_barriers SETL {true}
; Switch to the descriptor table section.
WRITEBARRIERAREA
; Define and export a symbol pointing to the start of the descriptor table.
g_rgWriteBarrierDescriptors
EXPORT g_rgWriteBarrierDescriptors
; Switch back to the code section.
TEXTAREA
MEND
; END_WRITE_BARRIERS
;
; This macro must be invoked after all write barriers have been defined. It finalizes the creation of the
; barrier descriptor table by writing a sentinel value at the end.
;
MACRO
END_WRITE_BARRIERS
ASSERT __defining_write_barriers
__defining_write_barriers SETL {false}
; Switch to the descriptor table section.
WRITEBARRIERAREA
; Write the sentinel value to the end of the descriptor table (a function entrypoint address of zero).
DCD 0
; Switch back to the code section.
TEXTAREA
MEND
; WRITE_BARRIER_ENTRY
;
; Declare the start of a write barrier function. Use similarly to NESTED_ENTRY. This is the only legal way
; to declare a write barrier function.
;
MACRO
WRITE_BARRIER_ENTRY $name
; Ensure we're called inside a BEGIN_WRITE_BARRIERS section.
ASSERT __defining_write_barriers
; Do the standard function declaration logic. Must use a NESTED_ENTRY since we require unwind info to
; be registered (for the case where the barrier AVs and the runtime needs to recover).
LEAF_ENTRY $name
; Record the function name as it's used as the basis for unique label name creation in some of the
; macros below.
GBLS __write_barrier_name
__write_barrier_name SETS "$name"
; Declare globals to collect the values of the offsets of instructions that load GC global values.
GBLA __g_lowest_address_offset
GBLA __g_highest_address_offset
GBLA __g_ephemeral_low_offset
GBLA __g_ephemeral_high_offset
GBLA __g_card_table_offset
; Initialize the above offsets to 0xffff. The default of zero is unsatisfactory because we could
; legally have an offset of zero and we need some way to distinguish unset values (both for debugging
; and because some write barriers don't use all the globals).
__g_lowest_address_offset SETA 0xffff
__g_highest_address_offset SETA 0xffff
__g_ephemeral_low_offset SETA 0xffff
__g_ephemeral_high_offset SETA 0xffff
__g_card_table_offset SETA 0xffff
MEND
; WRITE_BARRIER_END
;
; The partner to WRITE_BARRIER_ENTRY, used like NESTED_END.
;
MACRO
WRITE_BARRIER_END
LTORG ; force the literal pool to be emitted here so that copy code picks it up
; Use the standard macro to end the function definition.
LEAF_END_MARKED $__write_barrier_name
; Define a local string to hold the name of a label identifying the end of the write barrier function.
LCLS __EndLabelName
__EndLabelName SETS "$__write_barrier_name":CC:"_End"
; Switch to the descriptor table section.
WRITEBARRIERAREA
; Emit the descripter for this write barrier. The order of these datums must be kept in sync with the
; definition of the WriteBarrierDescriptor structure in vm\arm\stubs.cpp.
DCD $__write_barrier_name
DCD $__EndLabelName
DCD __g_lowest_address_offset
DCD __g_highest_address_offset
DCD __g_ephemeral_low_offset
DCD __g_ephemeral_high_offset
DCD __g_card_table_offset
; Switch back to the code section.
TEXTAREA
MEND
; LOAD_GC_GLOBAL
;
; Used any time we want to load the value of one of the supported GC globals into a register. This records
; the offset of the instructions used to do this (a movw/movt pair) so we can modify the actual value
; loaded at runtime.
;
; Note that a given write barrier can only load a given global once (which will be compile-time asserted
; below).
;
MACRO
LOAD_GC_GLOBAL $regName, $globalName
; Map the GC global name to the name of the variable tracking the offset for this function.
LCLS __offset_name
__offset_name SETS "__$globalName._offset"
; Ensure that we only attempt to load this global at most once in the current barrier function (we
; have this limitation purely because we only record one offset for each GC global).
ASSERT $__offset_name == 0xffff
; Define a unique name for a label we're about to define used in the calculation of the current
; function offset.
LCLS __offset_label_name
__offset_label_name SETS "$__write_barrier_name$__offset_name"
; Define the label.
$__offset_label_name
; Write the current function offset into the tracking variable.
$__offset_name SETA ($__offset_label_name - $__FuncStartLabel)
; Emit the instructions which will be patched to provide the value of the GC global (we start with a
; value of zero, so the write barriers have to be patched at least once before first use).
movw $regName, #0
movt $regName, #0
MEND
;
; Now define the macros used in the bodies of write barrier implementations.
;
; UPDATE_GC_SHADOW
;
; Update the GC shadow heap to aid debugging (no-op unless WRITE_BARRIER_CHECK is defined). Assumes the
; location being written lies on the GC heap (either we've already performed the dynamic check or this is
; statically asserted by the JIT by calling the unchecked version of the write barrier).
;
; Input:
; $ptrReg : register containing the location (in the real heap) to be updated
; $valReg : register containing the value (an objref) to be written to the location above
;
; Output:
; $__wbscratch : trashed
;
MACRO
UPDATE_GC_SHADOW $ptrReg, $valReg
#ifdef WRITE_BARRIER_CHECK
; Need one additional temporary register to hold the shadow pointer. Assume r7 is OK for now (and
; assert it). If this becomes a problem in the future the register choice can be parameterized.
LCLS pShadow
pShadow SETS "r7"
ASSERT "$ptrReg" != "$pShadow"
ASSERT "$valReg" != "$pShadow"
push {$pShadow}
; Compute address of shadow heap location:
; pShadow = g_GCShadow + ($ptrReg - g_lowest_address)
ldr $__wbscratch, =g_lowest_address
ldr $__wbscratch, [$__wbscratch]
sub $pShadow, $ptrReg, $__wbscratch
ldr $__wbscratch, =$g_GCShadow
ldr $__wbscratch, [$__wbscratch]
add $pShadow, $__wbscratch
; if (pShadow >= g_GCShadow) goto end
ldr $__wbscratch, =$g_GCShadowEnd
ldr $__wbscratch, [$__wbscratch]
cmp $pShadow, $__wbscratch
bhs %FT0
; *pShadow = $valReg
str $valReg, [$pShadow]
; Ensure that the write to the shadow heap occurs before the read from the GC heap so that race
; conditions are caught by INVALIDGCVALUE.
dmb
; if (*$ptrReg == $valReg) goto end
ldr $__wbscratch, [$ptrReg]
cmp $__wbscratch, $valReg
beq %FT0
; *pShadow = INVALIDGCVALUE (0xcccccccd)
movw $__wbscratch, #0xcccd
movt $__wbscratch, #0xcccc
str $__wbscratch, [$pShadow]
0
pop {$pShadow}
#endif // WRITE_BARRIER_CHECK
MEND
; UPDATE_CARD_TABLE
;
; Update the card table as necessary (if the object reference being assigned in the barrier refers to an
; object in the ephemeral generation). Otherwise this macro is a no-op. Assumes the location being written
; lies on the GC heap (either we've already performed the dynamic check or this is statically asserted by
; the JIT by calling the unchecked version of the write barrier).
;
; Additionally this macro can produce a uni-proc or multi-proc variant of the code. This governs whether
; we bother to check if the card table has been updated before making our own update (on an MP system it
; can be helpful to perform this check to avoid cache line thrashing, on an SP system the code path length
; is more important).
;
; Input:
; $ptrReg : register containing the location to be updated
; $valReg : register containing the value (an objref) to be written to the location above
; $mp : boolean indicating whether the code will run on an MP system
; $postGrow : boolean: {true} for post-grow version, {false} otherwise
; $tmpReg : additional register that can be trashed (can alias $ptrReg or $valReg if needed)
;
; Output:
; $tmpReg : trashed (defaults to $ptrReg)
; $__wbscratch : trashed
;
MACRO
UPDATE_CARD_TABLE $ptrReg, $valReg, $mp, $postGrow, $tmpReg
ASSERT "$ptrReg" != "$__wbscratch"
ASSERT "$valReg" != "$__wbscratch"
ASSERT "$tmpReg" != "$__wbscratch"
; In most cases the callers of this macro are fine with scratching $ptrReg, the exception being the
; ref write barrier, which wants to scratch $valReg instead. Ideally we could set $ptrReg as the
; default for the $tmpReg parameter, but limitations in armasm won't allow that. Similarly it doesn't
; seem to like us trying to redefine $tmpReg in the body of the macro. Instead we define a new local
; string variable and set that either with the value of $tmpReg or $ptrReg if $tmpReg wasn't
; specified.
LCLS tempReg
IF "$tmpReg" == ""
tempReg SETS "$ptrReg"
ELSE
tempReg SETS "$tmpReg"
ENDIF
; Check whether the value object lies in the ephemeral generations. If not we don't have to update the
; card table.
LOAD_GC_GLOBAL $__wbscratch, g_ephemeral_low
cmp $valReg, $__wbscratch
blo %FT0
; Only in post grow higher generation can be beyond ephemeral segment
IF $postGrow
LOAD_GC_GLOBAL $__wbscratch, g_ephemeral_high
cmp $valReg, $__wbscratch
bhs %FT0
ENDIF
; Update the card table.
LOAD_GC_GLOBAL $__wbscratch, g_card_table
add $__wbscratch, $__wbscratch, $ptrReg, lsr #10
; On MP systems make sure the card hasn't already been set first to avoid thrashing cache lines
; between CPUs.
; @ARMTODO: Check that the conditional store doesn't unconditionally gain exclusive access to the
; cache line anyway. Compare perf with a branch over and verify that omitting the compare on uniproc
; machines really is a perf win.
IF $mp
ldrb $tempReg, [$__wbscratch]
cmp $tempReg, #0xff
movne $tempReg, #0xff
strbne $tempReg, [$__wbscratch]
ELSE
mov $tempReg, #0xff
strb $tempReg, [$__wbscratch]
ENDIF
0
MEND
; CHECK_GC_HEAP_RANGE
;
; Verifies that the given value points into the GC heap range. If so the macro will fall through to the
; following code. Otherwise (if the value points outside the GC heap) a branch to the supplied label will
; be made.
;
; Input:
; $ptrReg : register containing the location to be updated
; $label : label branched to on a range check failure
;
; Output:
; $__wbscratch : trashed
;
MACRO
CHECK_GC_HEAP_RANGE $ptrReg, $label
ASSERT "$ptrReg" != "$__wbscratch"
LOAD_GC_GLOBAL $__wbscratch, g_lowest_address
cmp $ptrReg, $__wbscratch
blo $label
LOAD_GC_GLOBAL $__wbscratch, g_highest_address
cmp $ptrReg, $__wbscratch
bhs $label
MEND
;
; Finally define the write barrier functions themselves. Currently we don't provide variations that use
; different input registers. If the JIT wants this at a later stage in order to improve code quality it would
; be a relatively simple change to implement via an additional macro parameter to WRITE_BARRIER_ENTRY.
;
; The calling convention for the first batch of write barriers is:
;
; On entry:
; r0 : the destination address (LHS of the assignment)
; r1 : the object reference (RHS of the assignment)
;
; On exit:
; r0 : trashed
; $__wbscratch : trashed
;
; If you update any of the writebarrier be sure to update the sizes of patchable
; writebarriers in
; see ValidateWriteBarriers()
; The write barriers are macro taking arguments like
; $name: Name of the write barrier
; $mp: {true} for multi-proc, {false} otherwise
; $post: {true} for post-grow version, {false} otherwise
MACRO
JIT_WRITEBARRIER $name, $mp, $post
WRITE_BARRIER_ENTRY $name
IF $mp
dmb ; Perform a memory barrier
ENDIF
str r1, [r0] ; Write the reference
UPDATE_GC_SHADOW r0, r1 ; Update the shadow GC heap for debugging
UPDATE_CARD_TABLE r0, r1, $mp, $post ; Update the card table if necessary
bx lr
WRITE_BARRIER_END
MEND
MACRO
JIT_CHECKEDWRITEBARRIER_SP $name, $post
WRITE_BARRIER_ENTRY $name
str r1, [r0] ; Write the reference
CHECK_GC_HEAP_RANGE r0, %F1 ; Check whether the destination is in the GC heap
UPDATE_GC_SHADOW r0, r1 ; Update the shadow GC heap for debugging
UPDATE_CARD_TABLE r0, r1, {false}, $post; Update the card table if necessary
1
bx lr
WRITE_BARRIER_END
MEND
MACRO
JIT_CHECKEDWRITEBARRIER_MP $name, $post
WRITE_BARRIER_ENTRY $name
CHECK_GC_HEAP_RANGE r0, %F1 ; Check whether the destination is in the GC heap
dmb ; Perform a memory barrier
str r1, [r0] ; Write the reference
UPDATE_GC_SHADOW r0, r1 ; Update the shadow GC heap for debugging
UPDATE_CARD_TABLE r0, r1, {true}, $post ; Update the card table if necessary
bx lr
1
str r1, [r0] ; Write the reference
bx lr
WRITE_BARRIER_END
MEND
; The ByRef write barriers have a slightly different interface:
;
; On entry:
; r0 : the destination address (object reference written here)
; r1 : the source address (points to object reference to write)
;
; On exit:
; r0 : incremented by 4
; r1 : incremented by 4
; r2 : trashed
; $__wbscratch : trashed
;
MACRO
JIT_BYREFWRITEBARRIER $name, $mp, $post
WRITE_BARRIER_ENTRY $name
IF $mp
dmb ; Perform a memory barrier
ENDIF
ldr r2, [r1] ; Load target object ref from source pointer
str r2, [r0] ; Write the reference to the destination pointer
CHECK_GC_HEAP_RANGE r0, %F1 ; Check whether the destination is in the GC heap
UPDATE_GC_SHADOW r0, r2 ; Update the shadow GC heap for debugging
UPDATE_CARD_TABLE r0, r2, $mp, $post, r2 ; Update the card table if necessary (trash r2 rather than r0)
1
add r0, #4 ; Increment the destination pointer by 4
add r1, #4 ; Increment the source pointer by 4
bx lr
WRITE_BARRIER_END
MEND
BEGIN_WRITE_BARRIERS
; There 4 versions of each write barriers. A 2x2 combination of multi-proc/single-proc and pre/post grow version
JIT_WRITEBARRIER JIT_WriteBarrier_SP_Pre, {false}, {false}
JIT_WRITEBARRIER JIT_WriteBarrier_SP_Post, {false}, {true}
JIT_WRITEBARRIER JIT_WriteBarrier_MP_Pre, {true}, {false}
JIT_WRITEBARRIER JIT_WriteBarrier_MP_Post, {true}, {true}
JIT_CHECKEDWRITEBARRIER_SP JIT_CheckedWriteBarrier_SP_Pre, {false}
JIT_CHECKEDWRITEBARRIER_SP JIT_CheckedWriteBarrier_SP_Post, {true}
JIT_CHECKEDWRITEBARRIER_MP JIT_CheckedWriteBarrier_MP_Pre, {false}
JIT_CHECKEDWRITEBARRIER_MP JIT_CheckedWriteBarrier_MP_Post, {true}
JIT_BYREFWRITEBARRIER JIT_ByRefWriteBarrier_SP_Pre, {false}, {false}
JIT_BYREFWRITEBARRIER JIT_ByRefWriteBarrier_SP_Post, {false}, {true}
JIT_BYREFWRITEBARRIER JIT_ByRefWriteBarrier_MP_Pre, {true}, {false}
JIT_BYREFWRITEBARRIER JIT_ByRefWriteBarrier_MP_Post, {true}, {true}
END_WRITE_BARRIERS
#ifdef FEATURE_READYTORUN
NESTED_ENTRY DelayLoad_MethodCall_FakeProlog
; Match what the lazy thunk has pushed. The actual method arguments will be spilled later.
PROLOG_PUSH {r1-r3}
; This is where execution really starts.
DelayLoad_MethodCall
EXPORT DelayLoad_MethodCall
PROLOG_PUSH {r0}
PROLOG_WITH_TRANSITION_BLOCK 0x0, {true}, DoNotPushArgRegs
; Load the helper arguments
ldr r5, [sp,#(__PWTB_TransitionBlock+10*4)] ; pModule
ldr r6, [sp,#(__PWTB_TransitionBlock+11*4)] ; sectionIndex
ldr r7, [sp,#(__PWTB_TransitionBlock+12*4)] ; indirection
; Spill the actual method arguments
str r1, [sp,#(__PWTB_TransitionBlock+10*4)]
str r2, [sp,#(__PWTB_TransitionBlock+11*4)]
str r3, [sp,#(__PWTB_TransitionBlock+12*4)]
add r0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
mov r1, r7 ; pIndirection
mov r2, r6 ; sectionIndex
mov r3, r5 ; pModule
bl ExternalMethodFixupWorker
; mov the address we patched to in R12 so that we can tail call to it
mov r12, r0
EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
; Share the patch label
EPILOG_BRANCH ExternalMethodFixupPatchLabel
NESTED_END
MACRO
DynamicHelper $frameFlags, $suffix
GBLS __FakePrologName
__FakePrologName SETS "DelayLoad_Helper":CC:"$suffix":CC:"_FakeProlog"
NESTED_ENTRY $__FakePrologName
; Match what the lazy thunk has pushed. The actual method arguments will be spilled later.
PROLOG_PUSH {r1-r3}
GBLS __RealName
__RealName SETS "DelayLoad_Helper":CC:"$suffix"
; This is where execution really starts.
$__RealName
EXPORT $__RealName
PROLOG_PUSH {r0}
PROLOG_WITH_TRANSITION_BLOCK 0x4, {false}, DoNotPushArgRegs
; Load the helper arguments
ldr r5, [sp,#(__PWTB_TransitionBlock+10*4)] ; pModule
ldr r6, [sp,#(__PWTB_TransitionBlock+11*4)] ; sectionIndex
ldr r7, [sp,#(__PWTB_TransitionBlock+12*4)] ; indirection
; Spill the actual method arguments
str r1, [sp,#(__PWTB_TransitionBlock+10*4)]
str r2, [sp,#(__PWTB_TransitionBlock+11*4)]
str r3, [sp,#(__PWTB_TransitionBlock+12*4)]
add r0, sp, #__PWTB_TransitionBlock ; pTransitionBlock
mov r1, r7 ; pIndirection
mov r2, r6 ; sectionIndex
mov r3, r5 ; pModule
mov r4, $frameFlags
str r4, [sp,#0]
bl DynamicHelperWorker
cbnz r0, %FT0
ldr r0, [sp,#(__PWTB_TransitionBlock+9*4)] ; The result is stored in the argument area of the transition block
EPILOG_WITH_TRANSITION_BLOCK_RETURN
0
mov r12, r0
EPILOG_WITH_TRANSITION_BLOCK_TAILCALL
EPILOG_BRANCH_REG r12
NESTED_END
MEND
DynamicHelper DynamicHelperFrameFlags_Default
DynamicHelper DynamicHelperFrameFlags_ObjectArg, _Obj
DynamicHelper DynamicHelperFrameFlags_ObjectArg | DynamicHelperFrameFlags_ObjectArg2, _ObjObj
#endif // FEATURE_READYTORUN
; Must be at very end of file
END