From da1940139c5bea1e6185ec38cc56621d5c09be21 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Wed, 31 Mar 2021 16:11:47 +0200
Subject: [PATCH 1/8] W^X support

This change is the last part of enabling W^X support. It adds the actual executable allocator that handles all double-mapped memory allocations and creates the writable mappings. The platform-specific functionality is placed in a new minipal that will serve as a basis for the future removal of Windows API usage from the native runtime.

The final state of the change was tested on all supported platforms using coreclr pri 1 tests, with W^X both enabled and disabled via the COMPlus_EnableWXORX variable. The debugger changes were tested using the managed debugger test suite on Windows x64, x86 and on Apple Silicon so far; further testing on other platforms is in progress.

--- src/coreclr/CMakeLists.txt | 3 + src/coreclr/clrdefinitions.cmake | 4 - src/coreclr/debug/ee/arm64/arm64walker.cpp | 9 +- src/coreclr/debug/ee/controller.cpp | 47 +- src/coreclr/debug/ee/controller.h | 18 +- src/coreclr/debug/ee/debugger.cpp | 30 +- src/coreclr/debug/ee/debugger.h | 32 +- src/coreclr/debug/inc/amd64/primitives.h | 22 +- src/coreclr/debug/inc/arm/primitives.h | 13 +- src/coreclr/debug/inc/arm64/primitives.h | 6 +- src/coreclr/debug/inc/i386/primitives.h | 13 +- .../dlls/mscoree/coreclr/CMakeLists.txt | 1 + src/coreclr/inc/CrstTypes.def | 4 + src/coreclr/inc/clrconfigvalues.h | 4 + src/coreclr/inc/crsttypes.h | 175 ++-- src/coreclr/inc/executableallocator.h | 201 ++++- src/coreclr/inc/jithelpers.h | 12 +- src/coreclr/inc/utilcode.h | 29 - src/coreclr/minipal/CMakeLists.txt | 7 + src/coreclr/minipal/Unix/CMakeLists.txt | 4 + src/coreclr/minipal/Unix/doublemapping.cpp | 168 ++++ src/coreclr/minipal/Windows/CMakeLists.txt | 4 + src/coreclr/minipal/Windows/doublemapping.cpp | 246 ++++++ src/coreclr/minipal/minipal.h | 78 ++ src/coreclr/utilcode/CMakeLists.txt | 1 + src/coreclr/utilcode/executableallocator.cpp | 755 ++++++++++++++++++ src/coreclr/utilcode/loaderheap.cpp | 18 +- src/coreclr/utilcode/util.cpp | 162 ---- src/coreclr/vm/CMakeLists.txt | 4 +- src/coreclr/vm/amd64/JitHelpers_Fast.asm | 79 +- src/coreclr/vm/amd64/jithelpers_fast.S | 26 +- src/coreclr/vm/amd64/jitinterfaceamd64.cpp | 20 +- src/coreclr/vm/arm/armsinglestepper.cpp | 30 +- src/coreclr/vm/arm/asmhelpers.S | 10 + src/coreclr/vm/arm/asmhelpers.asm | 12 + src/coreclr/vm/arm/cgencpu.h | 13 + src/coreclr/vm/arm/stubs.cpp | 19 +- src/coreclr/vm/arm64/arm64singlestepper.cpp | 12 +- src/coreclr/vm/arm64/asmhelpers.S | 10 - src/coreclr/vm/arm64/asmhelpers.asm | 35 +- src/coreclr/vm/arm64/cgencpu.h | 13 + src/coreclr/vm/arm64/stubs.cpp | 10 +- src/coreclr/vm/ceemain.cpp | 9 +- src/coreclr/vm/class.cpp | 5 +- src/coreclr/vm/codeman.cpp | 27 +- src/coreclr/vm/comcallablewrapper.cpp | 18 +- src/coreclr/vm/comcallablewrapper.h | 4 + src/coreclr/vm/comdelegate.cpp | 2 +- src/coreclr/vm/dynamicmethod.cpp | 7 +- src/coreclr/vm/excep.cpp | 2 - src/coreclr/vm/exceptionhandling.cpp | 6 - src/coreclr/vm/gccover.cpp | 4 +- src/coreclr/vm/i386/jithelp.S | 30 +- src/coreclr/vm/i386/jithelp.asm | 35 +- src/coreclr/vm/i386/jitinterfacex86.cpp | 84 +- src/coreclr/vm/i386/stublinkerx86.cpp | 2 +- src/coreclr/vm/i386/stublinkerx86.h | 10 +- src/coreclr/vm/jitinterface.cpp | 2 +- src/coreclr/vm/jitinterface.h | 5 - src/coreclr/vm/loaderallocator.cpp | 17 +- src/coreclr/vm/loaderallocator.inl | 6 - src/coreclr/vm/method.cpp | 40 - src/coreclr/vm/precode.cpp | 4 +- src/coreclr/vm/stackwalk.cpp
| 2 - src/coreclr/vm/stublink.cpp | 14 +- src/coreclr/vm/stublink.h | 2 +- src/coreclr/vm/threads.cpp | 119 ++- src/coreclr/vm/threads.h | 19 +- src/coreclr/vm/virtualcallstub.cpp | 2 +- 69 files changed, 2139 insertions(+), 697 deletions(-) create mode 100644 src/coreclr/minipal/CMakeLists.txt create mode 100644 src/coreclr/minipal/Unix/CMakeLists.txt create mode 100644 src/coreclr/minipal/Unix/doublemapping.cpp create mode 100644 src/coreclr/minipal/Windows/CMakeLists.txt create mode 100644 src/coreclr/minipal/Windows/doublemapping.cpp create mode 100644 src/coreclr/minipal/minipal.h create mode 100644 src/coreclr/utilcode/executableallocator.cpp diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 78aa969473525..b4a4859342702 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -119,6 +119,8 @@ add_subdirectory(pal/prebuilt/inc) add_subdirectory(debug/debug-pal) +add_subdirectory(minipal) + if(CLR_CMAKE_TARGET_WIN32) add_subdirectory(gc/sample) endif() @@ -171,6 +173,7 @@ include_directories("classlibnative/cryptography") include_directories("classlibnative/inc") include_directories("${GENERATED_INCLUDE_DIR}") include_directories("hosts/inc") +include_directories("minipal") if(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE) include_directories("${GENERATED_INCLUDE_DIR}/etw") diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index eeb421cac4c2f..0485ff99a99eb 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -224,10 +224,6 @@ if(CLR_CMAKE_TARGET_WIN32) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) endif(CLR_CMAKE_TARGET_WIN32) -if(CLR_CMAKE_TARGET_OSX) - add_definitions(-DFEATURE_WRITEBARRIER_COPY) -endif(CLR_CMAKE_TARGET_OSX) - if (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) add_compile_definitions($<$>>:FEATURE_EH_FUNCLETS>) endif (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/debug/ee/arm64/arm64walker.cpp b/src/coreclr/debug/ee/arm64/arm64walker.cpp index ae6e8c1fc2933..6c4dee9349700 100644 --- a/src/coreclr/debug/ee/arm64/arm64walker.cpp +++ b/src/coreclr/debug/ee/arm64/arm64walker.cpp @@ -171,7 +171,14 @@ BYTE* NativeWalker::SetupOrSimulateInstructionForPatchSkip(T_CONTEXT * context, { CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, 0xd503201f); //Add Nop in buffer - m_pSharedPatchBypassBuffer->RipTargetFixup = ip; //Control Flow simulation alone is done DebuggerPatchSkip::TriggerExceptionHook +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder ripTargetFixupWriterHolder(&m_pSharedPatchBypassBuffer->RipTargetFixup, sizeof(UINT_PTR)); + UINT_PTR *pRipTargetFixupRW = ripTargetFixupWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + UINT_PTR *pRipTargetFixupRW = &m_pSharedPatchBypassBuffer->RipTargetFixup; +#endif // HOST_OSX && HOST_ARM64 + + *pRipTargetFixupRW = ip; //Control Flow simulation alone is done DebuggerPatchSkip::TriggerExceptionHook LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x is a Control Flow instr \n", opcode)); if (walk == WALK_CALL) //initialize Lr diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index b17ae8f115002..f9304d16ab070 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -84,8 +84,13 @@ SharedPatchBypassBuffer* DebuggerControllerPatch::GetOrCreateSharedPatchBypassBu if (m_pSharedPatchBypassBuffer == NULL) { void 
*pSharedPatchBypassBufferRX = g_pDebugger->GetInteropSafeExecutableHeap()->Alloc(sizeof(SharedPatchBypassBuffer)); +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder sharedPatchBypassBufferWriterHolder((SharedPatchBypassBuffer*)pSharedPatchBypassBufferRX, sizeof(SharedPatchBypassBuffer)); - new (sharedPatchBypassBufferWriterHolder.GetRW()) SharedPatchBypassBuffer(); + void *pSharedPatchBypassBufferRW = sharedPatchBypassBufferWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + void *pSharedPatchBypassBufferRW = pSharedPatchBypassBufferRX; +#endif // HOST_OSX && HOST_ARM64 + new (pSharedPatchBypassBufferRW) SharedPatchBypassBuffer(); m_pSharedPatchBypassBuffer = (SharedPatchBypassBuffer*)pSharedPatchBypassBufferRX; _ASSERTE(m_pSharedPatchBypassBuffer); @@ -4351,7 +4356,15 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // m_pSharedPatchBypassBuffer = patch->GetOrCreateSharedPatchBypassBuffer(); - BYTE* patchBypass = m_pSharedPatchBypassBuffer->PatchBypass; +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder sharedPatchBypassBufferWriterHolder((SharedPatchBypassBuffer*)m_pSharedPatchBypassBuffer, sizeof(SharedPatchBypassBuffer)); + SharedPatchBypassBuffer *pSharedPatchBypassBufferRW = sharedPatchBypassBufferWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + SharedPatchBypassBuffer *pSharedPatchBypassBufferRW = m_pSharedPatchBypassBuffer; +#endif // HOST_OSX && HOST_ARM64 + + BYTE* patchBypassRX = m_pSharedPatchBypassBuffer->PatchBypass; + BYTE* patchBypassRW = pSharedPatchBypassBufferRW->PatchBypass; LOG((LF_CORDB, LL_INFO10000, "DPS::DPS: Patch skip for opcode 0x%.4x at address %p buffer allocated at 0x%.8x\n", patch->opcode, patch->address, m_pSharedPatchBypassBuffer)); // Copy the instruction block over to the patch skip @@ -4367,19 +4380,19 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // the 2nd skip executes the new jump-stamp code and not the original method prologue code. Copying // the code every time ensures that we have the most up-to-date version of the code in the buffer. _ASSERTE( patch->IsBound() ); - CopyInstructionBlock(patchBypass, (const BYTE *)patch->address); + CopyInstructionBlock(patchBypassRW, (const BYTE *)patch->address); // Technically, we could create a patch skipper for an inactive patch, but we rely on the opcode being // set here. _ASSERTE( patch->IsActivated() ); - CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, patch->opcode); + CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypassRW, patch->opcode); LOG((LF_CORDB, LL_EVERYTHING, "SetInstruction was called\n")); // // Look at instruction to get some attributes // - NativeWalker::DecodeInstructionForPatchSkip(patchBypass, &(m_instrAttrib)); + NativeWalker::DecodeInstructionForPatchSkip(patchBypassRX, &(m_instrAttrib)); #if defined(TARGET_AMD64) @@ -4395,33 +4408,33 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // Populate the RIP-relative buffer with the current value if needed // - BYTE* bufferBypass = m_pSharedPatchBypassBuffer->BypassBuffer; + BYTE* bufferBypassRW = pSharedPatchBypassBufferRW->BypassBuffer; // Overwrite the *signed* displacement. 
- int dwOldDisp = *(int*)(&patchBypass[m_instrAttrib.m_dwOffsetToDisp]); + int dwOldDisp = *(int*)(&patchBypassRX[m_instrAttrib.m_dwOffsetToDisp]); int dwNewDisp = offsetof(SharedPatchBypassBuffer, BypassBuffer) - (offsetof(SharedPatchBypassBuffer, PatchBypass) + m_instrAttrib.m_cbInstr); - *(int*)(&patchBypass[m_instrAttrib.m_dwOffsetToDisp]) = dwNewDisp; + *(int*)(&patchBypassRW[m_instrAttrib.m_dwOffsetToDisp]) = dwNewDisp; // This could be an LEA, which we'll just have to change into a MOV // and copy the original address - if (((patchBypass[0] == 0x4C) || (patchBypass[0] == 0x48)) && (patchBypass[1] == 0x8d)) + if (((patchBypassRX[0] == 0x4C) || (patchBypassRX[0] == 0x48)) && (patchBypassRX[1] == 0x8d)) { - patchBypass[1] = 0x8b; // MOV reg, mem + patchBypassRW[1] = 0x8b; // MOV reg, mem _ASSERTE((int)sizeof(void*) <= SharedPatchBypassBuffer::cbBufferBypass); - *(void**)bufferBypass = (void*)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); + *(void**)bufferBypassRW = (void*)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); } else { _ASSERTE(m_instrAttrib.m_cOperandSize <= SharedPatchBypassBuffer::cbBufferBypass); // Copy the data into our buffer. - memcpy(bufferBypass, patch->address + m_instrAttrib.m_cbInstr + dwOldDisp, m_instrAttrib.m_cOperandSize); + memcpy(bufferBypassRW, patch->address + m_instrAttrib.m_cbInstr + dwOldDisp, m_instrAttrib.m_cOperandSize); if (m_instrAttrib.m_fIsWrite) { // save the actual destination address and size so when we TriggerSingleStep() we can update the value - m_pSharedPatchBypassBuffer->RipTargetFixup = (UINT_PTR)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); - m_pSharedPatchBypassBuffer->RipTargetFixupSize = m_instrAttrib.m_cOperandSize; + pSharedPatchBypassBufferRW->RipTargetFixup = (UINT_PTR)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); + pSharedPatchBypassBufferRW->RipTargetFixupSize = m_instrAttrib.m_cOperandSize; } } } @@ -4490,17 +4503,17 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, #else // FEATURE_EMULATE_SINGLESTEP #ifdef TARGET_ARM64 - patchBypass = NativeWalker::SetupOrSimulateInstructionForPatchSkip(context, m_pSharedPatchBypassBuffer, (const BYTE *)patch->address, patch->opcode); + patchBypassRX = NativeWalker::SetupOrSimulateInstructionForPatchSkip(context, m_pSharedPatchBypassBuffer, (const BYTE *)patch->address, patch->opcode); #endif //TARGET_ARM64 //set eip to point to buffer... 
- SetIP(context, (PCODE)patchBypass); + SetIP(context, (PCODE)patchBypassRX); if (context ==(T_CONTEXT*) &c) thread->SetThreadContext(&c); - LOG((LF_CORDB, LL_INFO10000, "DPS::DPS Bypass at 0x%p for opcode %p \n", patchBypass, patch->opcode)); + LOG((LF_CORDB, LL_INFO10000, "DPS::DPS Bypass at 0x%p for opcode %p \n", patchBypassRX, patch->opcode)); // // Turn on single step (if the platform supports it) so we can diff --git a/src/coreclr/debug/ee/controller.h b/src/coreclr/debug/ee/controller.h index 12b1106f7a4b2..6996439c31fba 100644 --- a/src/coreclr/debug/ee/controller.h +++ b/src/coreclr/debug/ee/controller.h @@ -266,14 +266,28 @@ class SharedPatchBypassBuffer LONG AddRef() { - LONG newRefCount = InterlockedIncrement(&m_refCount); +#if !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<LONG> refCountWriterHolder(&m_refCount, sizeof(LONG)); + LONG *pRefCountRW = refCountWriterHolder.GetRW(); +#else // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + LONG *pRefCountRW = &m_refCount; +#endif // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + + LONG newRefCount = InterlockedIncrement(pRefCountRW); _ASSERTE(newRefCount > 0); return newRefCount; } LONG Release() { - LONG newRefCount = InterlockedDecrement(&m_refCount); +#if !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<LONG> refCountWriterHolder(&m_refCount, sizeof(LONG)); + LONG *pRefCountRW = refCountWriterHolder.GetRW(); +#else // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + LONG *pRefCountRW = &m_refCount; +#endif // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + + LONG newRefCount = InterlockedDecrement(pRefCountRW); _ASSERTE(newRefCount >= 0); if (newRefCount == 0) diff --git a/src/coreclr/debug/ee/debugger.cpp b/src/coreclr/debug/ee/debugger.cpp index 53ee5555ace43..e4563a31757f4 100644 --- a/src/coreclr/debug/ee/debugger.cpp +++ b/src/coreclr/debug/ee/debugger.cpp @@ -1317,13 +1317,19 @@ DebuggerEval::DebuggerEval(CONTEXT * pContext, DebuggerIPCE_FuncEvalInfo * pEval // Allocate the breakpoint instruction info in executable memory. void *bpInfoSegmentRX = g_pDebugger->GetInteropSafeExecutableHeap()->Alloc(sizeof(DebuggerEvalBreakpointInfoSegment)); + +#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder<DebuggerEvalBreakpointInfoSegment> bpInfoSegmentWriterHolder((DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX, sizeof(DebuggerEvalBreakpointInfoSegment)); - new (bpInfoSegmentWriterHolder.GetRW()) DebuggerEvalBreakpointInfoSegment(this); + DebuggerEvalBreakpointInfoSegment *bpInfoSegmentRW = bpInfoSegmentWriterHolder.GetRW(); +#else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + DebuggerEvalBreakpointInfoSegment *bpInfoSegmentRW = (DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX; +#endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + new (bpInfoSegmentRW) DebuggerEvalBreakpointInfoSegment(this); m_bpInfoSegment = (DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX; // This must be non-zero so that the saved opcode is non-zero, and on IA64 we want it to be 0x16 // so that we can have a breakpoint instruction in any slot in the bundle.
- bpInfoSegmentWriterHolder.GetRW()->m_breakpointInstruction[0] = 0x16; + bpInfoSegmentRW->m_breakpointInstruction[0] = 0x16; #if defined(TARGET_ARM) USHORT *bp = (USHORT*)&m_bpInfoSegment->m_breakpointInstruction; *bp = CORDbg_BREAK_INSTRUCTION; @@ -16234,6 +16240,7 @@ void Debugger::ReleaseDebuggerDataLock(Debugger *pDebugger) } #endif // DACCESS_COMPILE +#ifndef DACCESS_COMPILE /* ------------------------------------------------------------------------ * * Functions for DebuggerHeap executable memory allocations * ------------------------------------------------------------------------ */ @@ -16378,6 +16385,7 @@ void* DebuggerHeapExecutableMemoryAllocator::GetPointerToChunkWithUsageUpdate(De return page->GetPointerToChunk(chunkNumber); } +#endif // DACCESS_COMPILE /* ------------------------------------------------------------------------ * * DebuggerHeap impl @@ -16412,7 +16420,7 @@ void DebuggerHeap::Destroy() m_hHeap = NULL; } #endif -#ifndef HOST_WINDOWS +#if !defined(HOST_WINDOWS) && !defined(DACCESS_COMPILE) if (m_execMemAllocator != NULL) { delete m_execMemAllocator; @@ -16439,6 +16447,8 @@ HRESULT DebuggerHeap::Init(BOOL fExecutable) } CONTRACTL_END; +#ifndef DACCESS_COMPILE + // Have knob catch if we don't want to lazy init the debugger. _ASSERTE(!g_DbgShouldntUseDebugger); m_fExecutable = fExecutable; @@ -16472,7 +16482,9 @@ HRESULT DebuggerHeap::Init(BOOL fExecutable) return E_OUTOFMEMORY; } } -#endif +#endif + +#endif // !DACCESS_COMPILE return S_OK; } @@ -16549,7 +16561,10 @@ void *DebuggerHeap::Alloc(DWORD size) size += sizeof(InteropHeapCanary); #endif - void *ret; + void *ret = NULL; + +#ifndef DACCESS_COMPILE + #ifdef USE_INTEROPSAFE_HEAP _ASSERTE(m_hHeap != NULL); ret = ::HeapAlloc(m_hHeap, HEAP_ZERO_MEMORY, size); @@ -16585,7 +16600,7 @@ void *DebuggerHeap::Alloc(DWORD size) InteropHeapCanary * pCanary = InteropHeapCanary::GetFromRawAddr(ret); ret = pCanary->GetUserAddr(); #endif - +#endif // !DACCESS_COMPILE return ret; } @@ -16638,6 +16653,8 @@ void DebuggerHeap::Free(void *pMem) } CONTRACTL_END; +#ifndef DACCESS_COMPILE + #ifdef USE_INTEROPSAFE_CANARY // Check for canary @@ -16673,6 +16690,7 @@ void DebuggerHeap::Free(void *pMem) #endif // HOST_WINDOWS } #endif +#endif // !DACCESS_COMPILE } #ifndef DACCESS_COMPILE diff --git a/src/coreclr/debug/ee/debugger.h b/src/coreclr/debug/ee/debugger.h index f16f8cd6d9d9d..5503de2459099 100644 --- a/src/coreclr/debug/ee/debugger.h +++ b/src/coreclr/debug/ee/debugger.h @@ -1054,6 +1054,8 @@ constexpr uint64_t CHUNKS_PER_DEBUGGERHEAP=(DEBUGGERHEAP_PAGESIZE / EXPECTED_CHU constexpr uint64_t MAX_CHUNK_MASK=((1ull << CHUNKS_PER_DEBUGGERHEAP) - 1); constexpr uint64_t BOOKKEEPING_CHUNK_MASK (1ull << (CHUNKS_PER_DEBUGGERHEAP - 1)); +#ifndef DACCESS_COMPILE + // Forward declaration struct DebuggerHeapExecutableMemoryPage; @@ -1110,8 +1112,13 @@ struct DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage inline void SetNextPage(DebuggerHeapExecutableMemoryPage* nextPage) { +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - debuggerHeapPageWriterHolder.GetRW()->chunks[0].bookkeeping.nextPage = nextPage; + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif + pHeapPageRW->chunks[0].bookkeeping.nextPage = nextPage; } inline uint64_t GetPageOccupancy() const @@ -1124,8 +1131,13 @@ struct 
DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage // Can't unset the bookmark chunk! ASSERT((newOccupancy & BOOKKEEPING_CHUNK_MASK) != 0); ASSERT(newOccupancy <= MAX_CHUNK_MASK); +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - debuggerHeapPageWriterHolder.GetRW()->chunks[0].bookkeeping.pageOccupancy = newOccupancy; + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif + pHeapPageRW->chunks[0].bookkeeping.pageOccupancy = newOccupancy; } inline void* GetPointerToChunk(int chunkNum) const @@ -1136,14 +1148,18 @@ struct DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage DebuggerHeapExecutableMemoryPage() { - ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - SetPageOccupancy(BOOKKEEPING_CHUNK_MASK); // only the first bit is set. +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif for (uint8_t i = 1; i < CHUNKS_PER_DEBUGGERHEAP; i++) { ASSERT(i != 0); - debuggerHeapPageWriterHolder.GetRW()->chunks[i].data.startOfPage = this; - debuggerHeapPageWriterHolder.GetRW()->chunks[i].data.chunkNumber = i; + pHeapPageRW->chunks[i].data.startOfPage = this; + pHeapPageRW->chunks[i].data.chunkNumber = i; } } @@ -1190,6 +1206,8 @@ class DebuggerHeapExecutableMemoryAllocator Crst m_execMemAllocMutex; }; +#endif // DACCESS_COMPILE + // ------------------------------------------------------------------------ * // DebuggerHeap class // For interop debugging, we need a heap that: @@ -1201,6 +1219,8 @@ class DebuggerHeapExecutableMemoryAllocator #define USE_INTEROPSAFE_HEAP #endif +class DebuggerHeapExecutableMemoryAllocator; + class DebuggerHeap { public: diff --git a/src/coreclr/debug/inc/amd64/primitives.h b/src/coreclr/debug/inc/amd64/primitives.h index d8d14b24b5425..9d363938519c7 100644 --- a/src/coreclr/debug/inc/amd64/primitives.h +++ b/src/coreclr/debug/inc/amd64/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - #ifndef CORDB_ADDRESS_TYPE typedef const BYTE CORDB_ADDRESS_TYPE; typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; @@ -191,14 +187,7 @@ inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address) { LIMITED_METHOD_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder breakpointWriterHolder(address, CORDbg_BREAK_INSTRUCTION_SIZE); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = breakpointWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = 0xCC; // int 3 (single byte patch) + *((unsigned char*)address) = 0xCC; // int 3 (single byte patch) FlushInstructionCache(GetCurrentProcess(), address, 1); } @@ -209,14 +198,7 @@ inline void CORDbgSetInstruction(UNALIGNED CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. 
LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder instructionWriterHolder(address, sizeof(unsigned char)); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = instructionWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = + *((unsigned char*)address) = (unsigned char) instruction; // setting one byte is important FlushInstructionCache(GetCurrentProcess(), address, 1); diff --git a/src/coreclr/debug/inc/arm/primitives.h b/src/coreclr/debug/inc/arm/primitives.h index c4e2d28602e56..269281eb006be 100644 --- a/src/coreclr/debug/inc/arm/primitives.h +++ b/src/coreclr/debug/inc/arm/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - #ifndef THUMB_CODE #define THUMB_CODE 1 #endif @@ -163,14 +159,7 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder instructionWriterHolder(address, sizeof(PRD_TYPE)); - CORDB_ADDRESS_TYPE* addressRW = instructionWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - CORDB_ADDRESS ptraddr = (CORDB_ADDRESS)addressRW; + CORDB_ADDRESS ptraddr = (CORDB_ADDRESS)address; _ASSERTE(ptraddr & THUMB_CODE); ptraddr &= ~THUMB_CODE; diff --git a/src/coreclr/debug/inc/arm64/primitives.h b/src/coreclr/debug/inc/arm64/primitives.h index 4f4c3f7bcd8f2..05c03c7b3094f 100644 --- a/src/coreclr/debug/inc/arm64/primitives.h +++ b/src/coreclr/debug/inc/arm64/primitives.h @@ -150,13 +150,13 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. 
LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) +#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX) ExecutableWriterHolder instructionWriterHolder((LPVOID)address, sizeof(PRD_TYPE)); ULONGLONG ptraddr = dac_cast(instructionWriterHolder.GetRW()); -#else // !DBI_COMPILE && !DACCESS_COMPILE +#else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX ULONGLONG ptraddr = dac_cast(address); -#endif // !DBI_COMPILE && !DACCESS_COMPILE +#endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX *(PRD_TYPE *)ptraddr = instruction; FlushInstructionCache(GetCurrentProcess(), address, diff --git a/src/coreclr/debug/inc/i386/primitives.h b/src/coreclr/debug/inc/i386/primitives.h index 313b42c5a1970..2f228b3a3a9a1 100644 --- a/src/coreclr/debug/inc/i386/primitives.h +++ b/src/coreclr/debug/inc/i386/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - typedef const BYTE CORDB_ADDRESS_TYPE; typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; @@ -151,14 +147,7 @@ inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address) { LIMITED_METHOD_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder breakpointWriterHolder(address, CORDbg_BREAK_INSTRUCTION_SIZE); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = breakpointWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = 0xCC; // int 3 (single byte patch) + *((unsigned char*)address) = 0xCC; // int 3 (single byte patch) FlushInstructionCache(GetCurrentProcess(), address, 1); } diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index fae55ecdc3ea5..9b8e4b649864d 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -109,6 +109,7 @@ set(CORECLR_LIBRARIES v3binder System.Globalization.Native-Static interop + coreclrminipal ) if(CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index c48872a0b9424..3b67b14834e29 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -201,6 +201,10 @@ End Crst Exception End +Crst ExecutableAllocatorLock + Unordered +End + Crst ExecuteManRangeLock End diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 3f21e41dfa369..1c57796cb2e39 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -735,6 +735,10 @@ RETAIL_CONFIG_STRING_INFO(EXTERNAL_DOTNET_DiagnosticPorts, W("DiagnosticPorts"), RETAIL_CONFIG_STRING_INFO(INTERNAL_LTTngConfig, W("LTTngConfig"), "Configuration for LTTng.") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If COMPlus_LTTng is set to 0, this will prevent the LTTng library from being loaded at runtime") +// +// Executable code +// +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWXORX, W("EnableWXORX"), 1, "Enable W^X for executable memory."); #ifdef FEATURE_GDBJIT /// diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h index a1bab2ecb906c..462a654a62c5f 100644 --- a/src/coreclr/inc/crsttypes.h +++ b/src/coreclr/inc/crsttypes.h @@ -49,92 +49,93 @@ enum CrstType CrstEventPipe = 31, CrstEventStore = 32, CrstException = 33, - CrstExecuteManRangeLock = 34, - 
CrstExternalObjectContextCache = 35, - CrstFCall = 36, - CrstFuncPtrStubs = 37, - CrstFusionAppCtx = 38, - CrstGCCover = 39, - CrstGlobalStrLiteralMap = 40, - CrstHandleTable = 41, - CrstHostAssemblyMap = 42, - CrstHostAssemblyMapAdd = 43, - CrstIbcProfile = 44, - CrstIJWFixupData = 45, - CrstIJWHash = 46, - CrstILStubGen = 47, - CrstInlineTrackingMap = 48, - CrstInstMethodHashTable = 49, - CrstInterop = 50, - CrstInteropData = 51, - CrstIsJMCMethod = 52, - CrstISymUnmanagedReader = 53, - CrstJit = 54, - CrstJitGenericHandleCache = 55, - CrstJitInlineTrackingMap = 56, - CrstJitPatchpoint = 57, - CrstJitPerf = 58, - CrstJumpStubCache = 59, - CrstLeafLock = 60, - CrstListLock = 61, - CrstLoaderAllocator = 62, - CrstLoaderAllocatorReferences = 63, - CrstLoaderHeap = 64, - CrstManagedObjectWrapperMap = 65, - CrstMethodDescBackpatchInfoTracker = 66, - CrstModule = 67, - CrstModuleFixup = 68, - CrstModuleLookupTable = 69, - CrstMulticoreJitHash = 70, - CrstMulticoreJitManager = 71, - CrstNativeImageEagerFixups = 72, - CrstNativeImageLoad = 73, - CrstNls = 74, - CrstNotifyGdb = 75, - CrstObjectList = 76, - CrstPEImage = 77, - CrstPendingTypeLoadEntry = 78, - CrstPgoData = 79, - CrstPinnedByrefValidation = 80, - CrstProfilerGCRefDataFreeList = 81, - CrstProfilingAPIStatus = 82, - CrstRCWCache = 83, - CrstRCWCleanupList = 84, - CrstReadyToRunEntryPointToMethodDescMap = 85, - CrstReflection = 86, - CrstReJITGlobalRequest = 87, - CrstRetThunkCache = 88, - CrstSavedExceptionInfo = 89, - CrstSaveModuleProfileData = 90, - CrstSecurityStackwalkCache = 91, - CrstSigConvert = 92, - CrstSingleUseLock = 93, - CrstSpecialStatics = 94, - CrstStackSampler = 95, - CrstStressLog = 96, - CrstStubCache = 97, - CrstStubDispatchCache = 98, - CrstStubUnwindInfoHeapSegments = 99, - CrstSyncBlockCache = 100, - CrstSyncHashLock = 101, - CrstSystemBaseDomain = 102, - CrstSystemDomain = 103, - CrstSystemDomainDelayedUnloadList = 104, - CrstThreadIdDispenser = 105, - CrstThreadpoolTimerQueue = 106, - CrstThreadpoolWaitThreads = 107, - CrstThreadpoolWorker = 108, - CrstThreadStore = 109, - CrstTieredCompilation = 110, - CrstTypeEquivalenceMap = 111, - CrstTypeIDMap = 112, - CrstUMEntryThunkCache = 113, - CrstUniqueStack = 114, - CrstUnresolvedClassLock = 115, - CrstUnwindInfoTableLock = 116, - CrstVSDIndirectionCellLock = 117, - CrstWrapperTemplate = 118, - kNumberOfCrstTypes = 119 + CrstExecutableAllocatorLock = 34, + CrstExecuteManRangeLock = 35, + CrstExternalObjectContextCache = 36, + CrstFCall = 37, + CrstFuncPtrStubs = 38, + CrstFusionAppCtx = 39, + CrstGCCover = 40, + CrstGlobalStrLiteralMap = 41, + CrstHandleTable = 42, + CrstHostAssemblyMap = 43, + CrstHostAssemblyMapAdd = 44, + CrstIbcProfile = 45, + CrstIJWFixupData = 46, + CrstIJWHash = 47, + CrstILStubGen = 48, + CrstInlineTrackingMap = 49, + CrstInstMethodHashTable = 50, + CrstInterop = 51, + CrstInteropData = 52, + CrstIsJMCMethod = 53, + CrstISymUnmanagedReader = 54, + CrstJit = 55, + CrstJitGenericHandleCache = 56, + CrstJitInlineTrackingMap = 57, + CrstJitPatchpoint = 58, + CrstJitPerf = 59, + CrstJumpStubCache = 60, + CrstLeafLock = 61, + CrstListLock = 62, + CrstLoaderAllocator = 63, + CrstLoaderAllocatorReferences = 64, + CrstLoaderHeap = 65, + CrstManagedObjectWrapperMap = 66, + CrstMethodDescBackpatchInfoTracker = 67, + CrstModule = 68, + CrstModuleFixup = 69, + CrstModuleLookupTable = 70, + CrstMulticoreJitHash = 71, + CrstMulticoreJitManager = 72, + CrstNativeImageEagerFixups = 73, + CrstNativeImageLoad = 74, + CrstNls = 75, + CrstNotifyGdb = 76, + 
CrstObjectList = 77, + CrstPEImage = 78, + CrstPendingTypeLoadEntry = 79, + CrstPgoData = 80, + CrstPinnedByrefValidation = 81, + CrstProfilerGCRefDataFreeList = 82, + CrstProfilingAPIStatus = 83, + CrstRCWCache = 84, + CrstRCWCleanupList = 85, + CrstReadyToRunEntryPointToMethodDescMap = 86, + CrstReflection = 87, + CrstReJITGlobalRequest = 88, + CrstRetThunkCache = 89, + CrstSavedExceptionInfo = 90, + CrstSaveModuleProfileData = 91, + CrstSecurityStackwalkCache = 92, + CrstSigConvert = 93, + CrstSingleUseLock = 94, + CrstSpecialStatics = 95, + CrstStackSampler = 96, + CrstStressLog = 97, + CrstStubCache = 98, + CrstStubDispatchCache = 99, + CrstStubUnwindInfoHeapSegments = 100, + CrstSyncBlockCache = 101, + CrstSyncHashLock = 102, + CrstSystemBaseDomain = 103, + CrstSystemDomain = 104, + CrstSystemDomainDelayedUnloadList = 105, + CrstThreadIdDispenser = 106, + CrstThreadpoolTimerQueue = 107, + CrstThreadpoolWaitThreads = 108, + CrstThreadpoolWorker = 109, + CrstThreadStore = 110, + CrstTieredCompilation = 111, + CrstTypeEquivalenceMap = 112, + CrstTypeIDMap = 113, + CrstUMEntryThunkCache = 114, + CrstUniqueStack = 115, + CrstUnresolvedClassLock = 116, + CrstUnwindInfoTableLock = 117, + CrstVSDIndirectionCellLock = 118, + CrstWrapperTemplate = 119, + kNumberOfCrstTypes = 120 }; #endif // __CRST_TYPES_INCLUDED @@ -179,6 +180,7 @@ int g_rgCrstLevelMap[] = 18, // CrstEventPipe 0, // CrstEventStore 0, // CrstException + -1, // CrstExecutableAllocatorLock 0, // CrstExecuteManRangeLock 0, // CrstExternalObjectContextCache 3, // CrstFCall @@ -303,6 +305,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstEventPipe", "CrstEventStore", "CrstException", + "CrstExecutableAllocatorLock", "CrstExecuteManRangeLock", "CrstExternalObjectContextCache", "CrstFCall", diff --git a/src/coreclr/inc/executableallocator.h b/src/coreclr/inc/executableallocator.h index ce0c6c22f890e..101178f9a4ef0 100644 --- a/src/coreclr/inc/executableallocator.h +++ b/src/coreclr/inc/executableallocator.h @@ -11,6 +11,191 @@ #include "utilcode.h" #include "ex.h" +#include "minipal.h" + +#ifndef DACCESS_COMPILE + +// This class is responsible for allocation of all the executable memory in the runtime. +class ExecutableAllocator +{ + // RX address range block descriptor + struct BlockRX + { + // Next block in a linked list + BlockRX* next; + // Base address of the block + void* baseRX; + // Size of the block + size_t size; + // Offset of the block in the shared memory + size_t offset; + }; + + // RW address range block descriptor + struct BlockRW + { + // Next block in a linked list + BlockRW* next; + // Base address of the RW mapping of the block + void* baseRW; + // Base address of the RX mapping of the block + void* baseRX; + // Size of the block + size_t size; + // Usage reference count of the RW block. RW blocks can be reused + // when multiple mappings overlap in the VA space at the same time + // (even from multiple threads) + size_t refCount; + }; + + typedef void (*FatalErrorHandler)(UINT errorCode, LPCWSTR pszMessage); + + // Instance of the allocator + static ExecutableAllocator* g_instance; + + // Callback to the runtime to report fatal errors + static FatalErrorHandler g_fatalErrorHandler; + +#if USE_UPPER_ADDRESS + // Preferred region to allocate the code in. + static BYTE* g_codeMinAddr; + static BYTE* g_codeMaxAddr; + static BYTE* g_codeAllocStart; + // Next address to try to allocate for code in the preferred region. 
+ static BYTE* g_codeAllocHint; +#endif // USE_UPPER_ADDRESS + + // Caches the COMPlus_EnableWXORX setting + static bool g_isWXorXEnabled; + + // Head of the linked list of all RX blocks that were allocated by this allocator + BlockRX* m_pFirstBlockRX = NULL; + + // Head of the linked list of free RX blocks that were allocated by this allocator and then backed out + BlockRX* m_pFirstFreeBlockRX = NULL; + + // Head of the linked list of currently mapped RW blocks + BlockRW* m_pFirstBlockRW = NULL; + + // Handle of the double mapped memory mapper + void *m_doubleMemoryMapperHandle = NULL; + + // Maximum size of executable memory this allocator can allocate + size_t m_maxExecutableCodeSize; + + // First free offset in the underlying shared memory. It is not used + // for platforms that don't use shared memory. + size_t m_freeOffset = 0; + + // Last RW mapping cached so that it can be reused for the next mapping + // request if it goes into the same range. + BlockRW* m_cachedMapping = NULL; + + // Synchronization of the public allocator methods + CRITSEC_COOKIE m_CriticalSection; + + // Update currently cached mapping. If the passed in block is the same as the one + // in the cache, it keeps it cached. Otherwise it destroys the currently cached one + // and replaces it by the passed in one. + void UpdateCachedMapping(BlockRW *pBlock); + + // Find existing RW block that maps the whole specified range of RX memory. + // Return NULL if no such block exists. + void* FindRWBlock(void* baseRX, size_t size); + + // Add RW block to the list of existing RW blocks + bool AddRWBlock(void* baseRW, void* baseRX, size_t size); + + // Remove RW block from the list of existing RW blocks and return the base + // address and size the underlying memory was mapped at. + // Return false if no existing RW block contains the passed in address. + bool RemoveRWBlock(void* pRW, void** pUnmapAddress, size_t* pUnmapSize); + + // Find a free block with the closest size >= the requested size. + // Returns NULL if no such block exists. + BlockRX* FindBestFreeBlock(size_t size); + + // Return memory mapping granularity. + static size_t Granularity(); + + // Allocate a block of executable memory of the specified size. + // It doesn't acquire the actual virtual memory, just the + // range of the underlying shared memory. + BlockRX* AllocateBlock(size_t size, bool* pIsFreeBlock); + + // Backout the block allocated by AllocateBlock in case of an + // error. + void BackoutBlock(BlockRX* pBlock, bool isFreeBlock); + + // Allocate range of offsets in the underlying shared memory + bool AllocateOffset(size_t* pOffset, size_t size); + + // Add RX block to the linked list of existing blocks + void AddRXBlock(BlockRX *pBlock); + + // Return true if double mapping is enabled. + static bool IsDoubleMappingEnabled(); + + // Initialize the allocator instance + bool Initialize(); + +public: + + // Return the ExecuteAllocator singleton instance + static ExecutableAllocator* Instance(); + + // Initialize the static members of the Executable allocator and allocate + // and initialize the instance of it. + static HRESULT StaticInitialize(FatalErrorHandler fatalErrorHandler); + + // Destroy the allocator + ~ExecutableAllocator(); + + // Return true if W^X is enabled + static bool IsWXORXEnabled(); + + // Use this function to initialize the g_codeAllocHint + // during startup. base is runtime .dll base address, + // size is runtime .dll virtual size. 
+ static void InitCodeAllocHint(size_t base, size_t size, int randomPageOffset); + + // Use this function to reset the g_codeAllocHint + // after unloading an AppDomain + static void ResetCodeAllocHint(); + + // Returns TRUE if p is located in near clr.dll that allows us + // to use rel32 IP-relative addressing modes. + static bool IsPreferredExecutableRange(void* p); + + // Reserve the specified amount of virtual address space for executable mapping. + void* Reserve(size_t size); + + // Reserve the specified amount of virtual address space for executable mapping. + // The reserved range must be within the loAddress and hiAddress. If it is not + // possible to reserve memory in such range, the method returns NULL. + void* ReserveWithinRange(size_t size, const void* loAddress, const void* hiAddress); + + // Reserve the specified amount of virtual address space for executable mapping + // exactly at the given address. + void* ReserveAt(void* baseAddressRX, size_t size); + + // Commit the specified range of memory. The memory can be committed as executable (RX) + // or non-executable (RW) based on the passed in isExecutable flag. The non-executable + // allocations are used to allocate data structures that need to be close to the + // executable code due to memory addressing performance related reasons. + void* Commit(void* pStart, size_t size, bool isExecutable); + + // Release the executable memory block starting at the passed in address that was allocated + // by one of the ReserveXXX methods. + void Release(void* pRX); + + // Map the specified block of executable memory as RW + void* MapRW(void* pRX, size_t size); + + // Unmap the RW mapping at the specified address + void UnmapRW(void* pRW); +}; + // Holder class to map read-execute memory as read-write so that it can be modified without using read-write-execute mapping. // At the moment the implementation is dummy, returning the same addresses for both cases and expecting them to be read-write-execute. // The class uses the move semantics to ensure proper unmapping in case of re-assigning of the holder value. 
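[Annotation, not part of the patch] To make the allocator contract above concrete, here is a minimal usage sketch of how a runtime component would emit code under W^X with this allocator. EmitStubSketch is a hypothetical helper, error handling beyond the reserve step is elided, and the ExecutableAllocator and ExecutableWriterHolder declarations above are assumed to be in scope:

    // Hypothetical helper showing the Reserve -> Commit -> scoped-RW-write flow.
    void* EmitStubSketch(const BYTE* code, size_t codeSize)
    {
        ExecutableAllocator* pAllocator = ExecutableAllocator::Instance();

        // Reserve and commit the range as RX; this is the address that executes.
        void* pRX = pAllocator->Reserve(codeSize);
        if (pRX == NULL)
            return NULL;
        pRX = pAllocator->Commit(pRX, codeSize, true /* isExecutable */);

        // Open a transient RW view over the RX range, write through it, drop it.
        {
            ExecutableWriterHolder<BYTE> writerHolder((BYTE*)pRX, codeSize);
            memcpy(writerHolder.GetRW(), code, codeSize);
        } // the holder's destructor unmaps the RW view

        return pRX; // callers only ever see and call the RX address
    }

The point of the holder is that the writable alias is transient: no page is writable and executable at the same time, and the RW view disappears as soon as the write completes.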
@@ -30,13 +215,17 @@ class ExecutableWriterHolder void Unmap() { +#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) if (m_addressRX != NULL) { - // TODO: mapping / unmapping for targets using double memory mapping will be added with the double mapped allocator addition -#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) PAL_JitWriteProtect(false); -#endif } +#else + if (m_addressRX != m_addressRW) + { + ExecutableAllocator::Instance()->UnmapRW((void*)m_addressRW); + } +#endif } public: @@ -62,9 +251,11 @@ class ExecutableWriterHolder ExecutableWriterHolder(T* addressRX, size_t size) { m_addressRX = addressRX; +#if defined(HOST_OSX) && defined(HOST_ARM64) m_addressRW = addressRX; -#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) PAL_JitWriteProtect(true); +#else + m_addressRW = (T *)ExecutableAllocator::Instance()->MapRW((void*)addressRX, size); #endif } @@ -79,3 +270,5 @@ class ExecutableWriterHolder return m_addressRW; } }; + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index fb65ea9fa613c..3c42f0850850b 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -302,12 +302,12 @@ #endif // !FEATURE_EH_FUNCLETS #ifdef TARGET_X86 - JITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, JIT_CheckedWriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, JIT_CheckedWriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h index a47034ee2e05c..77df9dfa94d2a 100644 --- a/src/coreclr/inc/utilcode.h +++ b/src/coreclr/inc/utilcode.h @@ -1014,35 +1014,6 @@ void SplitPath(__in SString const &path, #define CLRGetTickCount64() GetTickCount64() -// -// Use this function to initialize the s_CodeAllocHint -// during startup. base is runtime .dll base address, -// size is runtime .dll virtual size. -// -void InitCodeAllocHint(SIZE_T base, SIZE_T size, int randomPageOffset); - - -// -// Use this function to reset the s_CodeAllocHint -// after unloading an AppDomain -// -void ResetCodeAllocHint(); - -// -// Returns TRUE if p is located in near clr.dll that allows us -// to use rel32 IP-relative addressing modes. 
-// -BOOL IsPreferredExecutableRange(void * p); - -// -// Allocate free memory that will be used for executable code -// Handles the special requirements that we have on 64-bit platforms -// where we want the executable memory to be located near mscorwks -// -BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize, - DWORD flAllocationType, - DWORD flProtect); - // // Allocate free memory within the range [pMinAddr..pMaxAddr] using // ClrVirtualQuery to find free memory and ClrVirtualAlloc to allocate it. diff --git a/src/coreclr/minipal/CMakeLists.txt b/src/coreclr/minipal/CMakeLists.txt new file mode 100644 index 0000000000000..3096237d2a2fe --- /dev/null +++ b/src/coreclr/minipal/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories(.) +if (CLR_CMAKE_HOST_UNIX) + add_subdirectory(Unix) +else (CLR_CMAKE_HOST_UNIX) + add_subdirectory(Windows) +endif (CLR_CMAKE_HOST_UNIX) + diff --git a/src/coreclr/minipal/Unix/CMakeLists.txt b/src/coreclr/minipal/Unix/CMakeLists.txt new file mode 100644 index 0000000000000..b56b5017d375f --- /dev/null +++ b/src/coreclr/minipal/Unix/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(coreclrminipal + STATIC + doublemapping.cpp +) diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp new file mode 100644 index 0000000000000..52f3809efb868 --- /dev/null +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -0,0 +1,168 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +#include <stddef.h> +#include <stdint.h> +#include <limits.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#ifdef TARGET_LINUX +#include <linux/memfd.h> +#include <sys/syscall.h> // __NR_memfd_create +#endif // TARGET_LINUX +#include "minipal.h" + +#if defined(TARGET_OSX) && defined(TARGET_AMD64) +#include <mach/mach.h> +#endif // TARGET_OSX && TARGET_AMD64 + +#ifndef TARGET_OSX + +#ifdef TARGET_64BIT +static const off_t MaxDoubleMappedSize = 2048ULL*1024*1024*1024; +#else +static const off_t MaxDoubleMappedSize = UINT_MAX; +#endif + +#ifdef TARGET_LINUX +#define memfd_create(...) syscall(__NR_memfd_create, __VA_ARGS__) +#endif // TARGET_LINUX + +#endif // TARGET_OSX + +bool VMToOSInterface::CreateDoubleMemoryMapper(void** pHandle, size_t *pMaxExecutableCodeSize) +{ +#ifndef TARGET_OSX + + int fd = memfd_create("doublemapper", MFD_CLOEXEC); + + if (fd == -1) + { + return false; + } + + if (ftruncate(fd, MaxDoubleMappedSize) == -1) + { + close(fd); + return false; + } + + *pMaxExecutableCodeSize = MaxDoubleMappedSize; + *pHandle = (void*)(size_t)fd; +#else // !TARGET_OSX + *pMaxExecutableCodeSize = SIZE_MAX; + *pHandle = NULL; +#endif // !TARGET_OSX + + return true; +} + +void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) +{ +#ifndef TARGET_OSX + close((int)(size_t)mapperHandle); +#endif +} + +extern "C" void* PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(const void* lpBeginAddress, const void* lpEndAddress, size_t dwSize); + +void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void* rangeEnd) +{ + int fd = (int)(size_t)mapperHandle; + + if (rangeStart != NULL || rangeEnd != NULL) + { + void* result = PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(rangeStart, rangeEnd, size); +#ifndef TARGET_OSX + if (result != NULL) + { + // Map the shared memory over the range reserved from the executable memory allocator.
+ result = mmap(result, size, PROT_NONE, MAP_SHARED | MAP_FIXED, fd, offset); + if (result == MAP_FAILED) + { + assert(false); + result = NULL; + } + } +#endif // TARGET_OSX + + return result; + } + +#ifndef TARGET_OSX + void* result = mmap(NULL, size, PROT_NONE, MAP_SHARED, fd, offset); +#else + void* result = mmap(NULL, size, PROT_NONE, MAP_JIT | MAP_ANON | MAP_PRIVATE, -1, 0); +#endif + if (result == MAP_FAILED) + { + assert(false); + result = NULL; + } + return result; +} + +void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) +{ + if (mprotect(pStart, size, isExecutable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE)) == -1) + { + return NULL; + } + + return pStart; +} + +bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ +#ifndef TARGET_OSX + int fd = (int)(size_t)mapperHandle; + mmap(pStart, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, offset); + memset(pStart, 0, size); +#endif // TARGET_OSX + return munmap(pStart, size) != -1; +} + +void* VMToOSInterface::GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ +#ifndef TARGET_OSX + int fd = (int)(size_t)mapperHandle; + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); +#else // TARGET_OSX +#ifdef TARGET_AMD64 + vm_address_t startRW; + vm_prot_t curProtection, maxProtection; + kern_return_t kr = vm_remap(mach_task_self(), &startRW, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, + mach_task_self(), (vm_address_t)pStart, FALSE, &curProtection, &maxProtection, VM_INHERIT_NONE); + + if (kr != KERN_SUCCESS) + { + return NULL; + } + + int st = mprotect((void*)startRW, size, PROT_READ | PROT_WRITE); + if (st == -1) + { + munmap((void*)startRW, size); + return NULL; + } + + return (void*)startRW; +#else // TARGET_AMD64 + // This method should not be called on OSX ARM64 + assert(false); + return NULL; +#endif // TARGET_AMD64 +#endif // TARGET_OSX +} + +bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) +{ + return munmap(pStart, size) != -1; +} diff --git a/src/coreclr/minipal/Windows/CMakeLists.txt b/src/coreclr/minipal/Windows/CMakeLists.txt new file mode 100644 index 0000000000000..b56b5017d375f --- /dev/null +++ b/src/coreclr/minipal/Windows/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(coreclrminipal + STATIC + doublemapping.cpp +) diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp new file mode 100644 index 0000000000000..5edda681f2598 --- /dev/null +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -0,0 +1,246 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
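[Annotation, not part of the patch] As context for the Windows implementation that follows: the core technique is one pagefile-backed section mapped into the address space twice, so that writes made through an RW view become visible through an RX view without any page ever being writable and executable at the same time. A self-contained sketch of just that primitive, assuming x86/x64 (the stub byte 0xC3 is an x86 'ret'):

    #include <windows.h>
    #include <string.h>
    #include <assert.h>

    int main()
    {
        const DWORD size = 0x10000; // one 64 KB allocation granule

        // One pagefile-backed section object backs both views.
        HANDLE hSection = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
                                            PAGE_EXECUTE_READWRITE | SEC_COMMIT,
                                            0, size, NULL);
        assert(hSection != NULL);

        // Map the same section twice: RX view for execution, RW view for writing.
        void* pRX = MapViewOfFile(hSection, FILE_MAP_READ | FILE_MAP_EXECUTE, 0, 0, size);
        void* pRW = MapViewOfFile(hSection, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, size);
        assert(pRX != NULL && pRW != NULL);

        unsigned char ret = 0xC3;
        memcpy(pRW, &ret, sizeof(ret));        // write the code through the RW view
        reinterpret_cast<void (*)()>(pRX)();   // execute it through the RX view

        UnmapViewOfFile(pRW);
        UnmapViewOfFile(pRX);
        CloseHandle(hSection);
        return 0;
    }

The VMToOSInterface implementation below is this same idea plus offset bookkeeping, SEC_RESERVE-based commit, and placement constrained to a requested address range.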
+// +#include <windows.h> +#include <stdint.h> +#include <assert.h> +#include "minipal.h" + +#define HIDWORD(_qw) ((ULONG)((_qw) >> 32)) +#define LODWORD(_qw) ((ULONG)(_qw)) + +#ifdef TARGET_64BIT +static const uint64_t MaxDoubleMappedSize = 2048ULL*1024*1024*1024; +#else +static const uint64_t MaxDoubleMappedSize = UINT_MAX; +#endif + +#define VIRTUAL_ALLOC_RESERVE_GRANULARITY (64*1024) // 0x10000 (64 KB) +inline size_t ALIGN_UP( size_t val, size_t alignment ) +{ + // alignment must be a power of 2 for this implementation to work (need modulo otherwise) + assert( 0 == (alignment & (alignment - 1)) ); + size_t result = (val + (alignment - 1)) & ~(alignment - 1); + assert( result >= val ); // check for overflow + return result; +} + +template <typename T> inline T ALIGN_UP(T val, size_t alignment) +{ + return (T)ALIGN_UP((size_t)val, alignment); +} + +inline void *GetTopMemoryAddress(void) +{ + static void *result; // = NULL; + if( NULL == result ) + { + SYSTEM_INFO sysInfo; + GetSystemInfo( &sysInfo ); + result = sysInfo.lpMaximumApplicationAddress; + } + return result; +} + +inline void *GetBotMemoryAddress(void) +{ + static void *result; // = NULL; + if( NULL == result ) + { + SYSTEM_INFO sysInfo; + GetSystemInfo( &sysInfo ); + result = sysInfo.lpMinimumApplicationAddress; + } + return result; +} + +#define TOP_MEMORY (GetTopMemoryAddress()) +#define BOT_MEMORY (GetBotMemoryAddress()) + +bool VMToOSInterface::CreateDoubleMemoryMapper(void **pHandle, size_t *pMaxExecutableCodeSize) +{ + *pMaxExecutableCodeSize = (size_t)MaxDoubleMappedSize; + *pHandle = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_EXECUTE_READWRITE | SEC_RESERVE, // read/write/execute access + HIDWORD(MaxDoubleMappedSize), // maximum object size (high-order DWORD) + LODWORD(MaxDoubleMappedSize), // maximum object size (low-order DWORD) + NULL); + + return *pHandle != NULL; +} + +void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) +{ + CloseHandle((HANDLE)mapperHandle); +} + +void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *pMinAddr, const void* pMaxAddr) +{ + BYTE *pResult = nullptr; // our return value; + + if (size == 0) + { + return nullptr; + } + + // + // First let's normalize the pMinAddr and pMaxAddr values + // + // If pMinAddr is NULL then set it to BOT_MEMORY + if ((pMinAddr == 0) || (pMinAddr < (BYTE *) BOT_MEMORY)) + { + pMinAddr = (BYTE *) BOT_MEMORY; + } + + // If pMaxAddr is NULL then set it to TOP_MEMORY + if ((pMaxAddr == 0) || (pMaxAddr > (BYTE *) TOP_MEMORY)) + { + pMaxAddr = (BYTE *) TOP_MEMORY; + } + + // If pMaxAddr is not greater than pMinAddr we can not make an allocation + if (pMaxAddr <= pMinAddr) + { + return nullptr; + } + + // If pMinAddr is BOT_MEMORY and pMaxAddr is TOP_MEMORY + // then we can map the view at any address + if ((pMinAddr == (BYTE *) BOT_MEMORY) && (pMaxAddr == (BYTE *) TOP_MEMORY)) + { + return (BYTE*)MapViewOfFile((HANDLE)mapperHandle, + FILE_MAP_EXECUTE | FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size); + } + + // We will do one scan from [pMinAddr .. pMaxAddr] + // First align the tryAddr up to next 64k base address. + // See docs for VirtualAllocEx and lpAddress and 64k alignment for reasons.
+ // + BYTE * tryAddr = (BYTE *)ALIGN_UP((BYTE *)pMinAddr, VIRTUAL_ALLOC_RESERVE_GRANULARITY); + bool virtualQueryFailed = false; + bool faultInjected = false; + unsigned virtualQueryCount = 0; + + // Now scan memory and try to find a free block of the size requested. + while ((tryAddr + size) <= (BYTE *) pMaxAddr) + { + MEMORY_BASIC_INFORMATION mbInfo; + + // Use VirtualQuery to find out if this address is MEM_FREE + // + virtualQueryCount++; + if (!VirtualQuery((LPCVOID)tryAddr, &mbInfo, sizeof(mbInfo))) + { + // Exit and return nullptr if the VirtualQuery call fails. + virtualQueryFailed = true; + break; + } + + // Is there enough memory free from this start location? + // Note that for most versions of UNIX the mbInfo.RegionSize returned will always be 0 + if ((mbInfo.State == MEM_FREE) && + (mbInfo.RegionSize >= (SIZE_T) size || mbInfo.RegionSize == 0)) + { + // Try reserving the memory using VirtualAlloc now + pResult = (BYTE*)MapViewOfFileEx((HANDLE)mapperHandle, + FILE_MAP_EXECUTE | FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size, + tryAddr); + + // Normally this will be successful + // + if (pResult != nullptr) + { + // return pResult + break; + } + +#ifdef _DEBUG + // if (ShouldInjectFaultInRange()) + // { + // // return nullptr (failure) + // faultInjected = true; + // break; + // } +#endif // _DEBUG + + // On UNIX we can also fail if our request size 'dwSize' is larger than 64K and + // and our tryAddr is pointing at a small MEM_FREE region (smaller than 'dwSize') + // However we can't distinguish between this and the race case. + + // We might fail in a race. So just move on to next region and continue trying + tryAddr = tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY; + } + else + { + // Try another section of memory + tryAddr = max(tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY, + (BYTE*) mbInfo.BaseAddress + mbInfo.RegionSize); + } + } + + // STRESS_LOG7(LF_JIT, LL_INFO100, + // "ClrVirtualAllocWithinRange request #%u for %08x bytes in [ %p .. %p ], query count was %u - returned %s: %p\n", + // countOfCalls, (DWORD)dwSize, pMinAddr, pMaxAddr, + // virtualQueryCount, (pResult != nullptr) ? "success" : "failure", pResult); + + // If we failed this call the process will typically be terminated + // so we log any additional reason for failing this call. + // + if (pResult == nullptr) + { + // if ((tryAddr + dwSize) > (BYTE *)pMaxAddr) + // { + // // Our tryAddr reached pMaxAddr + // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: Address space exhausted.\n"); + // } + + // if (virtualQueryFailed) + // { + // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: VirtualQuery operation failed.\n"); + // } + + // if (faultInjected) + // { + // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: fault injected.\n"); + // } + } + + return pResult; + +} + +void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) +{ + return VirtualAlloc(pStart, size, MEM_COMMIT, isExecutable ? 
+}
+
+bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size)
+{
+    // Zero the memory before the unmapping
+    VirtualAlloc(pStart, size, MEM_COMMIT, PAGE_READWRITE);
+    memset(pStart, 0, size);
+    return UnmapViewOfFile(pStart);
+}
+
+void* VMToOSInterface::GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size)
+{
+    return (BYTE*)MapViewOfFile((HANDLE)mapperHandle,
+                                FILE_MAP_READ | FILE_MAP_WRITE,
+                                HIDWORD((int64_t)offset),
+                                LODWORD((int64_t)offset),
+                                size);
+}
+
+bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size)
+{
+    return UnmapViewOfFile(pStart);
+}
diff --git a/src/coreclr/minipal/minipal.h b/src/coreclr/minipal/minipal.h
new file mode 100644
index 0000000000000..39098f9bc1295
--- /dev/null
+++ b/src/coreclr/minipal/minipal.h
@@ -0,0 +1,78 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+#include <stddef.h>
+
+// Interface between the runtime and platform specific functionality
+class VMToOSInterface
+{
+private:
+    ~VMToOSInterface() {}
+public:
+    // Create double mapped memory mapper
+    // Parameters:
+    //  pHandle - receives handle of the double mapped memory mapper
+    //  pMaxExecutableCodeSize - receives the maximum executable memory size it can map
+    // Return:
+    //  true if it succeeded, false if it failed
+    static bool CreateDoubleMemoryMapper(void **pHandle, size_t *pMaxExecutableCodeSize);
+
+    // Destroy the double mapped memory mapper represented by the passed in handle
+    // Parameters:
+    //  mapperHandle - handle of the double mapped memory mapper to destroy
+    static void DestroyDoubleMemoryMapper(void *mapperHandle);
+
+    // Reserve a block of memory that can be double mapped.
+    // Parameters:
+    //  mapperHandle - handle of the double mapped memory mapper to use
+    //  offset - offset in the underlying shared memory
+    //  size - size of the block to reserve
+    //  rangeStart
+    //  rangeEnd - Requests reserving virtual memory in the specified range.
+    //             Setting both rangeStart and rangeEnd to 0 means that the
+    //             requested range is not limited.
+    //             When a specific range is requested, the memory must be
+    //             reserved in that range; otherwise the call fails.
+    // Return:
+    //  starting virtual address of the reserved memory or NULL if it failed
+    static void* ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void *rangeEnd);
+
+    // Commit a block of memory in the range previously reserved by the ReserveDoubleMappedMemory
+    // Parameters:
+    //  pStart - start address of the virtual address range to commit
+    //  size - size of the memory block to commit
+    //  isExecutable - true means that the mapping should be RX, false means RW
+    // Return:
+    //  Committed range start
+    static void* CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable);
+
+    // Release a block of virtual memory previously committed by the CommitDoubleMappedMemory
+    // Parameters:
+    //  mapperHandle - handle of the double mapped memory mapper to use
+    //  pStart - start address of the virtual address range to release.
It must be one + // that was previously returned by the CommitDoubleMappedMemory + // offset - offset in the underlying shared memory + // size - size of the memory block to release + // Return: + // true if it succeeded, false if it failed + static bool ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size); + + // Get a RW mapping for the RX block specified by the arguments + // Parameters: + // mapperHandle - handle of the double mapped memory mapper to use + // pStart - start address of the RX virtual address range. + // offset - offset in the underlying shared memory + // size - size of the memory block to map as RW + // Return: + // Starting virtual address of the RW mapping. + static void* GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size); + + // Release RW mapping of the block specified by the arguments + // Parameters: + // pStart - Start address of the RW virtual address range. It must be an address + // previously returned by the GetRWMapping. + // size - Size of the memory block to release. It must be the size previously + // passed to the GetRWMapping that returned the pStart. + // Return: + // true if it succeeded, false if it failed + static bool ReleaseRWMapping(void* pStart, size_t size); +}; diff --git a/src/coreclr/utilcode/CMakeLists.txt b/src/coreclr/utilcode/CMakeLists.txt index 1ae433adbfd89..8c57742cb6315 100644 --- a/src/coreclr/utilcode/CMakeLists.txt +++ b/src/coreclr/utilcode/CMakeLists.txt @@ -69,6 +69,7 @@ endif(CLR_CMAKE_TARGET_WIN32) set(UTILCODE_SOURCES ${UTILCODE_COMMON_SOURCES} + executableallocator.cpp ) set(UTILCODE_DAC_SOURCES diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp new file mode 100644 index 0000000000000..4d461e66e7e51 --- /dev/null +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -0,0 +1,755 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "pedecoder.h" +#include "executableallocator.h" + +#if USE_UPPER_ADDRESS +// Preferred region to allocate the code in. +BYTE * ExecutableAllocator::g_codeMinAddr; +BYTE * ExecutableAllocator::g_codeMaxAddr; +BYTE * ExecutableAllocator::g_codeAllocStart; +// Next address to try to allocate for code in the preferred region. +BYTE * ExecutableAllocator::g_codeAllocHint; +#endif // USE_UPPER_ADDRESS + +bool ExecutableAllocator::g_isWXorXEnabled = false; + +ExecutableAllocator::FatalErrorHandler ExecutableAllocator::g_fatalErrorHandler = NULL; + +ExecutableAllocator* ExecutableAllocator::g_instance = NULL; + +bool ExecutableAllocator::IsDoubleMappingEnabled() +{ + LIMITED_METHOD_CONTRACT; + +#if defined(HOST_OSX) && defined(HOST_ARM64) + return false; +#else + return g_isWXorXEnabled; +#endif +} + +bool ExecutableAllocator::IsWXORXEnabled() +{ + LIMITED_METHOD_CONTRACT; + +#if defined(HOST_OSX) && defined(HOST_ARM64) + return true; +#else + return g_isWXorXEnabled; +#endif +} + +extern SYSTEM_INFO g_SystemInfo; + +size_t ExecutableAllocator::Granularity() +{ + LIMITED_METHOD_CONTRACT; + + return g_SystemInfo.dwAllocationGranularity; +} + +// Use this function to initialize the g_codeAllocHint +// during startup. base is runtime .dll base address, +// size is runtime .dll virtual size. 
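+// randomPageOffset is a small random page count (the runtime passes GetRandomInt(64),
+// see ceemain.cpp below) used to slide the chosen start so that the code region
+// start address is not fully predictable.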
+void ExecutableAllocator::InitCodeAllocHint(size_t base, size_t size, int randomPageOffset)
+{
+#if USE_UPPER_ADDRESS
+
+#ifdef _DEBUG
+    // If GetForceRelocs is enabled we don't constrain the pMinAddr
+    if (PEDecoder::GetForceRelocs())
+        return;
+#endif
+
+    //
+    // If we are using the UPPER_ADDRESS space (on Win64),
+    // then for any code heap that doesn't specify an address
+    // range using [pMinAddr..pMaxAddr] we place it in the
+    // upper address space.
+    // This enables us to avoid having to use long JumpStubs
+    // to reach the code for our ngen-ed images, which are
+    // also placed in the UPPER_ADDRESS space.
+    //
+    SIZE_T reach = 0x7FFF0000u;
+
+    // We will choose the preferred code region based on the address of clr.dll. The JIT helpers
+    // in clr.dll are the most heavily called functions.
+    g_codeMinAddr = (base + size > reach) ? (BYTE *)(base + size - reach) : (BYTE *)0;
+    g_codeMaxAddr = (base + reach > base) ? (BYTE *)(base + reach) : (BYTE *)-1;
+
+    BYTE * pStart;
+
+    if (g_codeMinAddr <= (BYTE *)CODEHEAP_START_ADDRESS &&
+        (BYTE *)CODEHEAP_START_ADDRESS < g_codeMaxAddr)
+    {
+        // clr.dll got loaded at its preferred base address? (OS without ASLR - pre-Vista)
+        // Use the code heap start address that does not cause collisions with NGen images.
+        // This logic is coupled with scripts that we use to assign base addresses.
+        pStart = (BYTE *)CODEHEAP_START_ADDRESS;
+    }
+    else
+    if (base > UINT32_MAX)
+    {
+        // clr.dll got address assigned by ASLR?
+        // Try to occupy the space as far as possible to minimize collisions with other ASLR assigned
+        // addresses. Do not start at g_codeMinAddr exactly so that we can also reach common native images
+        // that can be placed at higher addresses than clr.dll.
+        pStart = g_codeMinAddr + (g_codeMaxAddr - g_codeMinAddr) / 8;
+    }
+    else
+    {
+        // clr.dll missed the base address?
+        // Try to occupy the space right after it.
+        pStart = (BYTE *)(base + size);
+    }
+
+    // Randomize the address space
+    pStart += GetOsPageSize() * randomPageOffset;
+
+    g_codeAllocStart = pStart;
+    g_codeAllocHint = pStart;
+#endif
+}
+
+// Use this function to reset the g_codeAllocHint
+// after unloading an AppDomain
+void ExecutableAllocator::ResetCodeAllocHint()
+{
+    LIMITED_METHOD_CONTRACT;
+#if USE_UPPER_ADDRESS
+    g_codeAllocHint = g_codeAllocStart;
+#endif
+}
+
+// Returns true if p is located near clr.dll, which allows us
+// to use rel32 IP-relative addressing modes.
+bool ExecutableAllocator::IsPreferredExecutableRange(void * p) +{ + LIMITED_METHOD_CONTRACT; +#if USE_UPPER_ADDRESS + if (g_codeMinAddr <= (BYTE *)p && (BYTE *)p < g_codeMaxAddr) + return true; +#endif + return false; +} + +ExecutableAllocator* ExecutableAllocator::Instance() +{ + LIMITED_METHOD_CONTRACT; + return g_instance; +} + +ExecutableAllocator::~ExecutableAllocator() +{ + if (IsDoubleMappingEnabled()) + { + VMToOSInterface::DestroyDoubleMemoryMapper(m_doubleMemoryMapperHandle); + } +} + +HRESULT ExecutableAllocator::StaticInitialize(FatalErrorHandler fatalErrorHandler) +{ + LIMITED_METHOD_CONTRACT; + + g_fatalErrorHandler = fatalErrorHandler; + g_isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWXORX) != 0; + g_instance = new (nothrow) ExecutableAllocator(); + if (g_instance == NULL) + { + return E_OUTOFMEMORY; + } + + if (!g_instance->Initialize()) + { + return E_FAIL; + } + + return S_OK; +} + +bool ExecutableAllocator::Initialize() +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + if (!VMToOSInterface::CreateDoubleMemoryMapper(&m_doubleMemoryMapperHandle, &m_maxExecutableCodeSize)) + { + return false; + } + + m_CriticalSection = ClrCreateCriticalSection(CrstExecutableAllocatorLock,CrstFlags(CRST_UNSAFE_ANYMODE | CRST_DEBUGGER_THREAD)); + } + + return true; +} + +//#define ENABLE_CACHED_MAPPINGS + +void ExecutableAllocator::UpdateCachedMapping(BlockRW* pBlock) +{ + LIMITED_METHOD_CONTRACT; +#ifdef ENABLE_CACHED_MAPPINGS + if (m_cachedMapping == NULL) + { + m_cachedMapping = pBlock; + pBlock->refCount++; + } + else if (m_cachedMapping != pBlock) + { + void* unmapAddress = NULL; + size_t unmapSize; + + if (!RemoveRWBlock(m_cachedMapping->baseRW, &unmapAddress, &unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block to unmap was not found")); + } + if (unmapAddress && !VMToOSInterface::ReleaseRWMapping(unmapAddress, unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); + } + m_cachedMapping = pBlock; + pBlock->refCount++; + } +#endif // ENABLE_CACHED_MAPPINGS +} + +void* ExecutableAllocator::FindRWBlock(void* baseRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) + { + if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + pBlock->refCount++; + UpdateCachedMapping(pBlock); + + return (BYTE*)pBlock->baseRW + ((size_t)baseRX - (size_t)pBlock->baseRX); + } + } + + return NULL; +} + +bool ExecutableAllocator::AddRWBlock(void* baseRW, void* baseRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) + { + if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + break; + } + } + + // The new "nothrow" below failure is handled as fail fast since it is not recoverable + PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); + + BlockRW* pBlockRW = new (nothrow) BlockRW(); + if (pBlockRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block metadata cannot be allocated")); + return false; + } + + pBlockRW->baseRW = baseRW; + pBlockRW->baseRX = baseRX; + pBlockRW->size = size; + pBlockRW->next = m_pFirstBlockRW; + pBlockRW->refCount = 1; + m_pFirstBlockRW = pBlockRW; + + UpdateCachedMapping(pBlockRW); + + return true; +} + +bool ExecutableAllocator::RemoveRWBlock(void* pRW, void** pUnmapAddress, 
size_t* pUnmapSize) +{ + LIMITED_METHOD_CONTRACT; + + BlockRW* pPrevBlockRW = NULL; + for (BlockRW* pBlockRW = m_pFirstBlockRW; pBlockRW != NULL; pBlockRW = pBlockRW->next) + { + if (pBlockRW->baseRW <= pRW && (size_t)pRW < ((size_t)pBlockRW->baseRW + pBlockRW->size)) + { + // found + pBlockRW->refCount--; + if (pBlockRW->refCount != 0) + { + *pUnmapAddress = NULL; + return true; + } + + if (pPrevBlockRW == NULL) + { + m_pFirstBlockRW = pBlockRW->next; + } + else + { + pPrevBlockRW->next = pBlockRW->next; + } + + *pUnmapAddress = pBlockRW->baseRW; + *pUnmapSize = pBlockRW->size; + + delete pBlockRW; + return true; + } + + pPrevBlockRW = pBlockRW; + } + + return false; +} + +bool ExecutableAllocator::AllocateOffset(size_t* pOffset, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + size_t offset = m_freeOffset; + size_t newFreeOffset = offset + size; + + if (newFreeOffset > m_maxExecutableCodeSize) + { + return false; + } + + m_freeOffset = newFreeOffset; + + *pOffset = offset; + + return true; +} + +void ExecutableAllocator::AddRXBlock(BlockRX* pBlock) +{ + LIMITED_METHOD_CONTRACT; + + pBlock->next = m_pFirstBlockRX; + m_pFirstBlockRX = pBlock; +} + +void* ExecutableAllocator::Commit(void* pStart, size_t size, bool isExecutable) +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + return VMToOSInterface::CommitDoubleMappedMemory(pStart, size, isExecutable); + } + else + { + return ClrVirtualAlloc(pStart, size, MEM_COMMIT, isExecutable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + } +} + +void ExecutableAllocator::Release(void* pRX) +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + // Locate the RX block corresponding to the pRX and remove it from the linked list + BlockRX* pBlock; + BlockRX* pPrevBlock = NULL; + + for (pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) + { + if (pRX == pBlock->baseRX) + { + if (pPrevBlock == NULL) + { + m_pFirstBlockRX = pBlock->next; + } + else + { + pPrevBlock->next = pBlock->next; + } + + break; + } + pPrevBlock = pBlock; + } + + if (pBlock != NULL) + { + VMToOSInterface::ReleaseDoubleMappedMemory(m_doubleMemoryMapperHandle, pRX, pBlock->offset, pBlock->size); + // Put the released block into the free block list + pBlock->baseRX = NULL; + pBlock->next = m_pFirstFreeBlockRX; + m_pFirstFreeBlockRX = pBlock; + } + else + { + // The block was not found, which should never happen. + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RX block to release was not found")); + } + } + else + { + ClrVirtualFree(pRX, 0, MEM_RELEASE); + } +} + +// Find a free block with the closest size >= the requested size. +// Returns NULL if no such block exists. 
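+// The search is a linear best-fit walk over the singly linked free block list;
+// the chosen block is unlinked from the list before it is returned.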
+ExecutableAllocator::BlockRX* ExecutableAllocator::FindBestFreeBlock(size_t size) +{ + LIMITED_METHOD_CONTRACT; + + BlockRX* pPrevBlock = NULL; + BlockRX* pPrevBestBlock = NULL; + BlockRX* pBestBlock = NULL; + BlockRX* pBlock = m_pFirstFreeBlockRX; + + while (pBlock != NULL) + { + if (pBlock->size >= size) + { + if (pBestBlock != NULL) + { + if (pBlock->size < pBestBlock->size) + { + pPrevBestBlock = pPrevBlock; + pBestBlock = pBlock; + } + } + else + { + pPrevBestBlock = pPrevBlock; + pBestBlock = pBlock; + } + } + pPrevBlock = pBlock; + pBlock = pBlock->next; + } + + if (pBestBlock != NULL) + { + if (pPrevBestBlock != NULL) + { + pPrevBestBlock->next = pBestBlock->next; + } + else + { + m_pFirstFreeBlockRX = pBestBlock->next; + } + + pBestBlock->next = NULL; + } + + return pBestBlock; +} + +// Allocate a new block of executable memory and the related descriptor structure. +// First try to get it from the free blocks and if there is no suitable free block, +// allocate a new one. +ExecutableAllocator::BlockRX* ExecutableAllocator::AllocateBlock(size_t size, bool* pIsFreeBlock) +{ + LIMITED_METHOD_CONTRACT; + + size_t offset; + BlockRX* block = FindBestFreeBlock(size); + *pIsFreeBlock = (block != NULL); + + if (block == NULL) + { + if (!AllocateOffset(&offset, size)) + { + return NULL; + } + + block = new (nothrow) BlockRX(); + if (block == NULL) + { + return NULL; + } + + block->offset = offset; + block->size = size; + } + + return block; +} + +// Backout a previously allocated block. The block is added to the free blocks list and +// reused for later allocation requests. +void ExecutableAllocator::BackoutBlock(BlockRX* pBlock, bool isFreeBlock) +{ + LIMITED_METHOD_CONTRACT; + + if (!isFreeBlock) + { + m_freeOffset -= pBlock->size; + delete pBlock; + } + else + { + pBlock->next = m_pFirstFreeBlockRX; + m_pFirstFreeBlockRX = pBlock; + } +} + +// Reserve executable memory within the specified virtual address space range. If it is not possible to +// reserve memory in that range, the method returns NULL and nothing is allocated. +void* ExecutableAllocator::ReserveWithinRange(size_t size, const void* loAddress, const void* hiAddress) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE((size & (Granularity() - 1)) == 0); + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(size, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + void *result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, loAddress, hiAddress); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + + return result; + } + else + { + DWORD allocationType = MEM_RESERVE; +#ifdef HOST_UNIX + // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. + // This will allow us to place JIT'ed code close to the coreclr library + // and thus improve performance by avoiding jump stubs in managed code. + allocationType |= MEM_RESERVE_EXECUTABLE; +#endif + return ClrVirtualAllocWithinRange((const BYTE*)loAddress, (const BYTE*)hiAddress, size, allocationType, PAGE_NOACCESS); + } +} + +// Reserve executable memory. On Windows it tries to use the allocation hints to +// allocate memory close to the previously allocated executable memory and loaded +// executable files. 
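+// The order of attempts is: the preferred upper-address region around the
+// allocation hint (USE_UPPER_ADDRESS only), then, depending on whether W^X is
+// enabled, either an arbitrary address in the double mapped region or a plain
+// virtual memory reservation.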
+void* ExecutableAllocator::Reserve(size_t size)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    _ASSERTE((size & (Granularity() - 1)) == 0);
+
+    BYTE *result = NULL;
+
+#if USE_UPPER_ADDRESS
+    //
+    // If we are using the UPPER_ADDRESS space (on Win64)
+    // then for any heap that will contain executable code
+    // we will place it in the upper address space
+    //
+    // This enables us to avoid having to use JumpStubs
+    // to reach the code for our ngen-ed images on x64,
+    // since they are also placed in the UPPER_ADDRESS space.
+    //
+    BYTE * pHint = g_codeAllocHint;
+
+    if (size <= (SIZE_T)(g_codeMaxAddr - g_codeMinAddr) && pHint != NULL)
+    {
+        // Try to allocate in the preferred region after the hint
+        result = (BYTE*)ReserveWithinRange(size, pHint, g_codeMaxAddr);
+        if (result != NULL)
+        {
+            g_codeAllocHint = result + size;
+        }
+        else
+        {
+            // Try to allocate in the preferred region before the hint
+            result = (BYTE*)ReserveWithinRange(size, g_codeMinAddr, pHint + size);
+
+            if (result != NULL)
+            {
+                g_codeAllocHint = result + size;
+            }
+            else
+            {
+                // The preferred region is exhausted; stop trying to use it.
+                g_codeAllocHint = NULL;
+            }
+        }
+    }
+
+    // Fall through to the regular reservation path below.
+#endif // USE_UPPER_ADDRESS
+
+    if (result == NULL)
+    {
+        if (IsDoubleMappingEnabled())
+        {
+            CRITSEC_Holder csh(m_CriticalSection);
+
+            bool isFreeBlock;
+            BlockRX* block = AllocateBlock(size, &isFreeBlock);
+            if (block == NULL)
+            {
+                return NULL;
+            }
+
+            result = (BYTE*)VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, 0, 0);
+
+            if (result != NULL)
+            {
+                block->baseRX = result;
+                AddRXBlock(block);
+            }
+            else
+            {
+                BackoutBlock(block, isFreeBlock);
+            }
+        }
+        else
+        {
+            DWORD allocationType = MEM_RESERVE;
+#ifdef HOST_UNIX
+            // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory.
+            // This will allow us to place JIT'ed code close to the coreclr library
+            // and thus improve performance by avoiding jump stubs in managed code.
+            allocationType |= MEM_RESERVE_EXECUTABLE;
+#endif
+            result = (BYTE*)ClrVirtualAlloc(NULL, size, allocationType, PAGE_NOACCESS);
+        }
+    }
+
+    return result;
+}
+
+// Reserve a block of executable memory at the specified virtual address. If it is not
+// possible, the method returns NULL.
+void* ExecutableAllocator::ReserveAt(void* baseAddressRX, size_t size)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    _ASSERTE((size & (Granularity() - 1)) == 0);
+
+    if (IsDoubleMappingEnabled())
+    {
+        CRITSEC_Holder csh(m_CriticalSection);
+
+        bool isFreeBlock;
+        BlockRX* block = AllocateBlock(size, &isFreeBlock);
+        if (block == NULL)
+        {
+            return NULL;
+        }
+
+        void* result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, baseAddressRX, baseAddressRX);
+
+        if (result != NULL)
+        {
+            block->baseRX = result;
+            AddRXBlock(block);
+        }
+        else
+        {
+            BackoutBlock(block, isFreeBlock);
+        }
+
+        return result;
+    }
+    else
+    {
+        return VirtualAlloc(baseAddressRX, size, MEM_RESERVE, PAGE_NOACCESS);
+    }
+}
+
+// Map an executable memory block as writeable. If there is already a mapping
+// covering the specified block, return that mapping instead of creating a new one.
+// Return starting address of the writeable mapping.
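+//
+// A worked example of the arithmetic below, assuming a 64 KB (0x10000) granularity:
+// a request for 0x20 bytes at offset 0x1FFF0 into an RX block yields
+// mapOffset = 0x10000 and mapSize = 0x20000, and the returned RW address is
+// pRW + 0xFFF0, so the caller sees exactly the bytes it asked for.
+//
+// A typical caller-side use is the ExecutableWriterHolder pattern used throughout
+// this change (a sketch, not code from this file):
+//
+//     ExecutableWriterHolder<StubPrecode> holder(pPrecode, sizeof(StubPrecode));
+//     holder.GetRW()->m_pTarget = target;  // write through the RW view
+//     // the RW mapping is unmapped when the holder goes out of scope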
+void* ExecutableAllocator::MapRW(void* pRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + if (!IsDoubleMappingEnabled()) + { + return pRX; + } + + CRITSEC_Holder csh(m_CriticalSection); + + void* result = FindRWBlock(pRX, size); + if (result != NULL) + { + return result; + } + + for (BlockRX* pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) + { + if (pRX >= pBlock->baseRX && ((size_t)pRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + // Offset of the RX address in the originally allocated block + size_t offset = (size_t)pRX - (size_t)pBlock->baseRX; + // Offset of the RX address that will start the newly mapped block + size_t mapOffset = ALIGN_DOWN(offset, Granularity()); + // Size of the block we will map + size_t mapSize = ALIGN_UP(offset - mapOffset + size, Granularity()); + void* pRW = VMToOSInterface::GetRWMapping(m_doubleMemoryMapperHandle, (BYTE*)pBlock->baseRX + mapOffset, pBlock->offset + mapOffset, mapSize); + + if (pRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Failed to create RW mapping for RX memory")); + } + + AddRWBlock(pRW, (BYTE*)pBlock->baseRX + mapOffset, mapSize); + + return (void*)((size_t)pRW + (offset - mapOffset)); + } + else if (pRX >= pBlock->baseRX && pRX < (void*)((size_t)pBlock->baseRX + pBlock->size)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Attempting to RW map a block that crosses the end of the allocated RX range")); + } + else if (pRX < pBlock->baseRX && (void*)((size_t)pRX + size) > pBlock->baseRX) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Attempting to map a block that crosses the beginning of the allocated range")); + } + } + + // The executable memory block was not found, so we cannot provide the writeable mapping. + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RX block to map as RW was not found")); + return NULL; +} + +// Unmap writeable mapping at the specified address. The address must be an address +// returned by the MapRW method. 
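+// The RW mappings are reference counted (see RemoveRWBlock), so the underlying
+// view is released only when its last user unmaps it.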
+void ExecutableAllocator::UnmapRW(void* pRW) +{ + LIMITED_METHOD_CONTRACT; + + if (!IsDoubleMappingEnabled()) + { + return; + } + + CRITSEC_Holder csh(m_CriticalSection); + _ASSERTE(pRW != NULL); + + void* unmapAddress = NULL; + size_t unmapSize; + + if (!RemoveRWBlock(pRW, &unmapAddress, &unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block to unmap was not found")); + } + + if (unmapAddress && !VMToOSInterface::ReleaseRWMapping(unmapAddress, unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); + } +} diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp index adaf07d8f5825..5828763f512f2 100644 --- a/src/coreclr/utilcode/loaderheap.cpp +++ b/src/coreclr/utilcode/loaderheap.cpp @@ -977,9 +977,7 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() if (fReleaseMemory) { - BOOL fSuccess; - fSuccess = ClrVirtualFree(pVirtualAddress, 0, MEM_RELEASE); - _ASSERTE(fSuccess); + ExecutableAllocator::Instance()->Release(pVirtualAddress); } delete pSearch; @@ -987,9 +985,7 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() if (m_reservedBlock.m_fReleaseMemory) { - BOOL fSuccess; - fSuccess = ClrVirtualFree(m_reservedBlock.pVirtualAddress, 0, MEM_RELEASE); - _ASSERTE(fSuccess); + ExecutableAllocator::Instance()->Release(m_reservedBlock.pVirtualAddress); } INDEBUG(s_dwNumInstancesOfLoaderHeaps --;) @@ -1058,7 +1054,7 @@ void ReleaseReservedMemory(BYTE* value) { if (value) { - ClrVirtualFree(value, 0, MEM_RELEASE); + ExecutableAllocator::Instance()->Release(value); } } @@ -1114,7 +1110,9 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) // Reserve pages // - pData = ClrVirtualAllocExecutable(dwSizeToReserve, MEM_RESERVE, PAGE_NOACCESS); + // Reserve the memory for even non-executable stuff close to the executable code, as it has profound effect + // on e.g. a static variable access performance. + pData = (BYTE *)ExecutableAllocator::Instance()->Reserve(dwSizeToReserve); if (pData == NULL) { return FALSE; @@ -1140,7 +1138,7 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) } // Commit first set of pages, since it will contain the LoaderHeapBlock - void *pTemp = ClrVirtualAlloc(pData, dwSizeToCommit, MEM_COMMIT, (m_Options & LHF_EXECUTABLE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); if (pTemp == NULL) { //_ASSERTE(!"Unable to ClrVirtualAlloc commit in a loaderheap"); @@ -1213,7 +1211,7 @@ BOOL UnlockedLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); // Yes, so commit the desired number of reserved pages - void *pData = ClrVirtualAlloc(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, MEM_COMMIT, (m_Options & LHF_EXECUTABLE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + void *pData = ExecutableAllocator::Instance()->Commit(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); if (pData == NULL) return FALSE; diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index 0026d1f619f14..e7b1755b2b1c4 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -352,168 +352,6 @@ HRESULT FakeCoCreateInstanceEx(REFCLSID rclsid, return hr; } -#if USE_UPPER_ADDRESS -static BYTE * s_CodeMinAddr; // Preferred region to allocate the code in. 
-static BYTE * s_CodeMaxAddr; -static BYTE * s_CodeAllocStart; -static BYTE * s_CodeAllocHint; // Next address to try to allocate for code in the preferred region. -#endif - -// -// Use this function to initialize the s_CodeAllocHint -// during startup. base is runtime .dll base address, -// size is runtime .dll virtual size. -// -void InitCodeAllocHint(SIZE_T base, SIZE_T size, int randomPageOffset) -{ -#if USE_UPPER_ADDRESS - -#ifdef _DEBUG - // If GetForceRelocs is enabled we don't constrain the pMinAddr - if (PEDecoder::GetForceRelocs()) - return; -#endif - -// - // If we are using the UPPER_ADDRESS space (on Win64) - // then for any code heap that doesn't specify an address - // range using [pMinAddr..pMaxAddr] we place it in the - // upper address space - // This enables us to avoid having to use long JumpStubs - // to reach the code for our ngen-ed images. - // Which are also placed in the UPPER_ADDRESS space. - // - SIZE_T reach = 0x7FFF0000u; - - // We will choose the preferred code region based on the address of clr.dll. The JIT helpers - // in clr.dll are the most heavily called functions. - s_CodeMinAddr = (base + size > reach) ? (BYTE *)(base + size - reach) : (BYTE *)0; - s_CodeMaxAddr = (base + reach > base) ? (BYTE *)(base + reach) : (BYTE *)-1; - - BYTE * pStart; - - if (s_CodeMinAddr <= (BYTE *)CODEHEAP_START_ADDRESS && - (BYTE *)CODEHEAP_START_ADDRESS < s_CodeMaxAddr) - { - // clr.dll got loaded at its preferred base address? (OS without ASLR - pre-Vista) - // Use the code head start address that does not cause collisions with NGen images. - // This logic is coupled with scripts that we use to assign base addresses. - pStart = (BYTE *)CODEHEAP_START_ADDRESS; - } - else - if (base > UINT32_MAX) - { - // clr.dll got address assigned by ASLR? - // Try to occupy the space as far as possible to minimize collisions with other ASLR assigned - // addresses. Do not start at s_CodeMinAddr exactly so that we can also reach common native images - // that can be placed at higher addresses than clr.dll. - pStart = s_CodeMinAddr + (s_CodeMaxAddr - s_CodeMinAddr) / 8; - } - else - { - // clr.dll missed the base address? - // Try to occupy the space right after it. - pStart = (BYTE *)(base + size); - } - - // Randomize the address space - pStart += GetOsPageSize() * randomPageOffset; - - s_CodeAllocStart = pStart; - s_CodeAllocHint = pStart; -#endif -} - -// -// Use this function to reset the s_CodeAllocHint -// after unloading an AppDomain -// -void ResetCodeAllocHint() -{ - LIMITED_METHOD_CONTRACT; -#if USE_UPPER_ADDRESS - s_CodeAllocHint = s_CodeAllocStart; -#endif -} - -// -// Returns TRUE if p is located in near clr.dll that allows us -// to use rel32 IP-relative addressing modes. 
-// -BOOL IsPreferredExecutableRange(void * p) -{ - LIMITED_METHOD_CONTRACT; -#if USE_UPPER_ADDRESS - if (s_CodeMinAddr <= (BYTE *)p && (BYTE *)p < s_CodeMaxAddr) - return TRUE; -#endif - return FALSE; -} - -// -// Allocate free memory that will be used for executable code -// Handles the special requirements that we have on 64-bit platforms -// where we want the executable memory to be located near clr.dll -// -BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize, - DWORD flAllocationType, - DWORD flProtect) -{ - CONTRACTL - { - NOTHROW; - } - CONTRACTL_END; - -#if USE_UPPER_ADDRESS - // - // If we are using the UPPER_ADDRESS space (on Win64) - // then for any heap that will contain executable code - // we will place it in the upper address space - // - // This enables us to avoid having to use JumpStubs - // to reach the code for our ngen-ed images on x64, - // since they are also placed in the UPPER_ADDRESS space. - // - BYTE * pHint = s_CodeAllocHint; - - if (dwSize <= (SIZE_T)(s_CodeMaxAddr - s_CodeMinAddr) && pHint != NULL) - { - // Try to allocate in the preferred region after the hint - BYTE * pResult = ClrVirtualAllocWithinRange(pHint, s_CodeMaxAddr, dwSize, flAllocationType, flProtect); - - if (pResult != NULL) - { - s_CodeAllocHint = pResult + dwSize; - return pResult; - } - - // Try to allocate in the preferred region before the hint - pResult = ClrVirtualAllocWithinRange(s_CodeMinAddr, pHint + dwSize, dwSize, flAllocationType, flProtect); - - if (pResult != NULL) - { - s_CodeAllocHint = pResult + dwSize; - return pResult; - } - - s_CodeAllocHint = NULL; - } - - // Fall through to -#endif // USE_UPPER_ADDRESS - -#ifdef HOST_UNIX - // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. - // This will allow us to place JIT'ed code close to the coreclr library - // and thus improve performance by avoiding jump stubs in managed code. - flAllocationType |= MEM_RESERVE_EXECUTABLE; -#endif // HOST_UNIX - - return (BYTE *) ClrVirtualAlloc (NULL, dwSize, flAllocationType, flProtect); - -} - // // Allocate free memory with specific alignment. // diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 1d682d2a428bb..9c2cb3df0b7e9 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -833,7 +833,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/exceparm.cpp ${ARCH_SOURCES_DIR}/stubs.cpp - ${ARCH_SOURCES_DIR}/armsinglestepper.cpp ) set(VM_HEADERS_DAC_AND_WKS_ARCH @@ -844,6 +843,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/profiler.cpp + ${ARCH_SOURCES_DIR}/armsinglestepper.cpp exceptionhandling.cpp gcinfodecoder.cpp ) @@ -868,7 +868,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ) if(CLR_CMAKE_HOST_UNIX) - list(APPEND VM_SOURCES_DAC_AND_WKS_ARCH + list(APPEND VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/arm64singlestepper.cpp ) endif(CLR_CMAKE_HOST_UNIX) diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 82a301bb0cbd1..219597eb350c2 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -51,37 +51,6 @@ endif extern JIT_InternalThrow:proc -; There is an even more optimized version of these helpers possible which takes -; advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 -; that check (this is more significant in the JIT_WriteBarrier case). 
-; -; Additionally we can look into providing helpers which will take the src/dest from -; specific registers (like x86) which _could_ (??) make for easier register allocation -; for the JIT64, however it might lead to having to have some nasty code that treats -; these guys really special like... :(. -; -; Version that does the move, checks whether or not it's in the GC and whether or not -; it needs to have it's card updated -; -; void JIT_CheckedWriteBarrier(Object** dst, Object* src) -LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT - - ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference - ; but if it isn't then it will just return. - ; - ; See if this is in GCHeap - cmp rcx, [g_lowest_address] - jb NotInHeap - cmp rcx, [g_highest_address] - jnb NotInHeap - - jmp JIT_WriteBarrier - - NotInHeap: - ; See comment above about possible AV - mov [rcx], rdx - ret -LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT ; Mark start of the code region that we patch at runtime LEAF_ENTRY JIT_PatchedCodeStart, _TEXT @@ -99,7 +68,8 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT ifdef _DEBUG ; In debug builds, this just contains jump to the debug version of the write barrier by default - jmp JIT_WriteBarrier_Debug + mov rax, JIT_WriteBarrier_Debug + jmp rax endif ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -388,6 +358,51 @@ endif ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT +Section segment para 'DATA' + + align 16 + + public JIT_WriteBarrier_Loc +JIT_WriteBarrier_Loc: + dq 0 + +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + ; JIT_WriteBarrier(Object** dst, Object* src) + jmp QWORD PTR [JIT_WriteBarrier_Loc] +LEAF_END JIT_WriteBarrier_Callable, _TEXT + +; There is an even more optimized version of these helpers possible which takes +; advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 +; that check (this is more significant in the JIT_WriteBarrier case). +; +; Additionally we can look into providing helpers which will take the src/dest from +; specific registers (like x86) which _could_ (??) make for easier register allocation +; for the JIT64, however it might lead to having to have some nasty code that treats +; these guys really special like... :(. +; +; Version that does the move, checks whether or not it's in the GC and whether or not +; it needs to have it's card updated +; +; void JIT_CheckedWriteBarrier(Object** dst, Object* src) +LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT + + ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + ; but if it isn't then it will just return. + ; + ; See if this is in GCHeap + cmp rcx, [g_lowest_address] + jb NotInHeap + cmp rcx, [g_highest_address] + jnb NotInHeap + + jmp QWORD PTR [JIT_WriteBarrier_Loc] + + NotInHeap: + ; See comment above about possible AV + mov [rcx], rdx + ret +LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT + ; The following helper will access ("probe") a word on each page of the stack ; starting with the page right beneath rsp down to the one pointed to by r11. ; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. 
diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index a13afb4878511..8109886d0c969 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -32,26 +32,14 @@ LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT // See if this is in GCHeap PREPARE_EXTERNAL_VAR g_lowest_address, rax cmp rdi, [rax] -#ifdef FEATURE_WRITEBARRIER_COPY // jb NotInHeap .byte 0x72, 0x12 -#else - // jb NotInHeap - .byte 0x72, 0x0e -#endif PREPARE_EXTERNAL_VAR g_highest_address, rax cmp rdi, [rax] -#ifdef FEATURE_WRITEBARRIER_COPY // jnb NotInHeap .byte 0x73, 0x06 jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] -#else - // jnb NotInHeap - .byte 0x73, 0x02 - // jmp C_FUNC(JIT_WriteBarrier) - .byte 0xeb, 0x05 -#endif NotInHeap: // See comment above about possible AV @@ -398,11 +386,17 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT -#ifdef FEATURE_WRITEBARRIER_COPY // When JIT_WriteBarrier is copied into an allocated page, // helpers use this global variable to jump to it. This variable is set in InitThreadManager. - .global _JIT_WriteBarrier_Loc - .zerofill __DATA,__common,_JIT_WriteBarrier_Loc,8,3 + .global C_FUNC(JIT_WriteBarrier_Loc) +#ifdef TARGET_OSX + .zerofill __DATA,__common,C_FUNC(JIT_WriteBarrier_Loc),8,3 +#else + .data + C_FUNC(JIT_WriteBarrier_Loc): + .quad 0 + .text +#endif // ------------------------------------------------------------------ // __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) @@ -412,8 +406,6 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] LEAF_END JIT_WriteBarrier_Callable, _TEXT -#endif // FEATURE_WRITEBARRIER_COPY - // The following helper will access ("probe") a word on each page of the stack // starting with the page right beneath rsp down to the one pointed to by r11. diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index 38bff78a54cb0..02b023777b8a9 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -293,7 +293,10 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, // the memcpy must come before the switch statment because the asserts inside the switch // are actually looking into the JIT_WriteBarrier buffer - memcpy(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); + { + ExecutableWriterHolder writeBarrierWriterHolder(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), GetCurrentWriteBarrierSize()); + memcpy(writeBarrierWriterHolder.GetRW(), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); + } switch (newWriteBarrier) { @@ -544,7 +547,8 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) // Change immediate if different from new g_ephermeral_high. if (*(UINT64*)m_pUpperBoundImmediate != (size_t)g_ephemeral_high) { - *(UINT64*)m_pUpperBoundImmediate = (size_t)g_ephemeral_high; + ExecutableWriterHolder upperBoundWriterHolder((UINT64*)m_pUpperBoundImmediate, sizeof(UINT64)); + *upperBoundWriterHolder.GetRW() = (size_t)g_ephemeral_high; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } } @@ -557,7 +561,8 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) // Change immediate if different from new g_ephermeral_low. 
if (*(UINT64*)m_pLowerBoundImmediate != (size_t)g_ephemeral_low) { - *(UINT64*)m_pLowerBoundImmediate = (size_t)g_ephemeral_low; + ExecutableWriterHolder lowerBoundImmediateWriterHolder((UINT64*)m_pLowerBoundImmediate, sizeof(UINT64)); + *lowerBoundImmediateWriterHolder.GetRW() = (size_t)g_ephemeral_low; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } break; @@ -609,7 +614,8 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus #endif // FEATURE_SVR_GC if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)g_sw_ww_table) { - *(UINT64*)m_pWriteWatchTableImmediate = (size_t)g_sw_ww_table; + ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pWriteWatchTableImmediate, sizeof(UINT64)); + *writeWatchTableImmediateWriterHolder.GetRW() = (size_t)g_sw_ww_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } break; @@ -621,14 +627,16 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table) { - *(UINT64*)m_pCardTableImmediate = (size_t)g_card_table; + ExecutableWriterHolder cardTableImmediateWriterHolder((UINT64*)m_pCardTableImmediate, sizeof(UINT64)); + *cardTableImmediateWriterHolder.GetRW() = (size_t)g_card_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES if (*(UINT64*)m_pCardBundleTableImmediate != (size_t)g_card_bundle_table) { - *(UINT64*)m_pCardBundleTableImmediate = (size_t)g_card_bundle_table; + ExecutableWriterHolder cardBundleTableImmediateWriterHolder((UINT64*)m_pCardBundleTableImmediate, sizeof(UINT64)); + *cardBundleTableImmediateWriterHolder.GetRW() = (size_t)g_card_bundle_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } #endif diff --git a/src/coreclr/vm/arm/armsinglestepper.cpp b/src/coreclr/vm/arm/armsinglestepper.cpp index 79317263b2223..f9e718ae5420e 100644 --- a/src/coreclr/vm/arm/armsinglestepper.cpp +++ b/src/coreclr/vm/arm/armsinglestepper.cpp @@ -97,11 +97,7 @@ ArmSingleStepper::ArmSingleStepper() ArmSingleStepper::~ArmSingleStepper() { #if !defined(DACCESS_COMPILE) -#ifdef TARGET_UNIX SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(WORD)); -#else - DeleteExecutable(m_rgCode); -#endif #endif } @@ -110,11 +106,7 @@ void ArmSingleStepper::Init() #if !defined(DACCESS_COMPILE) if (m_rgCode == NULL) { -#ifdef TARGET_UNIX m_rgCode = (WORD *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(WORD))); -#else - m_rgCode = new (executable) WORD[kMaxCodeBuffer]; -#endif } #endif } @@ -287,6 +279,8 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) DWORD idxNextInstruction = 0; + ExecutableWriterHolder codeWriterHolder(m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); + if (m_originalITState.InITBlock() && !ConditionHolds(pCtx, m_originalITState.CurrentCondition())) { LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper: Case 1: ITState::Clear;\n")); @@ -295,7 +289,7 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) // to execute. We'll put the correct value back during fixup. ITState::Clear(pCtx); m_fSkipIT = true; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } else if (TryEmulate(pCtx, opcode1, opcode2, false)) { @@ -308,8 +302,8 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) m_fEmulate = true; // Set breakpoints to stop the execution. This will get us right back here. 
- m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } else { @@ -323,24 +317,24 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) // guarantee one of them will be hit (we don't care which one -- the fixup code will update // the PC and IT state to make it look as though the CPU just executed the current // instruction). - m_rgCode[idxNextInstruction++] = opcode1; + codeWriterHolder.GetRW()[idxNextInstruction++] = opcode1; if (Is32BitInstruction(opcode1)) - m_rgCode[idxNextInstruction++] = opcode2; + codeWriterHolder.GetRW()[idxNextInstruction++] = opcode2; - m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } // Always terminate the redirection buffer with a breakpoint. - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; _ASSERTE(idxNextInstruction <= kMaxCodeBuffer); // Set the thread up so it will redirect to our buffer when execution resumes. pCtx->Pc = ((DWORD)(DWORD_PTR)m_rgCode) | THUMB_CODE; // Make sure the CPU sees the updated contents of the buffer. - FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + FlushInstructionCache(GetCurrentProcess(), m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); // Done, set the state. m_state = Applied; diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 930395b56dc7e..3faa8fe36846e 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -978,6 +978,16 @@ g_rgWriteBarrierDescriptors: .global g_rgWriteBarrierDescriptors +// ------------------------------------------------------------------ +// __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) + LEAF_ENTRY JIT_WriteBarrier_Callable + + // Branch to the write barrier + ldr r2, =JIT_WriteBarrier_Loc // or R3? See targetarm.h + ldr pc, [r2] + + LEAF_END JIT_WriteBarrier_Callable + #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog, _TEXT, NoHandler diff --git a/src/coreclr/vm/arm/asmhelpers.asm b/src/coreclr/vm/arm/asmhelpers.asm index d20540e62090e..82596e66693dc 100644 --- a/src/coreclr/vm/arm/asmhelpers.asm +++ b/src/coreclr/vm/arm/asmhelpers.asm @@ -1724,6 +1724,18 @@ tempReg SETS "$tmpReg" END_WRITE_BARRIERS + IMPORT JIT_WriteBarrier_Loc + +; ------------------------------------------------------------------ +; __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) + LEAF_ENTRY JIT_WriteBarrier_Callable + + ; Branch to the write barrier + ldr r2, =JIT_WriteBarrier_Loc ; or R3? 
See targetarm.h + ldr pc, [r2] + + LEAF_END + #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 88d0c6802b69d..425c286558432 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -1069,6 +1069,7 @@ struct StubPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -1095,6 +1096,7 @@ struct StubPrecode { return (TADDR)InterlockedCompareExchange( (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } +#endif // !DACCESS_COMPILE #ifdef FEATURE_PREJIT void Fixup(DataImage *image); @@ -1167,6 +1169,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); PCODE GetTarget() @@ -1175,6 +1184,7 @@ struct FixupPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -1201,6 +1211,7 @@ struct FixupPrecode { return (TADDR)InterlockedCompareExchange( (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } +#endif // !DACCESS_COMPILE static BOOL IsFixupPrecodeByASM(PCODE addr) { @@ -1256,6 +1267,7 @@ struct ThisPtrRetBufPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE BOOL SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL @@ -1268,6 +1280,7 @@ struct ThisPtrRetBufPrecode { ExecutableWriterHolder precodeWriterHolder(this, sizeof(ThisPtrRetBufPrecode)); return FastInterlockCompareExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected; } +#endif // !DACCESS_COMPILE }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index aac3e25b18146..b2bf6e0522ea5 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -334,11 +334,19 @@ void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength) void CopyWriteBarrier(PCODE dstCode, PCODE srcCode, PCODE endCode) { - TADDR dst = PCODEToPINSTR(dstCode); + TADDR dst = (TADDR)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation((void*)dstCode)); TADDR src = PCODEToPINSTR(srcCode); TADDR end = PCODEToPINSTR(endCode); size_t size = (PBYTE)end - (PBYTE)src; + + ExecutableWriterHolder writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder = ExecutableWriterHolder((void*)dst, size); + dst = (TADDR)writeBarrierWriterHolder.GetRW(); + } + memcpy((PVOID)dst, (PVOID)src, size); } @@ -419,7 +427,7 @@ void UpdateGCWriteBarriers(bool postGrow = false) } #define GWB_PATCH_OFFSET(_global) \ if (pDesc->m_dw_##_global##_offset != 0xffff) \ - PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset - 1), (UINT32)(dac_cast(_global))); + PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset), (UINT32)(dac_cast(_global))); // Iterate through the write barrier patch table created in the .clrwb section // (see write barrier asm code) @@ -431,6 +439,13 @@ void UpdateGCWriteBarriers(bool postGrow = false) PBYTE to = FindWBMapping(pDesc->m_pFuncStart); if(to) { + to = (PBYTE)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation(to)); + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(to, pDesc->m_pFuncEnd - pDesc->m_pFuncStart); + to = 
barrierWriterHolder.GetRW(); + } GWB_PATCH_OFFSET(g_lowest_address); GWB_PATCH_OFFSET(g_highest_address); GWB_PATCH_OFFSET(g_ephemeral_low); diff --git a/src/coreclr/vm/arm64/arm64singlestepper.cpp b/src/coreclr/vm/arm64/arm64singlestepper.cpp index d45925311a33e..6c1764647c9f2 100644 --- a/src/coreclr/vm/arm64/arm64singlestepper.cpp +++ b/src/coreclr/vm/arm64/arm64singlestepper.cpp @@ -46,11 +46,7 @@ Arm64SingleStepper::Arm64SingleStepper() Arm64SingleStepper::~Arm64SingleStepper() { #if !defined(DACCESS_COMPILE) -#ifdef TARGET_UNIX SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(uint32_t)); -#else - DeleteExecutable(m_rgCode); -#endif #endif } @@ -59,11 +55,7 @@ void Arm64SingleStepper::Init() #if !defined(DACCESS_COMPILE) if (m_rgCode == NULL) { -#ifdef TARGET_UNIX m_rgCode = (uint32_t *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(uint32_t))); -#else - m_rgCode = new (executable) uint32_t[kMaxCodeBuffer]; -#endif } #endif } @@ -207,7 +199,7 @@ void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) unsigned int idxNextInstruction = 0; - ExecutableWriterHolder codeWriterHolder(m_rgCode, sizeof(m_rgCode)); + ExecutableWriterHolder codeWriterHolder(m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); if (TryEmulate(pCtx, opcode, false)) { @@ -230,7 +222,7 @@ void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) pCtx->Pc = (uint64_t)m_rgCode; // Make sure the CPU sees the updated contents of the buffer. - FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + FlushInstructionCache(GetCurrentProcess(), m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); // Done, set the state. m_state = Applied; diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index e6b47d07b2b0c..8ef66586cd22c 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -270,13 +270,9 @@ LOCAL_LABEL(EphemeralCheckEnabled): ldr x7, [x12] // Update wbs state -#ifdef FEATURE_WRITEBARRIER_COPY PREPARE_EXTERNAL_VAR JIT_WriteBarrier_Table_Loc, x12 ldr x12, [x12] add x12, x12, x9 -#else // FEATURE_WRITEBARRIER_COPY - adr x12, LOCAL_LABEL(wbs_begin) -#endif // FEATURE_WRITEBARRIER_COPY stp x0, x1, [x12], 16 stp x2, x3, [x12], 16 @@ -295,16 +291,10 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT mov x14, x0 // x14 = dst mov x15, x1 // x15 = val -#ifdef FEATURE_WRITEBARRIER_COPY -LOCAL_LABEL(Branch_JIT_WriteBarrier_Copy): // Branch to the write barrier PREPARE_EXTERNAL_VAR JIT_WriteBarrier_Loc, x17 ldr x17, [x17] br x17 -#else // FEATURE_WRITEBARRIER_COPY - // Branch to the write barrier - b C_FUNC(JIT_WriteBarrier) -#endif // FEATURE_WRITEBARRIER_COPY LEAF_END JIT_WriteBarrier_Callable, _TEXT .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index ffbeb9fd1acb3..17d3a676940bd 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -61,6 +61,10 @@ #ifdef FEATURE_COMINTEROP IMPORT CLRToCOMWorker #endif // FEATURE_COMINTEROP + + IMPORT JIT_WriteBarrier_Table_Loc + IMPORT JIT_WriteBarrier_Loc + TEXTAREA ;; LPVOID __stdcall GetCurrentIP(void); @@ -308,6 +312,7 @@ ThePreStubPatchLabel ; x12 will be used for pointers mov x8, x0 + mov x9, x1 adrp x12, g_card_table ldr x0, [x12, g_card_table] @@ -346,7 +351,9 @@ EphemeralCheckEnabled ldr x7, [x12, g_highest_address] 
; Update wbs state - adr x12, wbs_begin + adrp x12, JIT_WriteBarrier_Table_Loc + ldr x12, [x12, JIT_WriteBarrier_Table_Loc] + add x12, x12, x9 stp x0, x1, [x12], 16 stp x2, x3, [x12], 16 stp x4, x5, [x12], 16 @@ -355,9 +362,11 @@ EphemeralCheckEnabled EPILOG_RESTORE_REG_PAIR fp, lr, #16! EPILOG_RETURN + WRITE_BARRIER_END JIT_UpdateWriteBarrierState + ; Begin patchable literal pool ALIGN 64 ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line - + WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table wbs_begin wbs_card_table DCQ 0 @@ -375,14 +384,7 @@ wbs_lowest_address DCQ 0 wbs_highest_address DCQ 0 - - WRITE_BARRIER_END JIT_UpdateWriteBarrierState - -; ------------------------------------------------------------------ -; End of the writeable code region - LEAF_ENTRY JIT_PatchedCodeLast - ret lr - LEAF_END + WRITE_BARRIER_END JIT_WriteBarrier_Table ; void JIT_ByRefWriteBarrier ; On entry: @@ -546,6 +548,12 @@ Exit ret lr WRITE_BARRIER_END JIT_WriteBarrier +; ------------------------------------------------------------------ +; End of the writeable code region + LEAF_ENTRY JIT_PatchedCodeLast + ret lr + LEAF_END + #ifdef FEATURE_PREJIT ;------------------------------------------------ ; VirtualMethodFixupStub @@ -1417,9 +1425,10 @@ CallHelper2 mov x14, x0 ; x14 = dst mov x15, x1 ; x15 = val - ; Branch to the write barrier (which is already correctly overwritten with - ; single or multi-proc code based on the current CPU - b JIT_WriteBarrier + ; Branch to the write barrier + adrp x17, JIT_WriteBarrier_Loc + ldr x17, [x17, JIT_WriteBarrier_Loc] + br x17 LEAF_END diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 83e56cfb9f9b9..0641d89ff1a91 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -597,6 +597,7 @@ struct StubPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -623,6 +624,7 @@ struct StubPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE #ifdef FEATURE_PREJIT void Fixup(DataImage *image); @@ -715,6 +717,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); PCODE GetTarget() @@ -723,6 +732,7 @@ struct FixupPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -749,6 +759,7 @@ struct FixupPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE static BOOL IsFixupPrecodeByASM(PCODE addr) { @@ -797,6 +808,7 @@ struct ThisPtrRetBufPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE BOOL SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL @@ -810,6 +822,7 @@ struct ThisPtrRetBufPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 54cf1c4927548..12d56ddb9867e 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -1067,8 +1067,14 @@ extern "C" void 
STDCALL JIT_PatchedCodeLast(); static void UpdateWriteBarrierState(bool skipEphemeralCheck) { BYTE *writeBarrierCodeStart = GetWriteBarrierCodeLocation((void*)JIT_PatchedCodeStart); - ExecutableWriterHolder writeBarrierWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); - JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierWriterHolder.GetRW() - writeBarrierCodeStart); + BYTE *writeBarrierCodeStartRW = writeBarrierCodeStart; + ExecutableWriterHolder writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder = ExecutableWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); + writeBarrierCodeStartRW = writeBarrierWriterHolder.GetRW(); + } + JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierCodeStartRW - writeBarrierCodeStart); } void InitJITHelpers1() diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index cdc5925234af9..b60aac924d2e2 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -607,6 +607,11 @@ void EESocketCleanupHelper(bool isExecutingOnAltStack) #endif // TARGET_UNIX #endif // CROSSGEN_COMPILE +void FatalErrorHandler(UINT errorCode, LPCWSTR pszMessage) +{ + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(errorCode, pszMessage); +} + void EEStartupHelper() { CONTRACTL @@ -670,6 +675,8 @@ void EEStartupHelper() // This needs to be done before the EE has started InitializeStartupFlags(); + IfFailGo(ExecutableAllocator::StaticInitialize(FatalErrorHandler)); + ThreadpoolMgr::StaticInitialize(); MethodDescBackpatchInfoTracker::StaticInitialize(); @@ -824,7 +831,7 @@ void EEStartupHelper() g_runtimeLoadedBaseAddress = (SIZE_T)pe.GetBase(); g_runtimeVirtualSize = (SIZE_T)pe.GetVirtualSize(); - InitCodeAllocHint(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); + ExecutableAllocator::InitCodeAllocHint(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); } #endif // !TARGET_UNIX diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index 02feec829a76b..5c5004f56860a 100644 --- a/src/coreclr/vm/class.cpp +++ b/src/coreclr/vm/class.cpp @@ -153,7 +153,9 @@ void EEClass::Destruct(MethodTable * pOwningMT) if (pDelegateEEClass->m_pStaticCallStub) { - BOOL fStubDeleted = pDelegateEEClass->m_pStaticCallStub->DecRef(); + ExecutableWriterHolder stubWriterHolder(pDelegateEEClass->m_pStaticCallStub, sizeof(Stub)); + BOOL fStubDeleted = stubWriterHolder.GetRW()->DecRef(); + if (fStubDeleted) { DelegateInvokeStubManager::g_pManager->RemoveStub(pDelegateEEClass->m_pStaticCallStub); @@ -167,7 +169,6 @@ void EEClass::Destruct(MethodTable * pOwningMT) // it is owned by the m_pMulticastStubCache, not by the class // - it is shared across classes. 
So we don't decrement // its ref count here - delete pDelegateEEClass->m_pUMThunkMarshInfo; } #ifdef FEATURE_COMINTEROP diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 37220786fedda..78721292a3e9f 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -2139,8 +2139,7 @@ VOID EEJitManager::EnsureJumpStubReserve(BYTE * pImageBase, SIZE_T imageSize, SI return; // Unable to allocate the reserve - give up } - pNewReserve->m_ptr = ClrVirtualAllocWithinRange(loAddrCurrent, hiAddrCurrent, - allocChunk, MEM_RESERVE, PAGE_NOACCESS); + pNewReserve->m_ptr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(allocChunk, loAddrCurrent, hiAddrCurrent); if (pNewReserve->m_ptr != NULL) break; @@ -2231,8 +2230,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap if (!pInfo->getThrowOnOutOfMemoryWithinRange() && PEDecoder::GetForceRelocs()) RETURN NULL; #endif - pBaseAddr = ClrVirtualAllocWithinRange(loAddr, hiAddr, - reserveSize, MEM_RESERVE, PAGE_NOACCESS); + pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(reserveSize, loAddr, hiAddr); if (!pBaseAddr) { @@ -2251,7 +2249,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap } else { - pBaseAddr = ClrVirtualAllocExecutable(reserveSize, MEM_RESERVE, PAGE_NOACCESS); + pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->Reserve(reserveSize); if (!pBaseAddr) ThrowOutOfMemory(); } @@ -2686,15 +2684,14 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo *pAllocatedSize = sizeof(CodeHeader) + totalSize; -#if defined(HOST_OSX) && defined(HOST_ARM64) -#define FEATURE_WXORX -#endif - -#ifdef FEATURE_WXORX - pCodeHdrRW = (CodeHeader *)new BYTE[*pAllocatedSize]; -#else - pCodeHdrRW = pCodeHdr; -#endif + if (ExecutableAllocator::IsWXORXEnabled()) + { + pCodeHdrRW = (CodeHeader *)new BYTE[*pAllocatedSize]; + } + else + { + pCodeHdrRW = pCodeHdr; + } #ifdef USE_INDIRECT_CODEHEADER if (requestInfo.IsDynamicDomain()) @@ -3347,7 +3344,7 @@ void EEJitManager::Unload(LoaderAllocator *pAllocator) } } - ResetCodeAllocHint(); + ExecutableAllocator::ResetCodeAllocHint(); } EEJitManager::DomainCodeHeapList::DomainCodeHeapList() diff --git a/src/coreclr/vm/comcallablewrapper.cpp b/src/coreclr/vm/comcallablewrapper.cpp index 8b95dac8cdd77..499880dc16dde 100644 --- a/src/coreclr/vm/comcallablewrapper.cpp +++ b/src/coreclr/vm/comcallablewrapper.cpp @@ -3183,12 +3183,11 @@ void ComMethodTable::Cleanup() if (m_pDispatchInfo) delete m_pDispatchInfo; - if (m_pMDescr) - DeleteExecutable(m_pMDescr); if (m_pITypeInfo && !g_fProcessDetach) SafeRelease(m_pITypeInfo); - DeleteExecutable(this); + // The m_pMDescr and the current instance are allocated from the related LoaderAllocator, + // so no cleanup is needed here.
} @@ -3214,7 +3213,7 @@ void ComMethodTable::LayOutClassMethodTable() SLOT *pComVtable; unsigned cbPrevSlots = 0; unsigned cbAlloc = 0; - NewExecutableHolder pMDMemoryPtr = NULL; + AllocMemHolder pMDMemoryPtr; BYTE* pMethodDescMemory = NULL; size_t writeableOffset = 0; unsigned cbNumParentVirtualMethods = 0; @@ -3321,7 +3320,7 @@ void ComMethodTable::LayOutClassMethodTable() cbAlloc = cbMethodDescs; if (cbAlloc > 0) { - pMDMemoryPtr = (BYTE*) new (executable) BYTE[cbAlloc + sizeof(UINT_PTR)]; + pMDMemoryPtr = m_pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbAlloc + sizeof(UINT_PTR))); pMethodDescMemory = pMDMemoryPtr; methodDescMemoryWriteableHolder = ExecutableWriterHolder(pMethodDescMemory, cbAlloc + sizeof(UINT_PTR)); @@ -3703,7 +3702,6 @@ BOOL ComMethodTable::LayOutInterfaceMethodTable(MethodTable* pClsMT) // Method descs are at the end of the vtable // m_cbSlots interfaces methods + IUnk methods pMethodDescMemory = (BYTE *)&pComVtable[m_cbSlots]; - for (i = 0; i < cbSlots; i++) { ComCallMethodDesc* pNewMD = (ComCallMethodDesc *) (pMethodDescMemory + COMMETHOD_PREPAD); @@ -4495,13 +4493,12 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForClass(MethodTable if (cbToAlloc.IsOverflow()) ThrowHR(COR_E_OVERFLOW); - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc.Value()]; + AllocMemHolder pComMT(pClassMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc.Value()))); _ASSERTE(!cbNewSlots.IsOverflow() && !cbTotalSlots.IsOverflow() && !cbVtable.IsOverflow()); ExecutableWriterHolder comMTWriterHolder(pComMT, cbToAlloc.Value()); ComMethodTable* pComMTRW = comMTWriterHolder.GetRW(); - // set up the header pComMTRW->m_ptReserved = (SLOT)(size_t)0xDEADC0FF; // reserved pComMTRW->m_pMT = pClassMT; // pointer to the class method table @@ -4573,7 +4570,7 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForInterface(MethodT if (cbToAlloc.IsOverflow()) ThrowHR(COR_E_OVERFLOW); - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc.Value()]; + AllocMemHolder pComMT(pInterfaceMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc.Value()))); _ASSERTE(!cbVtable.IsOverflow() && !cbMethDescs.IsOverflow()); @@ -4639,7 +4636,8 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForBasic(MethodTable unsigned cbVtable = cbExtraSlots * sizeof(SLOT); unsigned cbToAlloc = sizeof(ComMethodTable) + cbVtable; - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc]; + AllocMemHolder pComMT(pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc))); + ExecutableWriterHolder comMTWriterHolder(pComMT, cbToAlloc); ComMethodTable* pComMTRW = comMTWriterHolder.GetRW(); diff --git a/src/coreclr/vm/comcallablewrapper.h b/src/coreclr/vm/comcallablewrapper.h index 2581ddf832fd5..0f1e4b878e4c9 100644 --- a/src/coreclr/vm/comcallablewrapper.h +++ b/src/coreclr/vm/comcallablewrapper.h @@ -499,6 +499,7 @@ struct ComMethodTable // Accessor for the IDispatch information. 
DispatchInfo* GetDispatchInfo(); +#ifndef DACCESS_COMPILE LONG AddRef() { LIMITED_METHOD_CONTRACT; @@ -527,6 +528,7 @@ struct ComMethodTable return cbRef; } +#endif // DACCESS_COMPILE CorIfaceAttr GetInterfaceType() { @@ -746,6 +748,7 @@ struct ComMethodTable } +#ifndef DACCESS_COMPILE inline REFIID GetIID() { // Cannot use a normal CONTRACT since the return type is ref type which @@ -768,6 +771,7 @@ struct ComMethodTable return m_IID; } +#endif // DACCESS_COMPILE void CheckParentComVisibility(BOOL fForIDispatch) { diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index b6c17260a1302..1b61e16dec5d3 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -1253,7 +1253,7 @@ LPVOID COMDelegate::ConvertToCallback(OBJECTREF pDelegateObj) { GCX_PREEMP(); - pUMThunkMarshInfo = new UMThunkMarshInfo(); + pUMThunkMarshInfo = (UMThunkMarshInfo*)(void*)pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(sizeof(UMThunkMarshInfo))); ExecutableWriterHolder uMThunkMarshInfoWriterHolder(pUMThunkMarshInfo, sizeof(UMThunkMarshInfo)); uMThunkMarshInfoWriterHolder.GetRW()->LoadTimeInit(pInvokeMeth); diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index 9dae86aca9377..541d88dc16885 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -403,8 +403,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) if (pInfo->m_loAddr != NULL || pInfo->m_hiAddr != NULL) { - m_pBaseAddr = ClrVirtualAllocWithinRange(pInfo->m_loAddr, pInfo->m_hiAddr, - ReserveBlockSize, MEM_RESERVE, PAGE_NOACCESS); + m_pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(ReserveBlockSize, pInfo->m_loAddr, pInfo->m_hiAddr); if (!m_pBaseAddr) { if (pInfo->getThrowOnOutOfMemoryWithinRange()) @@ -417,7 +416,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) // top up the ReserveBlockSize to suggested minimum ReserveBlockSize = max(ReserveBlockSize, pInfo->getReserveSize()); - m_pBaseAddr = ClrVirtualAllocExecutable(ReserveBlockSize, MEM_RESERVE, PAGE_NOACCESS); + m_pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->Reserve(ReserveBlockSize); if (!m_pBaseAddr) ThrowOutOfMemory(); } @@ -749,7 +748,7 @@ HostCodeHeap::TrackAllocation* HostCodeHeap::AllocMemory_NoThrow(size_t header, if (m_pLastAvailableCommittedAddr + sizeToCommit <= m_pBaseAddr + m_TotalBytesAvailable) { - if (NULL == ClrVirtualAlloc(m_pLastAvailableCommittedAddr, sizeToCommit, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) + if (NULL == ExecutableAllocator::Instance()->Commit(m_pLastAvailableCommittedAddr, sizeToCommit, true /* isExecutable */)) { LOG((LF_BCL, LL_ERROR, "CodeHeap [0x%p] - VirtualAlloc failed\n", this)); return NULL; diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 1b192e683695a..55828b7c22b86 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6679,14 +6679,12 @@ AdjustContextForJITHelpers( PCODE ip = GetIP(pContext); -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(ip)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame ip = AdjustWriteBarrierIP(ip); SetIP(pContext, ip); } -#endif // FEATURE_WRITEBARRIER_COPY #ifdef FEATURE_DATABREAKPOINT diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 7fff234ca85ef..4af702fab1499 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ 
-4694,14 +4694,12 @@ VOID DECLSPEC_NORETURN UnwindManagedExceptionPass1(PAL_SEHException& ex, CONTEXT break; } -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame controlPc = AdjustWriteBarrierIP(controlPc); SetIP(frameContext, controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY UINT_PTR sp = GetSP(frameContext); @@ -5174,13 +5172,11 @@ BOOL IsSafeToHandleHardwareException(PCONTEXT contextRecord, PEXCEPTION_RECORD e { PCODE controlPc = GetIP(contextRecord); -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location controlPc = AdjustWriteBarrierIP(controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY return g_fEEStarted && ( exceptionRecord->ExceptionCode == STATUS_BREAKPOINT || @@ -5259,14 +5255,12 @@ BOOL HandleHardwareException(PAL_SEHException* ex) { GCX_COOP(); // Must be cooperative to modify frame chain. -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame controlPc = AdjustWriteBarrierIP(controlPc); SetIP(ex->GetContextRecord(), controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY if (IsIPInMarkedJitHelper(controlPc)) { diff --git a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp index be856dbe1a63a..9ce0cc676f7a7 100644 --- a/src/coreclr/vm/gccover.cpp +++ b/src/coreclr/vm/gccover.cpp @@ -1258,9 +1258,9 @@ void RemoveGcCoverageInterrupt(TADDR instrPtr, BYTE * savedInstrPtr, GCCoverageI { ExecutableWriterHolder instrPtrWriterHolder((void*)instrPtr, 4); #ifdef TARGET_ARM - if (GetARMInstructionLength(savedInstrPtr) == 2) + if (GetARMInstructionLength(savedInstrPtr) == 2) *(WORD *)instrPtrWriterHolder.GetRW() = *(WORD *)savedInstrPtr; - else + else *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; #elif defined(TARGET_ARM64) *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; diff --git a/src/coreclr/vm/i386/jithelp.S b/src/coreclr/vm/i386/jithelp.S index facce7cacd3ef..dc56da1d1779e 100644 --- a/src/coreclr/vm/i386/jithelp.S +++ b/src/coreclr/vm/i386/jithelp.S @@ -377,10 +377,27 @@ LEAF_ENTRY JIT_WriteBarrierGroup, _TEXT ret LEAF_END JIT_WriteBarrierGroup, _TEXT -#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS -// ******************************************************************************* -// Write barrier wrappers with fcall calling convention -// + .data + .align 4 + .global C_FUNC(JIT_WriteBarrierEAX_Loc) +C_FUNC(JIT_WriteBarrierEAX_Loc): + .word 0 + .text + +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + mov eax, edx + mov edx, ecx + push eax + call 1f +1: + pop eax +2: + add eax, offset _GLOBAL_OFFSET_TABLE_+1 // (2b - 1b) + mov eax, dword ptr [eax + C_FUNC(JIT_WriteBarrierEAX_Loc)@GOT] + xchg eax, dword ptr [esp] + ret +LEAF_END JIT_WriteBarrier_Callable, _TEXT + .macro UniversalWriteBarrierHelper name .align 4 @@ -392,6 +409,11 @@ LEAF_END JIT_\name, _TEXT .endm +#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS +// ******************************************************************************* +// Write barrier wrappers with fcall calling convention +// + // Only define these if we're using the ASM GC write barriers; if this flag is not defined, // we'll use C++ versions of these write barriers. 
UniversalWriteBarrierHelper CheckedWriteBarrier diff --git a/src/coreclr/vm/i386/jithelp.asm b/src/coreclr/vm/i386/jithelp.asm index 3743ac3cbe02f..3650b3f2afd6d 100644 --- a/src/coreclr/vm/i386/jithelp.asm +++ b/src/coreclr/vm/i386/jithelp.asm @@ -411,15 +411,13 @@ ENDM ;******************************************************************************* ; Write barrier wrappers with fcall calling convention ; -UniversalWriteBarrierHelper MACRO name + + .data ALIGN 4 -PUBLIC @JIT_&name&@8 -@JIT_&name&@8 PROC - mov eax,edx - mov edx,ecx - jmp _JIT_&name&EAX@0 -@JIT_&name&@8 ENDP -ENDM + public _JIT_WriteBarrierEAX_Loc +_JIT_WriteBarrierEAX_Loc dd 0 + + .code ; WriteBarrierStart and WriteBarrierEnd are used to determine bounds of ; WriteBarrier functions so can determine if got AV in them. @@ -429,6 +427,25 @@ _JIT_WriteBarrierGroup@0 PROC ret _JIT_WriteBarrierGroup@0 ENDP + ALIGN 4 +PUBLIC @JIT_WriteBarrier_Callable@8 +@JIT_WriteBarrier_Callable@8 PROC + mov eax,edx + mov edx,ecx + jmp DWORD PTR [_JIT_WriteBarrierEAX_Loc] + +@JIT_WriteBarrier_Callable@8 ENDP + +UniversalWriteBarrierHelper MACRO name + ALIGN 4 +PUBLIC @JIT_&name&@8 +@JIT_&name&@8 PROC + mov eax,edx + mov edx,ecx + jmp _JIT_&name&EAX@0 +@JIT_&name&@8 ENDP +ENDM + ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS ; Only define these if we're using the ASM GC write barriers; if this flag is not defined, ; we'll use C++ versions of these write barriers. @@ -1233,6 +1250,8 @@ fremloopd: ; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code. ; + ALIGN 4 + _JIT_PatchedCodeStart@0 proc public ret _JIT_PatchedCodeStart@0 endp diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index cefe7ecadc5e9..c5ebf8e0cf15c 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -1039,10 +1039,18 @@ void InitJITHelpers1() { BYTE * pfunc = (BYTE *) JIT_WriteBarrierReg_PreGrow; - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); int reg = c_rgWriteBarrierRegs[iBarrier]; - memcpy(pBuf, pfunc, 34); + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 34); + pBufRW = barrierWriterHolder.GetRW(); + } + + memcpy(pBufRW, pfunc, 34); // assert the copied code ends in a ret to make sure we got the right length _ASSERTE(pBuf[33] == 0xC3); @@ -1058,24 +1066,24 @@ void InitJITHelpers1() _ASSERTE(pBuf[0] == 0x89); // Update the reg field (bits 3..5) of the ModR/M byte of this instruction - pBuf[1] &= 0xc7; - pBuf[1] |= reg << 3; + pBufRW[1] &= 0xc7; + pBufRW[1] |= reg << 3; // Second instruction to patch is cmp reg, imm32 (low bound) _ASSERTE(pBuf[2] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[3] &= 0xf8; - pBuf[3] |= reg; + pBufRW[3] &= 0xf8; + pBufRW[3] |= reg; #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization just jump to the old one // Use the slow one from time to time in a debug build because // there are some good asserts in the unoptimized one if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DEBUG_RANDOM_BARRIER_CHECK) { - pfunc = &pBuf[0]; + pfunc = &pBufRW[0]; *pfunc++ = 0xE9; // JMP c_rgDebugWriteBarriers[iBarrier] - *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (pfunc + sizeof(DWORD)); + *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (&pBuf[1] + 
sizeof(DWORD)); } #endif // WRITE_BARRIER_CHECK } @@ -1121,7 +1129,7 @@ void ValidateWriteBarrierHelpers() #endif // WRITE_BARRIER_CHECK // first validate the PreGrow helper - BYTE* pWriteBarrierFunc = reinterpret_cast(JIT_WriteBarrierEAX); + BYTE* pWriteBarrierFunc = GetWriteBarrierCodeLocation(reinterpret_cast(JIT_WriteBarrierEAX)); // ephemeral region DWORD* pLocation = reinterpret_cast(&pWriteBarrierFunc[AnyGrow_EphemeralLowerBound]); @@ -1159,7 +1167,7 @@ void ValidateWriteBarrierHelpers() #endif //CODECOVERAGE /*********************************************************************/ -#define WriteBarrierIsPreGrow() (((BYTE *)JIT_WriteBarrierEAX)[10] == 0xc1) +#define WriteBarrierIsPreGrow() ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[10] == 0xc1) /*********************************************************************/ @@ -1177,20 +1185,28 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization if we are checking write barrier - if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + if ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[0] == 0xE9) // we are using slow write barrier return stompWBCompleteActions; #endif // WRITE_BARRIER_CHECK // Update the lower bound. for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); + + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + pBufRW = barrierWriterHolder.GetRW(); + } // assert there is in fact a cmp r/m32, imm32 there _ASSERTE(pBuf[2] == 0x81); // Update the immediate which is the lower bound of the ephemeral generation - size_t *pfunc = (size_t *) &pBuf[AnyGrow_EphemeralLowerBound]; + size_t *pfunc = (size_t *) &pBufRW[AnyGrow_EphemeralLowerBound]; //avoid trivial self modifying code if (*pfunc != (size_t) g_ephemeral_low) { @@ -1203,7 +1219,7 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) _ASSERTE(pBuf[10] == 0x81); // Update the upper bound if we are using the PostGrow thunk. - pfunc = (size_t *) &pBuf[PostGrow_EphemeralUpperBound]; + pfunc = (size_t *) &pBufRW[PostGrow_EphemeralUpperBound]; //avoid trivial self modifying code if (*pfunc != (size_t) g_ephemeral_high) { @@ -1233,7 +1249,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization if we are checking write barrier - if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + if ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[0] == 0xE9) // we are using slow write barrier return stompWBCompleteActions; #endif // WRITE_BARRIER_CHECK @@ -1242,12 +1258,20 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); int reg = c_rgWriteBarrierRegs[iBarrier]; size_t *pfunc; - // Check if we are still using the pre-grow version of the write barrier. 
+ BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + pBufRW = barrierWriterHolder.GetRW(); + } + + // Check if we are still using the pre-grow version of the write barrier. if (bWriteBarrierIsPreGrow) { // Check if we need to use the upper bounds checking barrier stub. @@ -1260,7 +1284,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) } pfunc = (size_t *) JIT_WriteBarrierReg_PostGrow; - memcpy(pBuf, pfunc, 42); + memcpy(pBufRW, pfunc, 42); // assert the copied code ends in a ret to make sure we got the right length _ASSERTE(pBuf[41] == 0xC3); @@ -1276,35 +1300,35 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) _ASSERTE(pBuf[0] == 0x89); // Update the reg field (bits 3..5) of the ModR/M byte of this instruction - pBuf[1] &= 0xc7; - pBuf[1] |= reg << 3; + pBufRW[1] &= 0xc7; + pBufRW[1] |= reg << 3; // Second instruction to patch is cmp reg, imm32 (low bound) _ASSERTE(pBuf[2] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[3] &= 0xf8; - pBuf[3] |= reg; + pBufRW[3] &= 0xf8; + pBufRW[3] |= reg; // Third instruction to patch is another cmp reg, imm32 (high bound) _ASSERTE(pBuf[10] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[11] &= 0xf8; - pBuf[11] |= reg; + pBufRW[11] &= 0xf8; + pBufRW[11] |= reg; bStompWriteBarrierEphemeral = true; // What we're trying to update is the offset field of a // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[22] == 0x80); - pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[34] == 0xC6); - pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableSecondLocation]; } else @@ -1313,14 +1337,14 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[14] == 0x80); - pfunc = (size_t *) &pBuf[PreGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PreGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[26] == 0xC6); - pfunc = (size_t *) &pBuf[PreGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PreGrow_CardTableSecondLocation]; } } else @@ -1329,13 +1353,13 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[22] == 0x80); - pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[34] == 0xC6); - pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableSecondLocation]; } // Stick in the adjustment value. 
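The x86 barrier-stomping hunks above all follow the same W^X convention: the code is read and asserted on through its executable (RX) address pBuf, while every store goes through pBufRW, a temporary writable (RW) mapping of the same physical pages that ExecutableWriterHolder creates when the write barrier copy is enabled. A minimal sketch of that convention, assuming the holder's ExecutableWriterHolder<BYTE> template form; PatchBarrierImmediate is a hypothetical helper and the 42-byte size merely mirrors the post-grow barrier patched above, so none of this is code from the change itself:

    // Sketch only: illustrates the pBuf (RX) / pBufRW (RW) split used above.
    void PatchBarrierImmediate(BYTE* pBuf, size_t offset, size_t newValue)
    {
        BYTE* pBufRW = pBuf;
        ExecutableWriterHolder<BYTE> barrierWriterHolder;
        if (IsWriteBarrierCopyEnabled())
        {
            // Map a scratch RW view of the RX page; GetRW() returns the
            // writable alias, which the holder tears down on destruction.
            barrierWriterHolder = ExecutableWriterHolder<BYTE>(pBuf, 42);
            pBufRW = barrierWriterHolder.GetRW();
        }
        _ASSERTE(pBuf[offset - 2] == 0x81);   // reads stay on the RX view
        *(size_t*)&pBufRW[offset] = newValue; // writes go through the RW alias
    }

Reading through pBuf and writing through pBufRW keeps the asserts valid whether or not W^X is enabled, because the two pointers are identical in the non-W^X case.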
diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 61c5dfd90cbfc..564363053fc6a 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -4829,7 +4829,7 @@ VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript) X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale); // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX) - X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0); + X86EmitCall(NewExternalCodeLabel((LPVOID) GetWriteBarrierCodeLocation(&JIT_WriteBarrierEAX)), 0); } else #else // TARGET_AMD64 diff --git a/src/coreclr/vm/i386/stublinkerx86.h b/src/coreclr/vm/i386/stublinkerx86.h index af5244d077193..564c999975e7c 100644 --- a/src/coreclr/vm/i386/stublinkerx86.h +++ b/src/coreclr/vm/i386/stublinkerx86.h @@ -536,7 +536,7 @@ struct StubPrecode { return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32)); } - +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -562,6 +562,7 @@ struct StubPrecode { ExecutableWriterHolder rel32Holder(&m_rel32, 4); return rel32SetInterlocked(&m_rel32, rel32Holder.GetRW(), target, expected, (MethodDesc*)GetMethodDesc()); } +#endif // !DACCESS_COMPILE }; IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);) IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);) @@ -646,6 +647,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); #else // HAS_FIXUP_PRECODE_CHUNKS TADDR GetMethodDesc() diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index a1e4d93d881de..882e2c29cef04 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11875,7 +11875,7 @@ WORD CEEJitInfo::getRelocTypeHint(void * target) if (m_fAllowRel32) { // The JIT calls this method for data addresses only. It always uses REL32s for direct code targets. - if (IsPreferredExecutableRange(target)) + if (ExecutableAllocator::IsPreferredExecutableRange(target)) return IMAGE_REL_BASED_REL32; } #endif // TARGET_AMD64 diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index ca9d03c2141d3..cf9617a353282 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -238,15 +238,10 @@ extern "C" FCDECL2(Object*, ChkCastAny_NoCacheLookup, CORINFO_CLASS_HANDLE type, extern "C" FCDECL2(Object*, IsInstanceOfAny_NoCacheLookup, CORINFO_CLASS_HANDLE type, Object* obj); extern "C" FCDECL2(LPVOID, Unbox_Helper, CORINFO_CLASS_HANDLE type, Object* obj); -#if defined(TARGET_ARM64) || defined(FEATURE_WRITEBARRIER_COPY) // ARM64 JIT_WriteBarrier uses speciall ABI and thus is not callable directly // Copied write barriers must be called at a different location extern "C" FCDECL2(VOID, JIT_WriteBarrier_Callable, Object **dst, Object *ref); #define WriteBarrier_Helper JIT_WriteBarrier_Callable -#else -// in other cases the regular JIT helper is callable. 
-#define WriteBarrier_Helper JIT_WriteBarrier -#endif extern "C" FCDECL1(void, JIT_InternalThrow, unsigned exceptNum); extern "C" FCDECL1(void*, JIT_InternalThrowFromHelper, unsigned exceptNum); diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 4f222be4a2c03..0a77e4445f06f 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -1137,7 +1137,7 @@ void LoaderAllocator::Init(BaseDomain *pDomain, BYTE *pExecutableHeapMemory) _ASSERTE(dwTotalReserveMemSize <= VIRTUAL_ALLOC_RESERVE_GRANULARITY); #endif - BYTE * initReservedMem = ClrVirtualAllocExecutable(dwTotalReserveMemSize, MEM_RESERVE, PAGE_NOACCESS); + BYTE * initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_InitialReservedMemForLoaderHeaps = initReservedMem; @@ -1672,18 +1672,25 @@ void AssemblyLoaderAllocator::SetCollectible() { CONTRACTL { - THROWS; + NOTHROW; } CONTRACTL_END; m_IsCollectible = true; -#ifndef DACCESS_COMPILE - m_pShuffleThunkCache = new ShuffleThunkCache(m_pStubHeap); -#endif } #ifndef DACCESS_COMPILE +void AssemblyLoaderAllocator::Init(AppDomain* pAppDomain) +{ + m_Id.Init(); + LoaderAllocator::Init((BaseDomain *)pAppDomain); + if (IsCollectible()) + { + m_pShuffleThunkCache = new ShuffleThunkCache(m_pStubHeap); + } +} + #ifndef CROSSGEN_COMPILE AssemblyLoaderAllocator::~AssemblyLoaderAllocator() diff --git a/src/coreclr/vm/loaderallocator.inl b/src/coreclr/vm/loaderallocator.inl index a826675ccc93c..993732d4010f8 100644 --- a/src/coreclr/vm/loaderallocator.inl +++ b/src/coreclr/vm/loaderallocator.inl @@ -21,12 +21,6 @@ inline void GlobalLoaderAllocator::Init(BaseDomain *pDomain) LoaderAllocator::Init(pDomain, m_ExecutableHeapInstance); } -inline void AssemblyLoaderAllocator::Init(AppDomain* pAppDomain) -{ - m_Id.Init(); - LoaderAllocator::Init((BaseDomain *)pAppDomain); -} - inline BOOL LoaderAllocatorID::Equals(LoaderAllocatorID *pId) { LIMITED_METHOD_CONTRACT; diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index bd3984d8697cd..db308ab208a8e 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -4188,46 +4188,6 @@ c_CentralJumpCode = { }; #include -#elif defined(TARGET_AMD64) - -#include -static const struct CentralJumpCode { - BYTE m_movzxRAX[4]; - BYTE m_shlEAX[4]; - BYTE m_movRAX[2]; - MethodDesc* m_pBaseMD; - BYTE m_addR10RAX[3]; - BYTE m_jmp[1]; - INT32 m_rel32; - - inline void Setup(CentralJumpCode* pCodeRX, MethodDesc* pMD, PCODE target, LoaderAllocator *pLoaderAllocator) { - WRAPPER_NO_CONTRACT; - m_pBaseMD = pMD; - m_rel32 = rel32UsingJumpStub(&pCodeRX->m_rel32, target, pMD, pLoaderAllocator); - } - - inline BOOL CheckTarget(TADDR target) { - WRAPPER_NO_CONTRACT; - TADDR addr = rel32Decode(PTR_HOST_MEMBER_TADDR(CentralJumpCode, this, m_rel32)); - if (*PTR_BYTE(addr) == 0x48 && - *PTR_BYTE(addr+1) == 0xB8 && - *PTR_BYTE(addr+10) == 0xFF && - *PTR_BYTE(addr+11) == 0xE0) - { - addr = *PTR_TADDR(addr+2); - } - return (addr == target); - } -} -c_CentralJumpCode = { - { 0x48, 0x0F, 0xB6, 0xC0 }, // movzx rax,al - { 0x48, 0xC1, 0xE0, MethodDesc::ALIGNMENT_SHIFT }, // shl rax, MethodDesc::ALIGNMENT_SHIFT - { 0x49, 0xBA }, NULL, // mov r10, pBaseMD - { 0x4C, 0x03, 0xD0 }, // add r10,rax - { 0xE9 }, 0 // jmp PreStub -}; -#include - #elif defined(TARGET_ARM) #include diff --git a/src/coreclr/vm/precode.cpp b/src/coreclr/vm/precode.cpp index 80731c191e737..0bd2bd657f9ad 100644 --- a/src/coreclr/vm/precode.cpp +++ b/src/coreclr/vm/precode.cpp @@ -480,7 +480,9 @@ 
void Precode::Reset() #ifdef HAS_FIXUP_PRECODE_CHUNKS if (t == PRECODE_FIXUP) { - size = sizeof(FixupPrecode) + sizeof(PTR_MethodDesc); + // The writeable size the Init method accesses is dynamic depending on + // the FixupPrecode members. + size = ((FixupPrecode*)this)->GetSizeRW(); } else #endif diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp index 0971334af4d31..e61802b984950 100644 --- a/src/coreclr/vm/stackwalk.cpp +++ b/src/coreclr/vm/stackwalk.cpp @@ -713,14 +713,12 @@ UINT_PTR Thread::VirtualUnwindToFirstManagedCallFrame(T_CONTEXT* pContext) // get our caller's PSP, or our caller's caller's SP. while (!ExecutionManager::IsManagedCode(uControlPc)) { -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(uControlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame uControlPc = AdjustWriteBarrierIP(uControlPc); SetIP(pContext, uControlPc); } -#endif // FEATURE_WRITEBARRIER_COPY #ifndef TARGET_UNIX uControlPc = VirtualUnwindCallFrame(pContext); diff --git a/src/coreclr/vm/stublink.cpp b/src/coreclr/vm/stublink.cpp index 04a33e3982613..304cb4fb35b44 100644 --- a/src/coreclr/vm/stublink.cpp +++ b/src/coreclr/vm/stublink.cpp @@ -846,7 +846,7 @@ Stub *StubLinker::Link(LoaderHeap *pHeap, DWORD flags) ); ASSERT(pStub != NULL); - bool fSuccess = EmitStub(pStub, globalsize, pHeap); + bool fSuccess = EmitStub(pStub, globalsize, size, pHeap); #ifdef STUBLINKER_GENERATES_UNWIND_INFO if (fSuccess) @@ -1007,13 +1007,13 @@ int StubLinker::CalculateSize(int* pGlobalSize) return globalsize + datasize; } -bool StubLinker::EmitStub(Stub* pStub, int globalsize, LoaderHeap* pHeap) +bool StubLinker::EmitStub(Stub* pStub, int globalsize, int totalSize, LoaderHeap* pHeap) { STANDARD_VM_CONTRACT; BYTE *pCode = (BYTE*)(pStub->GetBlob()); - ExecutableWriterHolder stubWriterHolder(pStub, sizeof(Stub)); + ExecutableWriterHolder stubWriterHolder(pStub, sizeof(Stub) + totalSize); Stub *pStubRW = stubWriterHolder.GetRW(); BYTE *pCodeRW = (BYTE*)(pStubRW->GetBlob()); @@ -2013,11 +2013,7 @@ VOID Stub::DeleteStub() FillMemory(this+1, m_numCodeBytes, 0xcc); #endif -#ifndef TARGET_UNIX - DeleteExecutable((BYTE*)GetAllocationBase()); -#else delete [] (BYTE*)GetAllocationBase(); -#endif } } @@ -2124,11 +2120,7 @@ Stub* Stub::NewStub(PTR_VOID pCode, DWORD flags) BYTE *pBlock; if (pHeap == NULL) { -#ifndef TARGET_UNIX - pBlock = new (executable) BYTE[totalSize]; -#else pBlock = new BYTE[totalSize]; -#endif } else { diff --git a/src/coreclr/vm/stublink.h b/src/coreclr/vm/stublink.h index 94326f9962ea7..9613fd48f687d 100644 --- a/src/coreclr/vm/stublink.h +++ b/src/coreclr/vm/stublink.h @@ -395,7 +395,7 @@ class StubLinker // Writes out the code element into memory following the // stub object. 
- bool EmitStub(Stub* pStub, int globalsize, LoaderHeap* pHeap); + bool EmitStub(Stub* pStub, int globalsize, int totalSize, LoaderHeap* pHeap); CodeRun *GetLastCodeRunIfAny(); diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 4dfa4a22b3fa4..2302617614efd 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -1078,18 +1078,30 @@ DWORD_PTR Thread::OBJREF_HASH = OBJREF_TABSIZE; extern "C" void STDCALL JIT_PatchedCodeStart(); extern "C" void STDCALL JIT_PatchedCodeLast(); -#ifdef FEATURE_WRITEBARRIER_COPY - static void* s_barrierCopy = NULL; BYTE* GetWriteBarrierCodeLocation(VOID* barrier) { - return (BYTE*)s_barrierCopy + ((BYTE*)barrier - (BYTE*)JIT_PatchedCodeStart); + if (IsWriteBarrierCopyEnabled()) + { + return (BYTE*)PINSTRToPCODE((TADDR)s_barrierCopy + ((TADDR)barrier - (TADDR)JIT_PatchedCodeStart)); + } + else + { + return (BYTE*)barrier; + } } BOOL IsIPInWriteBarrierCodeCopy(PCODE controlPc) { - return (s_barrierCopy <= (void*)controlPc && (void*)controlPc < ((BYTE*)s_barrierCopy + ((BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart))); + if (IsWriteBarrierCopyEnabled()) + { + return (s_barrierCopy <= (void*)controlPc && (void*)controlPc < ((BYTE*)s_barrierCopy + ((BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart))); + } + else + { + return FALSE; + } } PCODE AdjustWriteBarrierIP(PCODE controlPc) @@ -1100,14 +1112,21 @@ PCODE AdjustWriteBarrierIP(PCODE controlPc) return (PCODE)JIT_PatchedCodeStart + (controlPc - (PCODE)s_barrierCopy); } +#ifdef TARGET_X86 +extern "C" void *JIT_WriteBarrierEAX_Loc; +#else extern "C" void *JIT_WriteBarrier_Loc; +#endif + #ifdef TARGET_ARM64 extern "C" void (*JIT_WriteBarrier_Table)(); extern "C" void *JIT_WriteBarrier_Loc = 0; extern "C" void *JIT_WriteBarrier_Table_Loc = 0; #endif // TARGET_ARM64 -#endif // FEATURE_WRITEBARRIER_COPY +#ifdef TARGET_ARM +extern "C" void *JIT_WriteBarrier_Loc = 0; +#endif // TARGET_ARM #ifndef TARGET_UNIX // g_TlsIndex is only used by the DAC. Disable optimizations around it to prevent it from getting optimized out. @@ -1138,50 +1157,74 @@ void InitThreadManager() _ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart > (ptrdiff_t)0); _ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart < (ptrdiff_t)GetOsPageSize()); -#ifdef FEATURE_WRITEBARRIER_COPY - s_barrierCopy = ClrVirtualAlloc(NULL, g_SystemInfo.dwAllocationGranularity, MEM_COMMIT, PAGE_EXECUTE_READWRITE); - if (s_barrierCopy == NULL) - { - _ASSERTE(!"ClrVirtualAlloc of GC barrier code page failed"); - COMPlusThrowWin32(); - } - + if (IsWriteBarrierCopyEnabled()) { - size_t writeBarrierSize = (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart; - ExecutableWriterHolder barrierWriterHolder(s_barrierCopy, writeBarrierSize); - memcpy(barrierWriterHolder.GetRW(), (BYTE*)JIT_PatchedCodeStart, writeBarrierSize); - } + s_barrierCopy = ExecutableAllocator::Instance()->Reserve(g_SystemInfo.dwAllocationGranularity); + ExecutableAllocator::Instance()->Commit(s_barrierCopy, g_SystemInfo.dwAllocationGranularity, true); + if (s_barrierCopy == NULL) + { + _ASSERTE(!"Allocation of GC barrier code page failed"); + COMPlusThrowWin32(); + } - // Store the JIT_WriteBarrier copy location to a global variable so that helpers - // can jump to it. 
- JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier); + { + size_t writeBarrierSize = (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart; + ExecutableWriterHolder barrierWriterHolder(s_barrierCopy, writeBarrierSize); + memcpy(barrierWriterHolder.GetRW(), (BYTE*)JIT_PatchedCodeStart, writeBarrierSize); + } - SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); + // Store the JIT_WriteBarrier copy location to a global variable so that helpers + // can jump to it. +#ifdef TARGET_X86 + JIT_WriteBarrierEAX_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EAX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_ECX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierECX)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EBX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEBX)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_ESI, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierESI)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EDI, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEDI)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EBP, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEBP)); +#else // TARGET_X86 + JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier); +#endif // TARGET_X86 + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); #ifdef TARGET_ARM64 - // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. - JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); - - SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); - SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); + // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. + JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); #endif // TARGET_ARM64 -#else // FEATURE_WRITEBARRIER_COPY +#if defined(TARGET_ARM64) || defined(TARGET_ARM) + SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); +#endif // TARGET_ARM64 || TARGET_ARM - // I am using virtual protect to cover the entire range that this code falls in. - // + } + else + { + // I am using virtual protect to cover the entire range that this code falls in. + // - // We could reset it to non-writeable inbetween GCs and such, but then we'd have to keep on re-writing back and forth, - // so instead we'll leave it writable from here forward. + // We could reset it to non-writeable inbetween GCs and such, but then we'd have to keep on re-writing back and forth, + // so instead we'll leave it writable from here forward. 
- DWORD oldProt; - if (!ClrVirtualProtect((void *)JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart, - PAGE_EXECUTE_READWRITE, &oldProt)) - { - _ASSERTE(!"ClrVirtualProtect of code page failed"); - COMPlusThrowWin32(); + DWORD oldProt; + if (!ClrVirtualProtect((void *)JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart, + PAGE_EXECUTE_READWRITE, &oldProt)) + { + _ASSERTE(!"ClrVirtualProtect of code page failed"); + COMPlusThrowWin32(); + } + +#ifdef TARGET_X86 + JIT_WriteBarrierEAX_Loc = (void*)JIT_WriteBarrierEAX; +#else + JIT_WriteBarrier_Loc = (void*)JIT_WriteBarrier; +#endif +#ifdef TARGET_ARM64 + // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. + JIT_WriteBarrier_Table_Loc = (void*)&JIT_WriteBarrier_Table; +#endif // TARGET_ARM64 } -#endif // FEATURE_WRITEBARRIER_COPY #ifndef TARGET_UNIX _ASSERTE(GetThreadNULLOk() == NULL); diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 0aadbf40260ca..8a66c0555129d 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -6272,18 +6272,23 @@ class ThreadStateNCStackHolder BOOL Debug_IsLockedViaThreadSuspension(); -#ifdef FEATURE_WRITEBARRIER_COPY +inline BOOL IsWriteBarrierCopyEnabled() +{ +#ifdef DACCESS_COMPILE + return FALSE; +#else // DACCESS_COMPILE +#ifdef HOST_OSX + return TRUE; +#else + return ExecutableAllocator::IsWXORXEnabled(); +#endif +#endif // DACCESS_COMPILE +} BYTE* GetWriteBarrierCodeLocation(VOID* barrier); BOOL IsIPInWriteBarrierCodeCopy(PCODE controlPc); PCODE AdjustWriteBarrierIP(PCODE controlPc); -#else // FEATURE_WRITEBARRIER_COPY - -#define GetWriteBarrierCodeLocation(barrier) ((BYTE*)(barrier)) - -#endif // FEATURE_WRITEBARRIER_COPY - #if !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE) extern thread_local Thread* t_pStackWalkerWalkingThread; #define SET_THREAD_TYPE_STACKWALKER(pThread) t_pStackWalkerWalkingThread = pThread diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 95d568d641c73..6d4fdcffd62e0 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -641,7 +641,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA dwTotalReserveMemSize); } - initReservedMem = ClrVirtualAllocExecutable (dwTotalReserveMemSize, MEM_RESERVE, PAGE_NOACCESS); + initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_initialReservedMemForHeaps = (BYTE *) initReservedMem; From f7e73635ada7727eb90c1ec65337b90428682805 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Wed, 30 Jun 2021 13:34:24 +0200 Subject: [PATCH 2/8] Fix FreeBSD < ver 13 memfd_create was introduced in FreeBSD 13.0; on older releases, shm_open(SHM_ANON, ...) is the equivalent.
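The hunk below therefore keeps shm_open on all FreeBSD versions, since it works both before and after 13.0. A condensed sketch of what the mapper-creation code does with either descriptor, with the runtime's error handling elided (CreateDoubleMappingFd is an illustrative name, TARGET_FREEBSD is the runtime's platform define, and on Linux memfd_create may additionally need _GNU_SOURCE or a raw syscall, which the runtime handles separately):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    // Create the anonymous shared-memory object that backs the double mapping.
    static int CreateDoubleMappingFd(size_t maxExecutableCodeSize)
    {
    #ifdef TARGET_FREEBSD
        // Pre-13.0 equivalent of memfd_create.
        int fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, S_IRWXU);
    #else
        int fd = memfd_create("doublemapper", MFD_CLOEXEC);
    #endif
        if (fd == -1 || ftruncate(fd, maxExecutableCodeSize) == -1)
            return -1;
        // The same offset of this descriptor can later be mapped twice,
        // once PROT_READ | PROT_WRITE and once PROT_READ | PROT_EXEC,
        // yielding RW and RX views of the same physical memory.
        return fd;
    }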
--- src/coreclr/minipal/Unix/doublemapping.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp index 52f3809efb868..924196551e900 100644 --- a/src/coreclr/minipal/Unix/doublemapping.cpp +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -41,7 +41,11 @@ bool VMToOSInterface::CreateDoubleMemoryMapper(void** pHandle, size_t *pMaxExecu { #ifndef TARGET_OSX +#ifdef TARGET_FREEBSD + int fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, S_IRWXU); +#else // TARGET_FREEBSD int fd = memfd_create("doublemapper", MFD_CLOEXEC); +#endif // TARGET_FREEBSD if (fd == -1) { From 74c15e27d55f62bb70e0b5078676d4295c5496ed Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Thu, 8 Jul 2021 23:39:42 +0200 Subject: [PATCH 3/8] Reflect PR feedback and fix old macOS x64 --- src/coreclr/inc/CrstTypes.def | 2 +- src/coreclr/inc/clrconfigvalues.h | 2 +- src/coreclr/inc/crsttypes.h | 40 +++++++++--------- src/coreclr/minipal/Unix/doublemapping.cpp | 41 ++++++++++++++++++- src/coreclr/minipal/Windows/doublemapping.cpp | 41 ------------------- src/coreclr/utilcode/executableallocator.cpp | 2 +- src/coreclr/vm/jitinterface.h | 37 ++++++++--------- src/coreclr/vm/threads.cpp | 18 +++++--- 8 files changed, 92 insertions(+), 91 deletions(-) diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index 3b67b14834e29..74b8c165ca934 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -202,7 +202,7 @@ Crst Exception End Crst ExecutableAllocatorLock - Unordered + AcquiredAfter LoaderHeap End Crst ExecuteManRangeLock diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 1c57796cb2e39..bfd43629017a3 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -738,7 +738,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If COMPlus_LTTng is // // Executable code // -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWXORX, W("EnableWXORX"), 1, "Enable W^X for executable memory."); +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWriteXorExecute, W("EnableWriteXorExecute"), 1, "Enable W^X for executable memory."); #ifdef FEATURE_GDBJIT /// diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h index 462a654a62c5f..015199f390fd0 100644 --- a/src/coreclr/inc/crsttypes.h +++ b/src/coreclr/inc/crsttypes.h @@ -151,8 +151,8 @@ int g_rgCrstLevelMap[] = 0, // CrstArgBasedStubCache 0, // CrstAssemblyList 12, // CrstAssemblyLoader - 3, // CrstAvailableClass - 4, // CrstAvailableParamTypes + 4, // CrstAvailableClass + 5, // CrstAvailableParamTypes 7, // CrstBaseDomain -1, // CrstCCompRC 13, // CrstClassFactInfoHash @@ -161,7 +161,7 @@ int g_rgCrstLevelMap[] = 6, // CrstCodeFragmentHeap 9, // CrstCodeVersioning 0, // CrstCOMCallWrapper - 4, // CrstCOMWrapperCache + 5, // CrstCOMWrapperCache 3, // CrstDataTest1 0, // CrstDataTest2 0, // CrstDbgTransport @@ -180,10 +180,10 @@ int g_rgCrstLevelMap[] = 18, // CrstEventPipe 0, // CrstEventStore 0, // CrstException - -1, // CrstExecutableAllocatorLock + 0, // CrstExecutableAllocatorLock 0, // CrstExecuteManRangeLock 0, // CrstExternalObjectContextCache - 3, // CrstFCall + 4, // CrstFCall 7, // CrstFuncPtrStubs 10, // CrstFusionAppCtx 10, // CrstGCCover @@ -198,25 +198,25 @@ int g_rgCrstLevelMap[] = 3, // CrstInlineTrackingMap 17, // CrstInstMethodHashTable 20, // CrstInterop - 4, // CrstInteropData + 5, // CrstInteropData 0, // CrstIsJMCMethod 7, // CrstISymUnmanagedReader 11, // CrstJit 0, // 
CrstJitGenericHandleCache 16, // CrstJitInlineTrackingMap - 3, // CrstJitPatchpoint + 4, // CrstJitPatchpoint -1, // CrstJitPerf 6, // CrstJumpStubCache 0, // CrstLeafLock -1, // CrstListLock 15, // CrstLoaderAllocator 16, // CrstLoaderAllocatorReferences - 0, // CrstLoaderHeap + 3, // CrstLoaderHeap 3, // CrstManagedObjectWrapperMap 14, // CrstMethodDescBackpatchInfoTracker - 4, // CrstModule + 5, // CrstModule 15, // CrstModuleFixup - 3, // CrstModuleLookupTable + 4, // CrstModuleLookupTable 0, // CrstMulticoreJitHash 13, // CrstMulticoreJitManager 0, // CrstNativeImageEagerFixups @@ -224,22 +224,22 @@ int g_rgCrstLevelMap[] = 0, // CrstNls 0, // CrstNotifyGdb 2, // CrstObjectList - 4, // CrstPEImage + 5, // CrstPEImage 19, // CrstPendingTypeLoadEntry - 3, // CrstPgoData + 4, // CrstPgoData 0, // CrstPinnedByrefValidation 0, // CrstProfilerGCRefDataFreeList 0, // CrstProfilingAPIStatus - 3, // CrstRCWCache + 4, // CrstRCWCache 0, // CrstRCWCleanupList 10, // CrstReadyToRunEntryPointToMethodDescMap 8, // CrstReflection 17, // CrstReJITGlobalRequest - 3, // CrstRetThunkCache + 4, // CrstRetThunkCache 3, // CrstSavedExceptionInfo 0, // CrstSaveModuleProfileData 0, // CrstSecurityStackwalkCache - 3, // CrstSigConvert + 4, // CrstSigConvert 5, // CrstSingleUseLock 0, // CrstSpecialStatics 0, // CrstStackSampler @@ -249,7 +249,7 @@ int g_rgCrstLevelMap[] = 4, // CrstStubUnwindInfoHeapSegments 3, // CrstSyncBlockCache 0, // CrstSyncHashLock - 4, // CrstSystemBaseDomain + 5, // CrstSystemBaseDomain 13, // CrstSystemDomain 0, // CrstSystemDomainDelayedUnloadList 0, // CrstThreadIdDispenser @@ -258,13 +258,13 @@ int g_rgCrstLevelMap[] = 13, // CrstThreadpoolWorker 12, // CrstThreadStore 8, // CrstTieredCompilation - 3, // CrstTypeEquivalenceMap + 4, // CrstTypeEquivalenceMap 10, // CrstTypeIDMap - 3, // CrstUMEntryThunkCache - 3, // CrstUniqueStack + 4, // CrstUMEntryThunkCache + 4, // CrstUniqueStack 7, // CrstUnresolvedClassLock 3, // CrstUnwindInfoTableLock - 3, // CrstVSDIndirectionCellLock + 4, // CrstVSDIndirectionCellLock 3, // CrstWrapperTemplate }; diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp index 924196551e900..a50b326861aad 100644 --- a/src/coreclr/minipal/Unix/doublemapping.cpp +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef TARGET_LINUX #include #include // __NR_memfd_create @@ -77,6 +78,39 @@ void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) extern "C" void* PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(const void* lpBeginAddress, const void* lpEndAddress, size_t dwSize); +#ifdef TARGET_OSX +bool IsMapJitFlagNeeded() +{ + static volatile int isMapJitFlagNeeded = -1; + + if (isMapJitFlagNeeded == -1) + { + int mapJitFlagCheckResult = 0; + int pageSize = sysconf(_SC_PAGE_SIZE); + // Try to map a page with read-write-execute protection. It should fail on Mojave hardened runtime and higher. 
+ void* testPage = mmap(NULL, pageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (testPage == MAP_FAILED && (errno == EACCES)) + { + // The mapping has failed with EACCES, check if making the same mapping with MAP_JIT flag works + testPage = mmap(NULL, pageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE | MAP_JIT, -1, 0); + if (testPage != MAP_FAILED) + { + mapJitFlagCheckResult = 1; + } + } + + if (testPage != MAP_FAILED) + { + munmap(testPage, pageSize); + } + + isMapJitFlagNeeded = mapJitFlagCheckResult; + } + + return (bool)isMapJitFlagNeeded; +} +#endif // TARGET_OSX + void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void* rangeEnd) { int fd = (int)(size_t)mapperHandle; @@ -103,7 +137,12 @@ void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offs #ifndef TARGET_OSX void* result = mmap(NULL, size, PROT_NONE, MAP_SHARED, fd, offset); #else - void* result = mmap(NULL, size, PROT_NONE, MAP_JIT | MAP_ANON | MAP_PRIVATE, -1, 0); + int mmapFlags = MAP_ANON | MAP_PRIVATE; + if (IsMapJitFlagNeeded()) + { + mmapFlags |= MAP_JIT; + } + void* result = mmap(NULL, size, PROT_NONE, mmapFlags, -1, 0); #endif if (result == MAP_FAILED) { diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp index 5edda681f2598..e265f1d139ad0 100644 --- a/src/coreclr/minipal/Windows/doublemapping.cpp +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -163,19 +163,6 @@ void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offs break; } -#ifdef _DEBUG - // if (ShouldInjectFaultInRange()) - // { - // // return nullptr (failure) - // faultInjected = true; - // break; - // } -#endif // _DEBUG - - // On UNIX we can also fail if our request size 'dwSize' is larger than 64K and - // and our tryAddr is pointing at a small MEM_FREE region (smaller than 'dwSize') - // However we can't distinguish between this and the race case. - // We might fail in a race. So just move on to next region and continue trying tryAddr = tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY; } @@ -187,35 +174,7 @@ void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offs } } - // STRESS_LOG7(LF_JIT, LL_INFO100, - // "ClrVirtualAllocWithinRange request #%u for %08x bytes in [ %p .. %p ], query count was %u - returned %s: %p\n", - // countOfCalls, (DWORD)dwSize, pMinAddr, pMaxAddr, - // virtualQueryCount, (pResult != nullptr) ? "success" : "failure", pResult); - - // If we failed this call the process will typically be terminated - // so we log any additional reason for failing this call. 
- // - if (pResult == nullptr) - { - // if ((tryAddr + dwSize) > (BYTE *)pMaxAddr) - // { - // // Our tryAddr reached pMaxAddr - // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: Address space exhausted.\n"); - // } - - // if (virtualQueryFailed) - // { - // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: VirtualQuery operation failed.\n"); - // } - - // if (faultInjected) - // { - // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: fault injected.\n"); - // } - } - return pResult; - } void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp index 4d461e66e7e51..ac4c326c83784 100644 --- a/src/coreclr/utilcode/executableallocator.cpp +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -154,7 +154,7 @@ HRESULT ExecutableAllocator::StaticInitialize(FatalErrorHandler fatalErrorHandle LIMITED_METHOD_CONTRACT; g_fatalErrorHandler = fatalErrorHandler; - g_isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWXORX) != 0; + g_isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWriteXorExecute) != 0; g_instance = new (nothrow) ExecutableAllocator(); if (g_instance == NULL) { diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index cf9617a353282..e071d0717d179 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -339,28 +339,25 @@ EXTERN_C FCDECL2_VV(UINT64, JIT_LRsz, UINT64 num, int shift); #ifdef TARGET_X86 +#define ENUM_X86_WRITE_BARRIER_REGISTERS() \ + X86_WRITE_BARRIER_REGISTER(EAX) \ + X86_WRITE_BARRIER_REGISTER(ECX) \ + X86_WRITE_BARRIER_REGISTER(EBX) \ + X86_WRITE_BARRIER_REGISTER(ESI) \ + X86_WRITE_BARRIER_REGISTER(EDI) \ + X86_WRITE_BARRIER_REGISTER(EBP) + extern "C" { - void STDCALL JIT_CheckedWriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEBP(); // JIThelp.asm/JIThelp.s - - void STDCALL JIT_DebugWriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEBP(); // JIThelp.asm/JIThelp.s - - void STDCALL JIT_WriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEBP(); // JIThelp.asm/JIThelp.s + +// JIThelp.asm/JIThelp.s +#define X86_WRITE_BARRIER_REGISTER(reg) \ + void STDCALL JIT_CheckedWriteBarrier##reg(); \ + void STDCALL JIT_DebugWriteBarrier##reg(); \ + void STDCALL JIT_WriteBarrier##reg(); + + ENUM_X86_WRITE_BARRIER_REGISTERS() +#undef X86_WRITE_BARRIER_REGISTER void STDCALL JIT_WriteBarrierGroup(); void STDCALL JIT_WriteBarrierGroup_End(); diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 2302617614efd..c4a16d0b04484 100644 --- a/src/coreclr/vm/threads.cpp 
diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp
index 2302617614efd..c4a16d0b04484 100644
--- a/src/coreclr/vm/threads.cpp
+++ b/src/coreclr/vm/threads.cpp
@@ -1177,16 +1177,20 @@ void InitThreadManager()
     // can jump to it.
 #ifdef TARGET_X86
     JIT_WriteBarrierEAX_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX);
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EAX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_ECX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierECX));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EBX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEBX));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_ESI, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierESI));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EDI, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEDI));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EBP, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEBP));
+
+#define X86_WRITE_BARRIER_REGISTER(reg) \
+    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_##reg, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier##reg)); \
+    ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier##reg), W("@WriteBarrier" #reg));
+
+    ENUM_X86_WRITE_BARRIER_REGISTERS()
+
+#undef X86_WRITE_BARRIER_REGISTER
+
 #else // TARGET_X86
     JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier);
 #endif // TARGET_X86
 
     SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier));
+    ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), W("@WriteBarrier"));
 
 #ifdef TARGET_ARM64
     // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated.
@@ -1195,7 +1199,9 @@ void InitThreadManager()
 
 #if defined(TARGET_ARM64) || defined(TARGET_ARM)
     SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier));
+    ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier), W("@CheckedWriteBarrier"));
     SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier));
+    ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier), W("@ByRefWriteBarrier"));
 #endif // TARGET_ARM64 || TARGET_ARM
 }
 
From 52a83d47ebfcf0d34a5b61931dcaf0882e5127ba Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Fri, 9 Jul 2021 09:40:25 +0200
Subject: [PATCH 4/8] Fix ExecutableAllocatorLock vs LeafLock ordering

---
 src/coreclr/inc/CrstTypes.def | 2 +-
 src/coreclr/inc/crsttypes.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def
index 74b8c165ca934..04e3de46146bb 100644
--- a/src/coreclr/inc/CrstTypes.def
+++ b/src/coreclr/inc/CrstTypes.def
@@ -202,7 +202,7 @@ Crst Exception
 End
 
 Crst ExecutableAllocatorLock
-    AcquiredAfter LoaderHeap
+    AcquiredAfter LoaderHeap LeafLock
 End
 
 Crst ExecuteManRangeLock
diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h
index 015199f390fd0..b7809731e58c5 100644
--- a/src/coreclr/inc/crsttypes.h
+++ b/src/coreclr/inc/crsttypes.h
@@ -207,7 +207,7 @@ int g_rgCrstLevelMap[] =
     4,          // CrstJitPatchpoint
     -1,         // CrstJitPerf
     6,          // CrstJumpStubCache
-    0,          // CrstLeafLock
+    3,          // CrstLeafLock
     -1,         // CrstListLock
     15,         // CrstLoaderAllocator
     16,         // CrstLoaderAllocatorReferences
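The level numbers in g_rgCrstLevelMap encode the deadlock-avoidance invariant these lock-ordering commits adjust: roughly, a thread may only acquire a Crst whose level is strictly below the levels of the Crsts it already holds, with -1 marking unordered locks that are exempt. A sketch of that rule as a hypothetical helper (IsAcquisitionOrdered is not a runtime function):

    // Would acquiring 'requested' while holding 'held' respect the ordering?
    static bool IsAcquisitionOrdered(CrstType held, CrstType requested)
    {
        int heldLevel      = g_rgCrstLevelMap[held];
        int requestedLevel = g_rgCrstLevelMap[requested];
        if (heldLevel == -1 || requestedLevel == -1)
            return true; // unordered locks skip the check
        return requestedLevel < heldLevel;
    }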
From 605d67e9b40f219e3ccf59f5200e9f578bb8eb83 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Fri, 9 Jul 2021 16:12:37 +0200
Subject: [PATCH 5/8] Replace LeafLock in UMEntryThunkFreeList with a new lock

Also update the ordering list of the ExecutableAllocatorLock
---
 src/coreclr/inc/CrstTypes.def        |  5 ++++-
 src/coreclr/inc/crsttypes.h          | 19 +++++++++++--------
 src/coreclr/vm/dllimportcallback.cpp |  2 +-
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def
index 04e3de46146bb..c7266df7dbb01 100644
--- a/src/coreclr/inc/CrstTypes.def
+++ b/src/coreclr/inc/CrstTypes.def
@@ -202,7 +202,7 @@ Crst Exception
 End
 
 Crst ExecutableAllocatorLock
-    AcquiredAfter LoaderHeap LeafLock
+    AcquiredAfter LoaderHeap ArgBasedStubCache UMEntryThunkFreeListLock
 End
 
 Crst ExecuteManRangeLock
@@ -509,6 +509,9 @@ Crst TypeEquivalenceMap
     AcquiredBefore LoaderHeap
 End
 
+Crst UMEntryThunkFreeListLock
+End
+
 Crst UniqueStack
     AcquiredBefore LoaderHeap
 End
diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h
index b7809731e58c5..7be482c48bb55 100644
--- a/src/coreclr/inc/crsttypes.h
+++ b/src/coreclr/inc/crsttypes.h
@@ -130,12 +130,13 @@ enum CrstType
     CrstTypeEquivalenceMap = 112,
     CrstTypeIDMap = 113,
     CrstUMEntryThunkCache = 114,
-    CrstUniqueStack = 115,
-    CrstUnresolvedClassLock = 116,
-    CrstUnwindInfoTableLock = 117,
-    CrstVSDIndirectionCellLock = 118,
-    CrstWrapperTemplate = 119,
-    kNumberOfCrstTypes = 120
+    CrstUMEntryThunkFreeListLock = 115,
+    CrstUniqueStack = 116,
+    CrstUnresolvedClassLock = 117,
+    CrstUnwindInfoTableLock = 118,
+    CrstVSDIndirectionCellLock = 119,
+    CrstWrapperTemplate = 120,
+    kNumberOfCrstTypes = 121
 };
 
 #endif // __CRST_TYPES_INCLUDED
@@ -148,7 +149,7 @@ int g_rgCrstLevelMap[] =
 {
     10,         // CrstAppDomainCache
     14,         // CrstAppDomainHandleTable
-    0,          // CrstArgBasedStubCache
+    3,          // CrstArgBasedStubCache
     0,          // CrstAssemblyList
     12,         // CrstAssemblyLoader
     4,          // CrstAvailableClass
@@ -207,7 +208,7 @@ int g_rgCrstLevelMap[] =
     4,          // CrstJitPatchpoint
     -1,         // CrstJitPerf
     6,          // CrstJumpStubCache
-    3,          // CrstLeafLock
+    0,          // CrstLeafLock
     -1,         // CrstListLock
     15,         // CrstLoaderAllocator
     16,         // CrstLoaderAllocatorReferences
@@ -261,6 +262,7 @@ int g_rgCrstLevelMap[] =
     4,          // CrstTypeEquivalenceMap
     10,         // CrstTypeIDMap
     4,          // CrstUMEntryThunkCache
+    3,          // CrstUMEntryThunkFreeListLock
     4,          // CrstUniqueStack
     7,          // CrstUnresolvedClassLock
     3,          // CrstUnwindInfoTableLock
@@ -386,6 +388,7 @@ LPCSTR g_rgCrstNameMap[] =
     "CrstTypeEquivalenceMap",
     "CrstTypeIDMap",
     "CrstUMEntryThunkCache",
+    "CrstUMEntryThunkFreeListLock",
     "CrstUniqueStack",
     "CrstUnresolvedClassLock",
     "CrstUnwindInfoTableLock",
diff --git a/src/coreclr/vm/dllimportcallback.cpp b/src/coreclr/vm/dllimportcallback.cpp
index 4a88f81df5210..4f3cf879d10a4 100644
--- a/src/coreclr/vm/dllimportcallback.cpp
+++ b/src/coreclr/vm/dllimportcallback.cpp
@@ -41,7 +41,7 @@ class UMEntryThunkFreeList
     {
         WRAPPER_NO_CONTRACT;
 
-        m_crst.Init(CrstLeafLock, CRST_UNSAFE_ANYMODE);
+        m_crst.Init(CrstUMEntryThunkFreeListLock, CRST_UNSAFE_ANYMODE);
     }
 
     UMEntryThunk *GetUMEntryThunk()
 
From 27252037fdd59b4b5444c2ac58c6829801b34922 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Sat, 10 Jul 2021 03:19:08 +0200
Subject: [PATCH 6/8] Fix host test, minor cleanup

---
 src/coreclr/utilcode/loaderheap.cpp | 89 +++++++++++++++++++----------
 src/coreclr/vm/virtualcallstub.cpp  | 12 ++--
 2 files changed, 64 insertions(+), 37 deletions(-)

diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp
index 5828763f512f2..90fc0c125c1f7 100644
--- a/src/coreclr/utilcode/loaderheap.cpp
+++ b/src/coreclr/utilcode/loaderheap.cpp
@@ -699,7 +699,7 @@ struct LoaderHeapFreeBlock
     size_t               m_dwSize;   // Total size of this block (including this header)
 //! Try not to grow the size of this structure. It places a minimum size on LoaderHeap allocations.
 
-    static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap)
+    static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMemRX, void *pMemRW, size_t dwTotalSize, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
         STATIC_CONTRACT_GC_NOTRIGGER;
@@ -722,18 +722,19 @@ struct LoaderHeapFreeBlock
     }
 #endif
 
-        INDEBUG(memset(pMem, 0xcc, dwTotalSize);)
-        LoaderHeapFreeBlock *pNewBlock = (LoaderHeapFreeBlock*)pMem;
-        pNewBlock->m_pNext  = *ppHead;
-        pNewBlock->m_dwSize = dwTotalSize;
-        *ppHead = pNewBlock;
+        INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);)
+        LoaderHeapFreeBlock *pNewBlockRX = (LoaderHeapFreeBlock*)pMemRX;
+        LoaderHeapFreeBlock *pNewBlockRW = (LoaderHeapFreeBlock*)pMemRW;
+        pNewBlockRW->m_pNext  = *ppHead;
+        pNewBlockRW->m_dwSize = dwTotalSize;
+        *ppHead = pNewBlockRX;
 
-        MergeBlock(pNewBlock, pHeap);
+        MergeBlock(pNewBlockRX, pNewBlockRW, pHeap);
 
         LOADER_HEAP_END_TRAP_FAULT
     }
 
-
+#ifndef DACCESS_COMPILE
     static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, BOOL fRemoveFromFreeList, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
@@ -755,7 +756,14 @@ struct LoaderHeapFreeBlock
                 // Exact match.  Hooray!
                 if (fRemoveFromFreeList)
                 {
-                    *ppWalk = pCur->m_pNext;
+                    ExecutableWriterHolder<LoaderHeapFreeBlock *> walkWriterHolder;
+                    LoaderHeapFreeBlock **ppWalkRW = ppWalk;
+                    if (pHeap->IsExecutable() && (ppWalk != ppHead))
+                    {
+                        walkWriterHolder = ExecutableWriterHolder<LoaderHeapFreeBlock *>(ppWalk, sizeof(LoaderHeapFreeBlock **));
+                        ppWalkRW = walkWriterHolder.GetRW();
+                    }
+                    *ppWalkRW = pCur->m_pNext;
                 }
                 break;
             }
@@ -765,8 +773,24 @@ struct LoaderHeapFreeBlock
                 pResult = pCur;
                 if (fRemoveFromFreeList)
                 {
-                    *ppWalk = pCur->m_pNext;
-                    InsertFreeBlock(ppWalk, ((BYTE*)pCur) + dwSize, dwCurSize - dwSize, pHeap );
+                    ExecutableWriterHolder<LoaderHeapFreeBlock *> walkWriterHolder;
+                    LoaderHeapFreeBlock **ppWalkRW = ppWalk;
+                    if (pHeap->IsExecutable() && (ppWalk != ppHead))
+                    {
+                        walkWriterHolder = ExecutableWriterHolder<LoaderHeapFreeBlock *>(ppWalk, sizeof(LoaderHeapFreeBlock **));
+                        ppWalkRW = walkWriterHolder.GetRW();
+                    }
+                    *ppWalkRW = pCur->m_pNext;
+
+                    void* pMem = (BYTE*)pCur + dwSize;
+                    void* pMemRW = pMem;
+                    ExecutableWriterHolder<void> memWriterHolder;
+                    if (pHeap->IsExecutable())
+                    {
+                        memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
+                        pMemRW = memWriterHolder.GetRW();
+                    }
+                    InsertFreeBlock(ppWalkRW, pMem, pMemRW, dwCurSize - dwSize, pHeap );
                 }
                 break;
             }
@@ -779,8 +803,15 @@ struct LoaderHeapFreeBlock
 
         if (pResult && fRemoveFromFreeList)
         {
+            void *pResultRW = pResult;
+            ExecutableWriterHolder<void> resultWriterHolder;
+            if (pHeap->IsExecutable())
+            {
+                resultWriterHolder = ExecutableWriterHolder<void>(pResult, dwSize);
+                pResultRW = resultWriterHolder.GetRW();
+            }
             // Callers of loaderheap assume allocated memory is zero-inited so we must preserve this invariant!
-            memset(pResult, 0, dwSize);
+            memset(pResultRW, 0, dwSize);
         }
         LOADER_HEAP_END_TRAP_FAULT
         return pResult;
@@ -788,11 +819,11 @@ struct LoaderHeapFreeBlock
 
     }
 
-
+#endif // DACCESS_COMPILE
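    // A distilled sketch of the RX/RW aliasing idiom the holders above use,
    // assuming only what this series shows of ExecutableWriterHolder (the
    // (addressRX, size) constructor and GetRW()); PatchCodeByte is hypothetical:
    //
    //     void PatchCodeByte(BYTE* pRX, BYTE value)
    //     {
    //         ExecutableWriterHolder<BYTE> writerHolder(pRX, sizeof(BYTE));
    //         *writerHolder.GetRW() = value; // write through the RW alias
    //     } // the scratch RW mapping goes away with the holder; pRX stays RX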
 
 private:
     // Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened.
-    static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap)
+    static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlockRX, LoaderHeapFreeBlock *pFreeBlockRW, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
@@ -800,10 +831,10 @@ struct LoaderHeapFreeBlock
 
         LOADER_HEAP_BEGIN_TRAP_FAULT
 
-        LoaderHeapFreeBlock *pNextBlock = pFreeBlock->m_pNext;
-        size_t               dwSize     = pFreeBlock->m_dwSize;
+        LoaderHeapFreeBlock *pNextBlock = pFreeBlockRX->m_pNext;
+        size_t               dwSize     = pFreeBlockRX->m_dwSize;
 
-        if (pNextBlock == NULL || ((BYTE*)pNextBlock) != (((BYTE*)pFreeBlock) + dwSize))
+        if (pNextBlock == NULL || ((BYTE*)pNextBlock) != (((BYTE*)pFreeBlockRX) + dwSize))
         {
             result = FALSE;
         }
@@ -811,9 +842,9 @@ struct LoaderHeapFreeBlock
         {
             size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize;
             LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext;
-            INDEBUG(memset(pFreeBlock, 0xcc, dwCombinedSize);)
-            pFreeBlock->m_pNext  = pNextNextBlock;
-            pFreeBlock->m_dwSize = dwCombinedSize;
+            INDEBUG(memset(pFreeBlockRW, 0xcc, dwCombinedSize);)
+            pFreeBlockRW->m_pNext  = pNextNextBlock;
+            pFreeBlockRW->m_dwSize = dwCombinedSize;
 
             result = TRUE;
         }
@@ -1514,25 +1545,25 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem,
     }
 #endif
 
+    void *pMemRW = pMem;
+    ExecutableWriterHolder<void> memWriterHolder;
+    if (m_Options & LHF_EXECUTABLE)
+    {
+        memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
+        pMemRW = memWriterHolder.GetRW();
+    }
+
     if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize ))
     {
         // Cool. This was the last block allocated. We can just undo the allocation instead
         // of going to the freelist.
-        void *pMemRW = pMem;
-        ExecutableWriterHolder<void> memWriterHolder;
-        if (m_Options & LHF_EXECUTABLE)
-        {
-            memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
-            pMemRW = memWriterHolder.GetRW();
-        }
         memset(pMemRW, 0x00, dwSize); // Fill freed region with 0
         m_pAllocPtr = (BYTE*)pMem;
     }
     else
     {
-        LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, dwSize, this);
+        LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, pMemRW, dwSize, this);
     }
-
 }
diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp
index 6d4fdcffd62e0..3af4c52afc9bb 100644
--- a/src/coreclr/vm/virtualcallstub.cpp
+++ b/src/coreclr/vm/virtualcallstub.cpp
@@ -2766,11 +2766,7 @@ DispatchHolder *VirtualCallStubManager::GenerateDispatchStub(PCODE ad
     }
 #endif
 
-    ExecutableWriterHolder<DispatchHolder> dispatchWriterHolder(holder, sizeof(DispatchHolder)
-#ifdef TARGET_AMD64
-                                                                + sizeof(DispatchStubShort)
-#endif
-                                                                );
+    ExecutableWriterHolder<DispatchHolder> dispatchWriterHolder(holder, dispatchHolderSize);
     dispatchWriterHolder.GetRW()->Initialize(holder, addrOfCode,
                                addrOfFail,
                                (size_t)pMTExpected
@@ -2833,9 +2829,9 @@ DispatchHolder *VirtualCallStubManager::GenerateDispatchStubLong(PCODE
     }
     CONTRACT_END;
 
     //allocate from the requisite heap and copy the template over it.
-    DispatchHolder * holder = (DispatchHolder*) (void*)
-        dispatch_heap->AllocAlignedMem(DispatchHolder::GetHolderSize(DispatchStub::e_TYPE_LONG), CODE_SIZE_ALIGN);
-    ExecutableWriterHolder<DispatchHolder> dispatchWriterHolder(holder, sizeof(DispatchHolder) + sizeof(DispatchStubLong));
+    size_t dispatchHolderSize = DispatchHolder::GetHolderSize(DispatchStub::e_TYPE_LONG);
+    DispatchHolder * holder = (DispatchHolder*) (void*)dispatch_heap->AllocAlignedMem(dispatchHolderSize, CODE_SIZE_ALIGN);
+    ExecutableWriterHolder<DispatchHolder> dispatchWriterHolder(holder, dispatchHolderSize);
     dispatchWriterHolder.GetRW()->Initialize(holder, addrOfCode,
                                addrOfFail,
 
From 357c2831663c73b978eef2bd03e0005df2ddb70e Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Sun, 11 Jul 2021 01:56:23 +0200
Subject: [PATCH 7/8] Fix ARM write barrier icache flushing

Also allocate LoaderHeapFreeBlock from regular heap.
---
 src/coreclr/utilcode/loaderheap.cpp | 140 +++++++++++++---------------
 src/coreclr/vm/arm/stubs.cpp        |   4 +
 2 files changed, 68 insertions(+), 76 deletions(-)

diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp
index 90fc0c125c1f7..b3b381b2f9bef 100644
--- a/src/coreclr/utilcode/loaderheap.cpp
+++ b/src/coreclr/utilcode/loaderheap.cpp
@@ -695,15 +695,21 @@ size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHeap);
 struct LoaderHeapFreeBlock
 {
 public:
-    LoaderHeapFreeBlock *m_pNext;    // Pointer to next block on free list
-    size_t               m_dwSize;   // Total size of this block (including this header)
-//! Try not to grow the size of this structure. It places a minimum size on LoaderHeap allocations.
+    LoaderHeapFreeBlock *m_pNext;         // Pointer to next block on free list
+    size_t               m_dwSize;        // Total size of this block
+    void                *m_pBlockAddress; // Virtual address of the block
 
-    static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMemRX, void *pMemRW, size_t dwTotalSize, UnlockedLoaderHeap *pHeap)
+#ifndef DACCESS_COMPILE
+    static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
         STATIC_CONTRACT_GC_NOTRIGGER;
 
+        // The new "nothrow" below failure is handled in a non-fault way, so
+        // make sure that callers with FORBID_FAULT can call this method without
+        // firing the contract violation assert.
+        PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure);
+
         LOADER_HEAP_BEGIN_TRAP_FAULT
 
         // It's illegal to insert a free block that's smaller than the minimum sized allocation -
@@ -722,20 +728,30 @@ struct LoaderHeapFreeBlock
     }
 #endif
 
-        INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);)
-        LoaderHeapFreeBlock *pNewBlockRX = (LoaderHeapFreeBlock*)pMemRX;
-        LoaderHeapFreeBlock *pNewBlockRW = (LoaderHeapFreeBlock*)pMemRW;
-        pNewBlockRW->m_pNext = *ppHead;
-        pNewBlockRW->m_dwSize = dwTotalSize;
-        *ppHead = pNewBlockRX;
+        void* pMemRW = pMem;
+        ExecutableWriterHolder<void> memWriterHolder;
+        if (pHeap->IsExecutable())
+        {
+            memWriterHolder = ExecutableWriterHolder<void>(pMem, dwTotalSize);
+            pMemRW = memWriterHolder.GetRW();
+        }
 
-        MergeBlock(pNewBlockRX, pNewBlockRW, pHeap);
+        INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);)
+        LoaderHeapFreeBlock *pNewBlock = new (nothrow) LoaderHeapFreeBlock;
+        // If we fail allocating the LoaderHeapFreeBlock, ignore the failure and don't insert the free block at all.
+        if (pNewBlock != NULL)
+        {
+            pNewBlock->m_pNext         = *ppHead;
+            pNewBlock->m_dwSize        = dwTotalSize;
+            pNewBlock->m_pBlockAddress = pMem;
+            *ppHead = pNewBlock;
+            MergeBlock(pNewBlock, pHeap);
+        }
 
         LOADER_HEAP_END_TRAP_FAULT
     }
 
-#ifndef DACCESS_COMPILE
-    static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, BOOL fRemoveFromFreeList, UnlockedLoaderHeap *pHeap)
+    static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
         STATIC_CONTRACT_GC_NOTRIGGER;
@@ -752,46 +768,19 @@ struct LoaderHeapFreeBlock
             size_t dwCurSize = pCur->m_dwSize;
             if (dwCurSize == dwSize)
            {
-                pResult = pCur;
+                pResult = pCur->m_pBlockAddress;
                 // Exact match.  Hooray!
-                if (fRemoveFromFreeList)
-                {
-                    ExecutableWriterHolder<LoaderHeapFreeBlock *> walkWriterHolder;
-                    LoaderHeapFreeBlock **ppWalkRW = ppWalk;
-                    if (pHeap->IsExecutable() && (ppWalk != ppHead))
-                    {
-                        walkWriterHolder = ExecutableWriterHolder<LoaderHeapFreeBlock *>(ppWalk, sizeof(LoaderHeapFreeBlock **));
-                        ppWalkRW = walkWriterHolder.GetRW();
-                    }
-                    *ppWalkRW = pCur->m_pNext;
-                }
+                *ppWalk = pCur->m_pNext;
+                delete pCur;
                 break;
             }
             else if (dwCurSize > dwSize && (dwCurSize - dwSize) >= AllocMem_TotalSize(1, pHeap))
             {
                 // Partial match. Ok...
-                pResult = pCur;
-                if (fRemoveFromFreeList)
-                {
-                    ExecutableWriterHolder<LoaderHeapFreeBlock *> walkWriterHolder;
-                    LoaderHeapFreeBlock **ppWalkRW = ppWalk;
-                    if (pHeap->IsExecutable() && (ppWalk != ppHead))
-                    {
-                        walkWriterHolder = ExecutableWriterHolder<LoaderHeapFreeBlock *>(ppWalk, sizeof(LoaderHeapFreeBlock **));
-                        ppWalkRW = walkWriterHolder.GetRW();
-                    }
-                    *ppWalkRW = pCur->m_pNext;
-
-                    void* pMem = (BYTE*)pCur + dwSize;
-                    void* pMemRW = pMem;
-                    ExecutableWriterHolder<void> memWriterHolder;
-                    if (pHeap->IsExecutable())
-                    {
-                        memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
-                        pMemRW = memWriterHolder.GetRW();
-                    }
-                    InsertFreeBlock(ppWalkRW, pMem, pMemRW, dwCurSize - dwSize, pHeap );
-                }
+                pResult = pCur->m_pBlockAddress;
+                *ppWalk = pCur->m_pNext;
+                InsertFreeBlock(ppWalk, ((BYTE*)pCur->m_pBlockAddress) + dwSize, dwCurSize - dwSize, pHeap );
+                delete pCur;
                 break;
             }
@@ -801,7 +790,7 @@ struct LoaderHeapFreeBlock
             ppWalk = &( pCur->m_pNext );
         }
 
-        if (pResult && fRemoveFromFreeList)
+        if (pResult)
         {
             void *pResultRW = pResult;
             ExecutableWriterHolder<void> resultWriterHolder;
@@ -815,15 +804,11 @@ struct LoaderHeapFreeBlock
         }
         LOADER_HEAP_END_TRAP_FAULT
         return pResult;
-
-
-
     }
-#endif // DACCESS_COMPILE
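    // After this commit the bookkeeping nodes live on the regular heap and only
    // point at the free regions via m_pBlockAddress, so the list can be walked
    // and maintained without ever mapping executable pages writeable. The
    // adjacency test MergeBlock performs, as a hypothetical standalone helper
    // (illustration only):
    //
    //     static bool AreAdjacent(const LoaderHeapFreeBlock* pFirst,
    //                             const LoaderHeapFreeBlock* pSecond)
    //     {
    //         return ((BYTE*)pFirst->m_pBlockAddress) + pFirst->m_dwSize ==
    //                (BYTE*)pSecond->m_pBlockAddress;
    //     }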
 
 private:
     // Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened.
-    static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlockRX, LoaderHeapFreeBlock *pFreeBlockRW, UnlockedLoaderHeap *pHeap)
+    static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
@@ -831,10 +816,10 @@ struct LoaderHeapFreeBlock
 
         LOADER_HEAP_BEGIN_TRAP_FAULT
 
-        LoaderHeapFreeBlock *pNextBlock = pFreeBlockRX->m_pNext;
-        size_t               dwSize     = pFreeBlockRX->m_dwSize;
+        LoaderHeapFreeBlock *pNextBlock = pFreeBlock->m_pNext;
+        size_t               dwSize     = pFreeBlock->m_dwSize;
 
-        if (pNextBlock == NULL || ((BYTE*)pNextBlock) != (((BYTE*)pFreeBlockRX) + dwSize))
+        if (pNextBlock == NULL || ((BYTE*)pNextBlock->m_pBlockAddress) != (((BYTE*)pFreeBlock->m_pBlockAddress) + dwSize))
         {
             result = FALSE;
         }
@@ -842,9 +827,17 @@ struct LoaderHeapFreeBlock
         {
             size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize;
             LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext;
-            INDEBUG(memset(pFreeBlockRW, 0xcc, dwCombinedSize);)
-            pFreeBlockRW->m_pNext  = pNextNextBlock;
-            pFreeBlockRW->m_dwSize = dwCombinedSize;
+            void *pMemRW = pFreeBlock->m_pBlockAddress;
+            ExecutableWriterHolder<void> memWriterHolder;
+            if (pHeap->IsExecutable())
+            {
+                memWriterHolder = ExecutableWriterHolder<void>(pFreeBlock->m_pBlockAddress, dwCombinedSize);
+                pMemRW = memWriterHolder.GetRW();
+            }
+            INDEBUG(memset(pMemRW, 0xcc, dwCombinedSize);)
+            pFreeBlock->m_pNext  = pNextNextBlock;
+            pFreeBlock->m_dwSize = dwCombinedSize;
+            delete pNextBlock;
 
             result = TRUE;
         }
@@ -853,7 +846,7 @@
 
         return result;
     }
-
+#endif // DACCESS_COMPILE
 };
@@ -871,8 +864,7 @@ struct LoaderHeapFreeBlock
 //     - z bytes of pad  (DEBUG-ONLY) (where "z" is just enough to pointer-align the following byte)
 //     - a bytes of tag  (DEBUG-ONLY) (where "a" is sizeof(LoaderHeapValidationTag)
 //
-//     - b bytes of pad  (if total size after all this < sizeof(LoaderHeapFreeBlock), pad enough to make it the size of LoaderHeapFreeBlock)
-//     - c bytes of pad  (where "c" is just enough to pointer-align the following byte)
+//     - b bytes of pad  (where "b" is just enough to pointer-align the following byte)
 //
 // ==> Following address is always pointer-aligned
 //=====================================================================================
@@ -893,10 +885,6 @@ inline size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHe
 #ifdef _DEBUG
         dwSize += sizeof(LoaderHeapValidationTag);
 #endif
-        if (dwSize < sizeof(LoaderHeapFreeBlock))
-        {
-            dwSize = sizeof(LoaderHeapFreeBlock);
-        }
     }
     dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT));
@@ -1345,7 +1333,7 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize
     {
         // Any memory available on the free list?
-        void *pData = LoaderHeapFreeBlock::AllocFromFreeList(&m_pFirstFreeBlock, dwSize, TRUE /*fRemoveFromFreeList*/, this);
+        void *pData = LoaderHeapFreeBlock::AllocFromFreeList(&m_pFirstFreeBlock, dwSize, this);
         if (!pData)
         {
             // Enough bytes available in committed region?
@@ -1545,16 +1533,16 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem,
     }
 #endif
 
-    void *pMemRW = pMem;
-    ExecutableWriterHolder<void> memWriterHolder;
-    if (m_Options & LHF_EXECUTABLE)
-    {
-        memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
-        pMemRW = memWriterHolder.GetRW();
-    }
-
     if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize ))
     {
+        void *pMemRW = pMem;
+        ExecutableWriterHolder<void> memWriterHolder;
+        if (m_Options & LHF_EXECUTABLE)
+        {
+            memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
+            pMemRW = memWriterHolder.GetRW();
+        }
+
         // Cool. This was the last block allocated. We can just undo the allocation instead
         // of going to the freelist.
         memset(pMemRW, 0x00, dwSize); // Fill freed region with 0
@@ -1562,7 +1550,7 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem,
     }
     else
     {
-        LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, pMemRW, dwSize, this);
+        LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, dwSize, this);
     }
 }
diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp
index b2bf6e0522ea5..6e62df2370338 100644
--- a/src/coreclr/vm/arm/stubs.cpp
+++ b/src/coreclr/vm/arm/stubs.cpp
@@ -329,6 +329,10 @@ void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength)
 {
     DWORD size = (PBYTE)JIT_PatchedWriteBarrierLast - (PBYTE)JIT_PatchedWriteBarrierStart;
     *ppbStart = (PBYTE)JIT_PatchedWriteBarrierStart;
+    if (IsWriteBarrierCopyEnabled())
+    {
+        *ppbStart = GetWriteBarrierCodeLocation(*ppbStart);
+    }
     *pcbLength = size;
 }
 
From 8be6e1948793dd09a1d17f717043c86c54c2e6ca Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Sun, 11 Jul 2021 14:38:35 +0200
Subject: [PATCH 8/8] Set the W^X default to disabled

---
 src/coreclr/inc/clrconfigvalues.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index bfd43629017a3..40da0cd2c7396 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -738,7 +738,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If COMPlus_LTTng is
 //
 // Executable code
 //
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWriteXorExecute, W("EnableWriteXorExecute"), 1, "Enable W^X for executable memory.");
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWriteXorExecute, W("EnableWriteXorExecute"), 0, "Enable W^X for executable memory.");
 
 #ifdef FEATURE_GDBJIT
 ///
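With the default flipped to 0, W^X is now opt-in. A sketch of how the switch is consumed, mirroring the StaticInitialize change earlier in the series; to exercise the W^X code paths, set COMPlus_EnableWriteXorExecute=1 in the environment (assuming the standard COMPlus_ prefix for EXTERNAL settings):

    // Evaluates to false by default after this commit; true only when the
    // EnableWriteXorExecute knob is explicitly set to a non-zero value.
    bool isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWriteXorExecute) != 0;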