diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 78aa969473525..b4a4859342702 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -119,6 +119,8 @@ add_subdirectory(pal/prebuilt/inc) add_subdirectory(debug/debug-pal) +add_subdirectory(minipal) + if(CLR_CMAKE_TARGET_WIN32) add_subdirectory(gc/sample) endif() @@ -171,6 +173,7 @@ include_directories("classlibnative/cryptography") include_directories("classlibnative/inc") include_directories("${GENERATED_INCLUDE_DIR}") include_directories("hosts/inc") +include_directories("minipal") if(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE) include_directories("${GENERATED_INCLUDE_DIR}/etw") diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index eeb421cac4c2f..0485ff99a99eb 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -224,10 +224,6 @@ if(CLR_CMAKE_TARGET_WIN32) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) endif(CLR_CMAKE_TARGET_WIN32) -if(CLR_CMAKE_TARGET_OSX) - add_definitions(-DFEATURE_WRITEBARRIER_COPY) -endif(CLR_CMAKE_TARGET_OSX) - if (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_FEATURE_EH_FUNCLETS>>>:FEATURE_EH_FUNCLETS>) endif (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/debug/ee/arm64/arm64walker.cpp b/src/coreclr/debug/ee/arm64/arm64walker.cpp index ae6e8c1fc2933..6c4dee9349700 100644 --- a/src/coreclr/debug/ee/arm64/arm64walker.cpp +++ b/src/coreclr/debug/ee/arm64/arm64walker.cpp @@ -171,7 +171,14 @@ BYTE* NativeWalker::SetupOrSimulateInstructionForPatchSkip(T_CONTEXT * context, { CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, 0xd503201f); //Add Nop in buffer - m_pSharedPatchBypassBuffer->RipTargetFixup = ip; //Control Flow simulation alone is done DebuggerPatchSkip::TriggerExceptionHook +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<UINT_PTR> ripTargetFixupWriterHolder(&m_pSharedPatchBypassBuffer->RipTargetFixup, sizeof(UINT_PTR)); + UINT_PTR *pRipTargetFixupRW = ripTargetFixupWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + UINT_PTR *pRipTargetFixupRW = &m_pSharedPatchBypassBuffer->RipTargetFixup; +#endif // HOST_OSX && HOST_ARM64 + + *pRipTargetFixupRW = ip; //Control Flow simulation alone is done in DebuggerPatchSkip::TriggerExceptionHook LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x is a Control Flow instr \n", opcode)); if (walk == WALK_CALL) //initialize Lr diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index b17ae8f115002..f9304d16ab070 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -84,8 +84,13 @@ SharedPatchBypassBuffer* DebuggerControllerPatch::GetOrCreateSharedPatchBypassBu if (m_pSharedPatchBypassBuffer == NULL) { void *pSharedPatchBypassBufferRX = g_pDebugger->GetInteropSafeExecutableHeap()->Alloc(sizeof(SharedPatchBypassBuffer)); +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder<SharedPatchBypassBuffer> sharedPatchBypassBufferWriterHolder((SharedPatchBypassBuffer*)pSharedPatchBypassBufferRX, sizeof(SharedPatchBypassBuffer)); - new (sharedPatchBypassBufferWriterHolder.GetRW()) SharedPatchBypassBuffer(); + void *pSharedPatchBypassBufferRW = sharedPatchBypassBufferWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + void *pSharedPatchBypassBufferRW = pSharedPatchBypassBufferRX; +#endif // HOST_OSX && HOST_ARM64 + new (pSharedPatchBypassBufferRW) SharedPatchBypassBuffer();
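Taken in isolation, the allocate-then-construct pattern this hunk (and several below) applies looks like the following sketch. `AllocExecutable` is a hypothetical stand-in for the interop-safe executable heap call above, and error handling is elided:

```cpp
#include <new>

// Minimal sketch, not part of the patch: construct an object in executable
// memory through a writable alias and hand out only the RX pointer.
SharedPatchBypassBuffer* CreateSharedPatchBypassBuffer()
{
    // Stand-in for GetInteropSafeExecutableHeap()->Alloc(...) used above.
    void* pRX = AllocExecutable(sizeof(SharedPatchBypassBuffer));

#if defined(HOST_OSX) && defined(HOST_ARM64)
    // The holder switches the thread to the writable view of the page
    // (PAL_JitWriteProtect) and switches back when it goes out of scope.
    ExecutableWriterHolder<SharedPatchBypassBuffer> writerHolder(
        (SharedPatchBypassBuffer*)pRX, sizeof(SharedPatchBypassBuffer));
    new (writerHolder.GetRW()) SharedPatchBypassBuffer();
#else
    // Without W^X the RX mapping is directly writable.
    new (pRX) SharedPatchBypassBuffer();
#endif

    return (SharedPatchBypassBuffer*)pRX; // callers never see the RW alias
}
```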
m_pSharedPatchBypassBuffer = (SharedPatchBypassBuffer*)pSharedPatchBypassBufferRX; _ASSERTE(m_pSharedPatchBypassBuffer); @@ -4351,7 +4356,15 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // m_pSharedPatchBypassBuffer = patch->GetOrCreateSharedPatchBypassBuffer(); - BYTE* patchBypass = m_pSharedPatchBypassBuffer->PatchBypass; +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<SharedPatchBypassBuffer> sharedPatchBypassBufferWriterHolder((SharedPatchBypassBuffer*)m_pSharedPatchBypassBuffer, sizeof(SharedPatchBypassBuffer)); + SharedPatchBypassBuffer *pSharedPatchBypassBufferRW = sharedPatchBypassBufferWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + SharedPatchBypassBuffer *pSharedPatchBypassBufferRW = m_pSharedPatchBypassBuffer; +#endif // HOST_OSX && HOST_ARM64 + + BYTE* patchBypassRX = m_pSharedPatchBypassBuffer->PatchBypass; + BYTE* patchBypassRW = pSharedPatchBypassBufferRW->PatchBypass; LOG((LF_CORDB, LL_INFO10000, "DPS::DPS: Patch skip for opcode 0x%.4x at address %p buffer allocated at 0x%.8x\n", patch->opcode, patch->address, m_pSharedPatchBypassBuffer)); // Copy the instruction block over to the patch skip @@ -4367,19 +4380,19 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // the 2nd skip executes the new jump-stamp code and not the original method prologue code. Copying // the code every time ensures that we have the most up-to-date version of the code in the buffer. _ASSERTE( patch->IsBound() ); - CopyInstructionBlock(patchBypass, (const BYTE *)patch->address); + CopyInstructionBlock(patchBypassRW, (const BYTE *)patch->address); // Technically, we could create a patch skipper for an inactive patch, but we rely on the opcode being // set here. _ASSERTE( patch->IsActivated() ); - CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, patch->opcode); + CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypassRW, patch->opcode); LOG((LF_CORDB, LL_EVERYTHING, "SetInstruction was called\n")); // // Look at instruction to get some attributes // - NativeWalker::DecodeInstructionForPatchSkip(patchBypass, &(m_instrAttrib)); + NativeWalker::DecodeInstructionForPatchSkip(patchBypassRX, &(m_instrAttrib)); #if defined(TARGET_AMD64) @@ -4395,33 +4408,33 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // Populate the RIP-relative buffer with the current value if needed // - BYTE* bufferBypass = m_pSharedPatchBypassBuffer->BypassBuffer; + BYTE* bufferBypassRW = pSharedPatchBypassBufferRW->BypassBuffer; // Overwrite the *signed* displacement. 
- int dwOldDisp = *(int*)(&patchBypass[m_instrAttrib.m_dwOffsetToDisp]); + int dwOldDisp = *(int*)(&patchBypassRX[m_instrAttrib.m_dwOffsetToDisp]); int dwNewDisp = offsetof(SharedPatchBypassBuffer, BypassBuffer) - (offsetof(SharedPatchBypassBuffer, PatchBypass) + m_instrAttrib.m_cbInstr); - *(int*)(&patchBypass[m_instrAttrib.m_dwOffsetToDisp]) = dwNewDisp; + *(int*)(&patchBypassRW[m_instrAttrib.m_dwOffsetToDisp]) = dwNewDisp; // This could be an LEA, which we'll just have to change into a MOV // and copy the original address - if (((patchBypass[0] == 0x4C) || (patchBypass[0] == 0x48)) && (patchBypass[1] == 0x8d)) + if (((patchBypassRX[0] == 0x4C) || (patchBypassRX[0] == 0x48)) && (patchBypassRX[1] == 0x8d)) { - patchBypass[1] = 0x8b; // MOV reg, mem + patchBypassRW[1] = 0x8b; // MOV reg, mem _ASSERTE((int)sizeof(void*) <= SharedPatchBypassBuffer::cbBufferBypass); - *(void**)bufferBypass = (void*)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); + *(void**)bufferBypassRW = (void*)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); } else { _ASSERTE(m_instrAttrib.m_cOperandSize <= SharedPatchBypassBuffer::cbBufferBypass); // Copy the data into our buffer. - memcpy(bufferBypass, patch->address + m_instrAttrib.m_cbInstr + dwOldDisp, m_instrAttrib.m_cOperandSize); + memcpy(bufferBypassRW, patch->address + m_instrAttrib.m_cbInstr + dwOldDisp, m_instrAttrib.m_cOperandSize); if (m_instrAttrib.m_fIsWrite) { // save the actual destination address and size so when we TriggerSingleStep() we can update the value - m_pSharedPatchBypassBuffer->RipTargetFixup = (UINT_PTR)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); - m_pSharedPatchBypassBuffer->RipTargetFixupSize = m_instrAttrib.m_cOperandSize; + pSharedPatchBypassBufferRW->RipTargetFixup = (UINT_PTR)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); + pSharedPatchBypassBufferRW->RipTargetFixupSize = m_instrAttrib.m_cOperandSize; } } } @@ -4490,17 +4503,17 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, #else // FEATURE_EMULATE_SINGLESTEP #ifdef TARGET_ARM64 - patchBypass = NativeWalker::SetupOrSimulateInstructionForPatchSkip(context, m_pSharedPatchBypassBuffer, (const BYTE *)patch->address, patch->opcode); + patchBypassRX = NativeWalker::SetupOrSimulateInstructionForPatchSkip(context, m_pSharedPatchBypassBuffer, (const BYTE *)patch->address, patch->opcode); #endif //TARGET_ARM64 //set eip to point to buffer... 
- SetIP(context, (PCODE)patchBypass); + SetIP(context, (PCODE)patchBypassRX); if (context ==(T_CONTEXT*) &c) thread->SetThreadContext(&c); - LOG((LF_CORDB, LL_INFO10000, "DPS::DPS Bypass at 0x%p for opcode %p \n", patchBypass, patch->opcode)); + LOG((LF_CORDB, LL_INFO10000, "DPS::DPS Bypass at 0x%p for opcode %p \n", patchBypassRX, patch->opcode)); // // Turn on single step (if the platform supports it) so we can diff --git a/src/coreclr/debug/ee/controller.h b/src/coreclr/debug/ee/controller.h index 12b1106f7a4b2..6996439c31fba 100644 --- a/src/coreclr/debug/ee/controller.h +++ b/src/coreclr/debug/ee/controller.h @@ -266,14 +266,28 @@ class SharedPatchBypassBuffer LONG AddRef() { - LONG newRefCount = InterlockedIncrement(&m_refCount); +#if !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<LONG> refCountWriterHolder(&m_refCount, sizeof(LONG)); + LONG *pRefCountRW = refCountWriterHolder.GetRW(); +#else // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + LONG *pRefCountRW = &m_refCount; +#endif // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + + LONG newRefCount = InterlockedIncrement(pRefCountRW); _ASSERTE(newRefCount > 0); return newRefCount; } LONG Release() { - LONG newRefCount = InterlockedDecrement(&m_refCount); +#if !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<LONG> refCountWriterHolder(&m_refCount, sizeof(LONG)); + LONG *pRefCountRW = refCountWriterHolder.GetRW(); +#else // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + LONG *pRefCountRW = &m_refCount; +#endif // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + + LONG newRefCount = InterlockedDecrement(pRefCountRW); _ASSERTE(newRefCount >= 0); if (newRefCount == 0) diff --git a/src/coreclr/debug/ee/debugger.cpp b/src/coreclr/debug/ee/debugger.cpp index 53ee5555ace43..e4563a31757f4 100644 --- a/src/coreclr/debug/ee/debugger.cpp +++ b/src/coreclr/debug/ee/debugger.cpp @@ -1317,13 +1317,19 @@ DebuggerEval::DebuggerEval(CONTEXT * pContext, DebuggerIPCE_FuncEvalInfo * pEval // Allocate the breakpoint instruction info in executable memory. void *bpInfoSegmentRX = g_pDebugger->GetInteropSafeExecutableHeap()->Alloc(sizeof(DebuggerEvalBreakpointInfoSegment)); + +#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder<DebuggerEvalBreakpointInfoSegment> bpInfoSegmentWriterHolder((DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX, sizeof(DebuggerEvalBreakpointInfoSegment)); - new (bpInfoSegmentWriterHolder.GetRW()) DebuggerEvalBreakpointInfoSegment(this); + DebuggerEvalBreakpointInfoSegment *bpInfoSegmentRW = bpInfoSegmentWriterHolder.GetRW(); +#else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + DebuggerEvalBreakpointInfoSegment *bpInfoSegmentRW = (DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX; +#endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + new (bpInfoSegmentRW) DebuggerEvalBreakpointInfoSegment(this); m_bpInfoSegment = (DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX; // This must be non-zero so that the saved opcode is non-zero, and on IA64 we want it to be 0x16 // so that we can have a breakpoint instruction in any slot in the bundle. 
- bpInfoSegmentWriterHolder.GetRW()->m_breakpointInstruction[0] = 0x16; + bpInfoSegmentRW->m_breakpointInstruction[0] = 0x16; #if defined(TARGET_ARM) USHORT *bp = (USHORT*)&m_bpInfoSegment->m_breakpointInstruction; *bp = CORDbg_BREAK_INSTRUCTION; @@ -16234,6 +16240,7 @@ void Debugger::ReleaseDebuggerDataLock(Debugger *pDebugger) } #endif // DACCESS_COMPILE +#ifndef DACCESS_COMPILE /* ------------------------------------------------------------------------ * * Functions for DebuggerHeap executable memory allocations * ------------------------------------------------------------------------ */ @@ -16378,6 +16385,7 @@ void* DebuggerHeapExecutableMemoryAllocator::GetPointerToChunkWithUsageUpdate(De return page->GetPointerToChunk(chunkNumber); } +#endif // DACCESS_COMPILE /* ------------------------------------------------------------------------ * * DebuggerHeap impl @@ -16412,7 +16420,7 @@ void DebuggerHeap::Destroy() m_hHeap = NULL; } #endif -#ifndef HOST_WINDOWS +#if !defined(HOST_WINDOWS) && !defined(DACCESS_COMPILE) if (m_execMemAllocator != NULL) { delete m_execMemAllocator; @@ -16439,6 +16447,8 @@ HRESULT DebuggerHeap::Init(BOOL fExecutable) } CONTRACTL_END; +#ifndef DACCESS_COMPILE + // Have knob catch if we don't want to lazy init the debugger. _ASSERTE(!g_DbgShouldntUseDebugger); m_fExecutable = fExecutable; @@ -16472,7 +16482,9 @@ HRESULT DebuggerHeap::Init(BOOL fExecutable) return E_OUTOFMEMORY; } } -#endif +#endif + +#endif // !DACCESS_COMPILE return S_OK; } @@ -16549,7 +16561,10 @@ void *DebuggerHeap::Alloc(DWORD size) size += sizeof(InteropHeapCanary); #endif - void *ret; + void *ret = NULL; + +#ifndef DACCESS_COMPILE + #ifdef USE_INTEROPSAFE_HEAP _ASSERTE(m_hHeap != NULL); ret = ::HeapAlloc(m_hHeap, HEAP_ZERO_MEMORY, size); @@ -16585,7 +16600,7 @@ void *DebuggerHeap::Alloc(DWORD size) InteropHeapCanary * pCanary = InteropHeapCanary::GetFromRawAddr(ret); ret = pCanary->GetUserAddr(); #endif - +#endif // !DACCESS_COMPILE return ret; } @@ -16638,6 +16653,8 @@ void DebuggerHeap::Free(void *pMem) } CONTRACTL_END; +#ifndef DACCESS_COMPILE + #ifdef USE_INTEROPSAFE_CANARY // Check for canary @@ -16673,6 +16690,7 @@ void DebuggerHeap::Free(void *pMem) #endif // HOST_WINDOWS } #endif +#endif // !DACCESS_COMPILE } #ifndef DACCESS_COMPILE diff --git a/src/coreclr/debug/ee/debugger.h b/src/coreclr/debug/ee/debugger.h index f16f8cd6d9d9d..5503de2459099 100644 --- a/src/coreclr/debug/ee/debugger.h +++ b/src/coreclr/debug/ee/debugger.h @@ -1054,6 +1054,8 @@ constexpr uint64_t CHUNKS_PER_DEBUGGERHEAP=(DEBUGGERHEAP_PAGESIZE / EXPECTED_CHU constexpr uint64_t MAX_CHUNK_MASK=((1ull << CHUNKS_PER_DEBUGGERHEAP) - 1); constexpr uint64_t BOOKKEEPING_CHUNK_MASK (1ull << (CHUNKS_PER_DEBUGGERHEAP - 1)); +#ifndef DACCESS_COMPILE + // Forward declaration struct DebuggerHeapExecutableMemoryPage; @@ -1110,8 +1112,13 @@ struct DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage inline void SetNextPage(DebuggerHeapExecutableMemoryPage* nextPage) { +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder<DebuggerHeapExecutableMemoryPage> debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - debuggerHeapPageWriterHolder.GetRW()->chunks[0].bookkeeping.nextPage = nextPage; + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif + pHeapPageRW->chunks[0].bookkeeping.nextPage = nextPage; } inline uint64_t GetPageOccupancy() const @@ -1124,8 +1131,13 @@ struct 
DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage // Can't unset the bookmark chunk! ASSERT((newOccupancy & BOOKKEEPING_CHUNK_MASK) != 0); ASSERT(newOccupancy <= MAX_CHUNK_MASK); +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder<DebuggerHeapExecutableMemoryPage> debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - debuggerHeapPageWriterHolder.GetRW()->chunks[0].bookkeeping.pageOccupancy = newOccupancy; + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif + pHeapPageRW->chunks[0].bookkeeping.pageOccupancy = newOccupancy; } inline void* GetPointerToChunk(int chunkNum) const @@ -1136,14 +1148,18 @@ struct DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage DebuggerHeapExecutableMemoryPage() { - ExecutableWriterHolder<DebuggerHeapExecutableMemoryPage> debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - SetPageOccupancy(BOOKKEEPING_CHUNK_MASK); // only the first bit is set. +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<DebuggerHeapExecutableMemoryPage> debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif for (uint8_t i = 1; i < CHUNKS_PER_DEBUGGERHEAP; i++) { ASSERT(i != 0); - debuggerHeapPageWriterHolder.GetRW()->chunks[i].data.startOfPage = this; - debuggerHeapPageWriterHolder.GetRW()->chunks[i].data.chunkNumber = i; + pHeapPageRW->chunks[i].data.startOfPage = this; + pHeapPageRW->chunks[i].data.chunkNumber = i; } } @@ -1190,6 +1206,8 @@ class DebuggerHeapExecutableMemoryAllocator Crst m_execMemAllocMutex; }; +#endif // DACCESS_COMPILE + // ------------------------------------------------------------------------ * // DebuggerHeap class // For interop debugging, we need a heap that: @@ -1201,6 +1219,8 @@ class DebuggerHeapExecutableMemoryAllocator #define USE_INTEROPSAFE_HEAP #endif +class DebuggerHeapExecutableMemoryAllocator; + class DebuggerHeap { public: diff --git a/src/coreclr/debug/inc/amd64/primitives.h b/src/coreclr/debug/inc/amd64/primitives.h index d8d14b24b5425..9d363938519c7 100644 --- a/src/coreclr/debug/inc/amd64/primitives.h +++ b/src/coreclr/debug/inc/amd64/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - #ifndef CORDB_ADDRESS_TYPE typedef const BYTE CORDB_ADDRESS_TYPE; typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; @@ -191,14 +187,7 @@ inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address) { LIMITED_METHOD_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder<CORDB_ADDRESS_TYPE> breakpointWriterHolder(address, CORDbg_BREAK_INSTRUCTION_SIZE); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = breakpointWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = 0xCC; // int 3 (single byte patch) + *((unsigned char*)address) = 0xCC; // int 3 (single byte patch) FlushInstructionCache(GetCurrentProcess(), address, 1); } @@ -209,14 +198,7 @@ inline void CORDbgSetInstruction(UNALIGNED CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is a host address. 
LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder<CORDB_ADDRESS_TYPE> instructionWriterHolder(address, sizeof(unsigned char)); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = instructionWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = + *((unsigned char*)address) = (unsigned char) instruction; // setting one byte is important FlushInstructionCache(GetCurrentProcess(), address, 1); diff --git a/src/coreclr/debug/inc/arm/primitives.h b/src/coreclr/debug/inc/arm/primitives.h index c4e2d28602e56..269281eb006be 100644 --- a/src/coreclr/debug/inc/arm/primitives.h +++ b/src/coreclr/debug/inc/arm/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - #ifndef THUMB_CODE #define THUMB_CODE 1 #endif @@ -163,14 +159,7 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is a host address. LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder<CORDB_ADDRESS_TYPE> instructionWriterHolder(address, sizeof(PRD_TYPE)); - CORDB_ADDRESS_TYPE* addressRW = instructionWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - CORDB_ADDRESS ptraddr = (CORDB_ADDRESS)addressRW; + CORDB_ADDRESS ptraddr = (CORDB_ADDRESS)address; _ASSERTE(ptraddr & THUMB_CODE); ptraddr &= ~THUMB_CODE; diff --git a/src/coreclr/debug/inc/arm64/primitives.h b/src/coreclr/debug/inc/arm64/primitives.h index 4f4c3f7bcd8f2..05c03c7b3094f 100644 --- a/src/coreclr/debug/inc/arm64/primitives.h +++ b/src/coreclr/debug/inc/arm64/primitives.h @@ -150,13 +150,13 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is a host address. 
LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) +#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX) ExecutableWriterHolder<void> instructionWriterHolder((LPVOID)address, sizeof(PRD_TYPE)); ULONGLONG ptraddr = dac_cast<ULONGLONG>(instructionWriterHolder.GetRW()); -#else // !DBI_COMPILE && !DACCESS_COMPILE +#else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX ULONGLONG ptraddr = dac_cast<ULONGLONG>(address); -#endif // !DBI_COMPILE && !DACCESS_COMPILE +#endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX *(PRD_TYPE *)ptraddr = instruction; FlushInstructionCache(GetCurrentProcess(), address, diff --git a/src/coreclr/debug/inc/i386/primitives.h b/src/coreclr/debug/inc/i386/primitives.h index 313b42c5a1970..2f228b3a3a9a1 100644 --- a/src/coreclr/debug/inc/i386/primitives.h +++ b/src/coreclr/debug/inc/i386/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - typedef const BYTE CORDB_ADDRESS_TYPE; typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; @@ -151,14 +147,7 @@ inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address) { LIMITED_METHOD_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder<CORDB_ADDRESS_TYPE> breakpointWriterHolder(address, CORDbg_BREAK_INSTRUCTION_SIZE); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = breakpointWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = 0xCC; // int 3 (single byte patch) + *((unsigned char*)address) = 0xCC; // int 3 (single byte patch) FlushInstructionCache(GetCurrentProcess(), address, 1); } diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index fae55ecdc3ea5..9b8e4b649864d 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -109,6 +109,7 @@ set(CORECLR_LIBRARIES v3binder System.Globalization.Native-Static interop + coreclrminipal ) if(CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index c48872a0b9424..c7266df7dbb01 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -201,6 +201,10 @@ End Crst Exception End +Crst ExecutableAllocatorLock + AcquiredAfter LoaderHeap ArgBasedStubCache UMEntryThunkFreeListLock +End + Crst ExecuteManRangeLock End @@ -505,6 +509,9 @@ Crst TypeEquivalenceMap AcquiredBefore LoaderHeap End +Crst UMEntryThunkFreeListLock +End + Crst UniqueStack AcquiredBefore LoaderHeap End diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 3f21e41dfa369..40da0cd2c7396 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -735,6 +735,10 @@ RETAIL_CONFIG_STRING_INFO(EXTERNAL_DOTNET_DiagnosticPorts, W("DiagnosticPorts"), RETAIL_CONFIG_STRING_INFO(INTERNAL_LTTngConfig, W("LTTngConfig"), "Configuration for LTTng.") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If COMPlus_LTTng is set to 0, this will prevent the LTTng library from being loaded at runtime") +// +// Executable code +// +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWriteXorExecute, W("EnableWriteXorExecute"), 0, "Enable W^X for executable memory."); #ifdef FEATURE_GDBJIT /// diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h index 
a1bab2ecb906c..7be482c48bb55 100644 --- a/src/coreclr/inc/crsttypes.h +++ b/src/coreclr/inc/crsttypes.h @@ -49,92 +49,94 @@ enum CrstType CrstEventPipe = 31, CrstEventStore = 32, CrstException = 33, - CrstExecuteManRangeLock = 34, - CrstExternalObjectContextCache = 35, - CrstFCall = 36, - CrstFuncPtrStubs = 37, - CrstFusionAppCtx = 38, - CrstGCCover = 39, - CrstGlobalStrLiteralMap = 40, - CrstHandleTable = 41, - CrstHostAssemblyMap = 42, - CrstHostAssemblyMapAdd = 43, - CrstIbcProfile = 44, - CrstIJWFixupData = 45, - CrstIJWHash = 46, - CrstILStubGen = 47, - CrstInlineTrackingMap = 48, - CrstInstMethodHashTable = 49, - CrstInterop = 50, - CrstInteropData = 51, - CrstIsJMCMethod = 52, - CrstISymUnmanagedReader = 53, - CrstJit = 54, - CrstJitGenericHandleCache = 55, - CrstJitInlineTrackingMap = 56, - CrstJitPatchpoint = 57, - CrstJitPerf = 58, - CrstJumpStubCache = 59, - CrstLeafLock = 60, - CrstListLock = 61, - CrstLoaderAllocator = 62, - CrstLoaderAllocatorReferences = 63, - CrstLoaderHeap = 64, - CrstManagedObjectWrapperMap = 65, - CrstMethodDescBackpatchInfoTracker = 66, - CrstModule = 67, - CrstModuleFixup = 68, - CrstModuleLookupTable = 69, - CrstMulticoreJitHash = 70, - CrstMulticoreJitManager = 71, - CrstNativeImageEagerFixups = 72, - CrstNativeImageLoad = 73, - CrstNls = 74, - CrstNotifyGdb = 75, - CrstObjectList = 76, - CrstPEImage = 77, - CrstPendingTypeLoadEntry = 78, - CrstPgoData = 79, - CrstPinnedByrefValidation = 80, - CrstProfilerGCRefDataFreeList = 81, - CrstProfilingAPIStatus = 82, - CrstRCWCache = 83, - CrstRCWCleanupList = 84, - CrstReadyToRunEntryPointToMethodDescMap = 85, - CrstReflection = 86, - CrstReJITGlobalRequest = 87, - CrstRetThunkCache = 88, - CrstSavedExceptionInfo = 89, - CrstSaveModuleProfileData = 90, - CrstSecurityStackwalkCache = 91, - CrstSigConvert = 92, - CrstSingleUseLock = 93, - CrstSpecialStatics = 94, - CrstStackSampler = 95, - CrstStressLog = 96, - CrstStubCache = 97, - CrstStubDispatchCache = 98, - CrstStubUnwindInfoHeapSegments = 99, - CrstSyncBlockCache = 100, - CrstSyncHashLock = 101, - CrstSystemBaseDomain = 102, - CrstSystemDomain = 103, - CrstSystemDomainDelayedUnloadList = 104, - CrstThreadIdDispenser = 105, - CrstThreadpoolTimerQueue = 106, - CrstThreadpoolWaitThreads = 107, - CrstThreadpoolWorker = 108, - CrstThreadStore = 109, - CrstTieredCompilation = 110, - CrstTypeEquivalenceMap = 111, - CrstTypeIDMap = 112, - CrstUMEntryThunkCache = 113, - CrstUniqueStack = 114, - CrstUnresolvedClassLock = 115, - CrstUnwindInfoTableLock = 116, - CrstVSDIndirectionCellLock = 117, - CrstWrapperTemplate = 118, - kNumberOfCrstTypes = 119 + CrstExecutableAllocatorLock = 34, + CrstExecuteManRangeLock = 35, + CrstExternalObjectContextCache = 36, + CrstFCall = 37, + CrstFuncPtrStubs = 38, + CrstFusionAppCtx = 39, + CrstGCCover = 40, + CrstGlobalStrLiteralMap = 41, + CrstHandleTable = 42, + CrstHostAssemblyMap = 43, + CrstHostAssemblyMapAdd = 44, + CrstIbcProfile = 45, + CrstIJWFixupData = 46, + CrstIJWHash = 47, + CrstILStubGen = 48, + CrstInlineTrackingMap = 49, + CrstInstMethodHashTable = 50, + CrstInterop = 51, + CrstInteropData = 52, + CrstIsJMCMethod = 53, + CrstISymUnmanagedReader = 54, + CrstJit = 55, + CrstJitGenericHandleCache = 56, + CrstJitInlineTrackingMap = 57, + CrstJitPatchpoint = 58, + CrstJitPerf = 59, + CrstJumpStubCache = 60, + CrstLeafLock = 61, + CrstListLock = 62, + CrstLoaderAllocator = 63, + CrstLoaderAllocatorReferences = 64, + CrstLoaderHeap = 65, + CrstManagedObjectWrapperMap = 66, + CrstMethodDescBackpatchInfoTracker = 67, + 
CrstModule = 68, + CrstModuleFixup = 69, + CrstModuleLookupTable = 70, + CrstMulticoreJitHash = 71, + CrstMulticoreJitManager = 72, + CrstNativeImageEagerFixups = 73, + CrstNativeImageLoad = 74, + CrstNls = 75, + CrstNotifyGdb = 76, + CrstObjectList = 77, + CrstPEImage = 78, + CrstPendingTypeLoadEntry = 79, + CrstPgoData = 80, + CrstPinnedByrefValidation = 81, + CrstProfilerGCRefDataFreeList = 82, + CrstProfilingAPIStatus = 83, + CrstRCWCache = 84, + CrstRCWCleanupList = 85, + CrstReadyToRunEntryPointToMethodDescMap = 86, + CrstReflection = 87, + CrstReJITGlobalRequest = 88, + CrstRetThunkCache = 89, + CrstSavedExceptionInfo = 90, + CrstSaveModuleProfileData = 91, + CrstSecurityStackwalkCache = 92, + CrstSigConvert = 93, + CrstSingleUseLock = 94, + CrstSpecialStatics = 95, + CrstStackSampler = 96, + CrstStressLog = 97, + CrstStubCache = 98, + CrstStubDispatchCache = 99, + CrstStubUnwindInfoHeapSegments = 100, + CrstSyncBlockCache = 101, + CrstSyncHashLock = 102, + CrstSystemBaseDomain = 103, + CrstSystemDomain = 104, + CrstSystemDomainDelayedUnloadList = 105, + CrstThreadIdDispenser = 106, + CrstThreadpoolTimerQueue = 107, + CrstThreadpoolWaitThreads = 108, + CrstThreadpoolWorker = 109, + CrstThreadStore = 110, + CrstTieredCompilation = 111, + CrstTypeEquivalenceMap = 112, + CrstTypeIDMap = 113, + CrstUMEntryThunkCache = 114, + CrstUMEntryThunkFreeListLock = 115, + CrstUniqueStack = 116, + CrstUnresolvedClassLock = 117, + CrstUnwindInfoTableLock = 118, + CrstVSDIndirectionCellLock = 119, + CrstWrapperTemplate = 120, + kNumberOfCrstTypes = 121 }; #endif // __CRST_TYPES_INCLUDED @@ -147,11 +149,11 @@ int g_rgCrstLevelMap[] = { 10, // CrstAppDomainCache 14, // CrstAppDomainHandleTable - 0, // CrstArgBasedStubCache + 3, // CrstArgBasedStubCache 0, // CrstAssemblyList 12, // CrstAssemblyLoader - 3, // CrstAvailableClass - 4, // CrstAvailableParamTypes + 4, // CrstAvailableClass + 5, // CrstAvailableParamTypes 7, // CrstBaseDomain -1, // CrstCCompRC 13, // CrstClassFactInfoHash @@ -160,7 +162,7 @@ int g_rgCrstLevelMap[] = 6, // CrstCodeFragmentHeap 9, // CrstCodeVersioning 0, // CrstCOMCallWrapper - 4, // CrstCOMWrapperCache + 5, // CrstCOMWrapperCache 3, // CrstDataTest1 0, // CrstDataTest2 0, // CrstDbgTransport @@ -179,9 +181,10 @@ int g_rgCrstLevelMap[] = 18, // CrstEventPipe 0, // CrstEventStore 0, // CrstException + 0, // CrstExecutableAllocatorLock 0, // CrstExecuteManRangeLock 0, // CrstExternalObjectContextCache - 3, // CrstFCall + 4, // CrstFCall 7, // CrstFuncPtrStubs 10, // CrstFusionAppCtx 10, // CrstGCCover @@ -196,25 +199,25 @@ int g_rgCrstLevelMap[] = 3, // CrstInlineTrackingMap 17, // CrstInstMethodHashTable 20, // CrstInterop - 4, // CrstInteropData + 5, // CrstInteropData 0, // CrstIsJMCMethod 7, // CrstISymUnmanagedReader 11, // CrstJit 0, // CrstJitGenericHandleCache 16, // CrstJitInlineTrackingMap - 3, // CrstJitPatchpoint + 4, // CrstJitPatchpoint -1, // CrstJitPerf 6, // CrstJumpStubCache 0, // CrstLeafLock -1, // CrstListLock 15, // CrstLoaderAllocator 16, // CrstLoaderAllocatorReferences - 0, // CrstLoaderHeap + 3, // CrstLoaderHeap 3, // CrstManagedObjectWrapperMap 14, // CrstMethodDescBackpatchInfoTracker - 4, // CrstModule + 5, // CrstModule 15, // CrstModuleFixup - 3, // CrstModuleLookupTable + 4, // CrstModuleLookupTable 0, // CrstMulticoreJitHash 13, // CrstMulticoreJitManager 0, // CrstNativeImageEagerFixups @@ -222,22 +225,22 @@ int g_rgCrstLevelMap[] = 0, // CrstNls 0, // CrstNotifyGdb 2, // CrstObjectList - 4, // CrstPEImage + 5, // CrstPEImage 19, // 
CrstPendingTypeLoadEntry - 3, // CrstPgoData + 4, // CrstPgoData 0, // CrstPinnedByrefValidation 0, // CrstProfilerGCRefDataFreeList 0, // CrstProfilingAPIStatus - 3, // CrstRCWCache + 4, // CrstRCWCache 0, // CrstRCWCleanupList 10, // CrstReadyToRunEntryPointToMethodDescMap 8, // CrstReflection 17, // CrstReJITGlobalRequest - 3, // CrstRetThunkCache + 4, // CrstRetThunkCache 3, // CrstSavedExceptionInfo 0, // CrstSaveModuleProfileData 0, // CrstSecurityStackwalkCache - 3, // CrstSigConvert + 4, // CrstSigConvert 5, // CrstSingleUseLock 0, // CrstSpecialStatics 0, // CrstStackSampler @@ -247,7 +250,7 @@ int g_rgCrstLevelMap[] = 4, // CrstStubUnwindInfoHeapSegments 3, // CrstSyncBlockCache 0, // CrstSyncHashLock - 4, // CrstSystemBaseDomain + 5, // CrstSystemBaseDomain 13, // CrstSystemDomain 0, // CrstSystemDomainDelayedUnloadList 0, // CrstThreadIdDispenser @@ -256,13 +259,14 @@ int g_rgCrstLevelMap[] = 13, // CrstThreadpoolWorker 12, // CrstThreadStore 8, // CrstTieredCompilation - 3, // CrstTypeEquivalenceMap + 4, // CrstTypeEquivalenceMap 10, // CrstTypeIDMap - 3, // CrstUMEntryThunkCache - 3, // CrstUniqueStack + 4, // CrstUMEntryThunkCache + 3, // CrstUMEntryThunkFreeListLock + 4, // CrstUniqueStack 7, // CrstUnresolvedClassLock 3, // CrstUnwindInfoTableLock - 3, // CrstVSDIndirectionCellLock + 4, // CrstVSDIndirectionCellLock 3, // CrstWrapperTemplate }; @@ -303,6 +307,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstEventPipe", "CrstEventStore", "CrstException", + "CrstExecutableAllocatorLock", "CrstExecuteManRangeLock", "CrstExternalObjectContextCache", "CrstFCall", @@ -383,6 +388,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstTypeEquivalenceMap", "CrstTypeIDMap", "CrstUMEntryThunkCache", + "CrstUMEntryThunkFreeListLock", "CrstUniqueStack", "CrstUnresolvedClassLock", "CrstUnwindInfoTableLock", diff --git a/src/coreclr/inc/executableallocator.h b/src/coreclr/inc/executableallocator.h index ce0c6c22f890e..101178f9a4ef0 100644 --- a/src/coreclr/inc/executableallocator.h +++ b/src/coreclr/inc/executableallocator.h @@ -11,6 +11,191 @@ #include "utilcode.h" #include "ex.h" +#include "minipal.h" + +#ifndef DACCESS_COMPILE + +// This class is responsible for allocation of all the executable memory in the runtime. +class ExecutableAllocator +{ + // RX address range block descriptor + struct BlockRX + { + // Next block in a linked list + BlockRX* next; + // Base address of the block + void* baseRX; + // Size of the block + size_t size; + // Offset of the block in the shared memory + size_t offset; + }; + + // RW address range block descriptor + struct BlockRW + { + // Next block in a linked list + BlockRW* next; + // Base address of the RW mapping of the block + void* baseRW; + // Base address of the RX mapping of the block + void* baseRX; + // Size of the block + size_t size; + // Usage reference count of the RW block. RW blocks can be reused + // when multiple mappings overlap in the VA space at the same time + // (even from multiple threads) + size_t refCount; + }; + + typedef void (*FatalErrorHandler)(UINT errorCode, LPCWSTR pszMessage); + + // Instance of the allocator + static ExecutableAllocator* g_instance; + + // Callback to the runtime to report fatal errors + static FatalErrorHandler g_fatalErrorHandler; + +#if USE_UPPER_ADDRESS + // Preferred region to allocate the code in. + static BYTE* g_codeMinAddr; + static BYTE* g_codeMaxAddr; + static BYTE* g_codeAllocStart; + // Next address to try to allocate for code in the preferred region. 
+ static BYTE* g_codeAllocHint; +#endif // USE_UPPER_ADDRESS + + // Caches the COMPlus_EnableWriteXorExecute setting + static bool g_isWXorXEnabled; + + // Head of the linked list of all RX blocks that were allocated by this allocator + BlockRX* m_pFirstBlockRX = NULL; + + // Head of the linked list of free RX blocks that were allocated by this allocator and then backed out + BlockRX* m_pFirstFreeBlockRX = NULL; + + // Head of the linked list of currently mapped RW blocks + BlockRW* m_pFirstBlockRW = NULL; + + // Handle of the double mapped memory mapper + void *m_doubleMemoryMapperHandle = NULL; + + // Maximum size of executable memory this allocator can allocate + size_t m_maxExecutableCodeSize; + + // First free offset in the underlying shared memory. It is not used + // for platforms that don't use shared memory. + size_t m_freeOffset = 0; + + // Last RW mapping cached so that it can be reused for the next mapping + // request if it goes into the same range. + BlockRW* m_cachedMapping = NULL; + + // Synchronization of the public allocator methods + CRITSEC_COOKIE m_CriticalSection; + + // Update currently cached mapping. If the passed in block is the same as the one + // in the cache, it keeps it cached. Otherwise it destroys the currently cached one + // and replaces it by the passed in one. + void UpdateCachedMapping(BlockRW *pBlock); + + // Find existing RW block that maps the whole specified range of RX memory. + // Return NULL if no such block exists. + void* FindRWBlock(void* baseRX, size_t size); + + // Add RW block to the list of existing RW blocks + bool AddRWBlock(void* baseRW, void* baseRX, size_t size); + + // Remove RW block from the list of existing RW blocks and return the base + // address and size the underlying memory was mapped at. + // Return false if no existing RW block contains the passed in address. + bool RemoveRWBlock(void* pRW, void** pUnmapAddress, size_t* pUnmapSize); + + // Find a free block with the closest size >= the requested size. + // Returns NULL if no such block exists. + BlockRX* FindBestFreeBlock(size_t size); + + // Return memory mapping granularity. + static size_t Granularity(); + + // Allocate a block of executable memory of the specified size. + // It doesn't acquire the actual virtual memory, just the + // range of the underlying shared memory. + BlockRX* AllocateBlock(size_t size, bool* pIsFreeBlock); + + // Backout the block allocated by AllocateBlock in case of an + // error. + void BackoutBlock(BlockRX* pBlock, bool isFreeBlock); + + // Allocate range of offsets in the underlying shared memory + bool AllocateOffset(size_t* pOffset, size_t size); + + // Add RX block to the linked list of existing blocks + void AddRXBlock(BlockRX *pBlock); + + // Return true if double mapping is enabled. + static bool IsDoubleMappingEnabled(); + + // Initialize the allocator instance + bool Initialize(); + +public: + + // Return the ExecutableAllocator singleton instance + static ExecutableAllocator* Instance(); + + // Initialize the static members of the ExecutableAllocator and allocate + // and initialize its singleton instance. + static HRESULT StaticInitialize(FatalErrorHandler fatalErrorHandler); + + // Destroy the allocator + ~ExecutableAllocator(); + + // Return true if W^X is enabled + static bool IsWXORXEnabled(); + + // Use this function to initialize the g_codeAllocHint + // during startup. base is runtime .dll base address, + // size is runtime .dll virtual size. 
+ static void InitCodeAllocHint(size_t base, size_t size, int randomPageOffset); + + // Use this function to reset the g_codeAllocHint + // after unloading an AppDomain + static void ResetCodeAllocHint(); + + // Returns TRUE if p is located in near clr.dll that allows us + // to use rel32 IP-relative addressing modes. + static bool IsPreferredExecutableRange(void* p); + + // Reserve the specified amount of virtual address space for executable mapping. + void* Reserve(size_t size); + + // Reserve the specified amount of virtual address space for executable mapping. + // The reserved range must be within the loAddress and hiAddress. If it is not + // possible to reserve memory in such range, the method returns NULL. + void* ReserveWithinRange(size_t size, const void* loAddress, const void* hiAddress); + + // Reserve the specified amount of virtual address space for executable mapping + // exactly at the given address. + void* ReserveAt(void* baseAddressRX, size_t size); + + // Commit the specified range of memory. The memory can be committed as executable (RX) + // or non-executable (RW) based on the passed in isExecutable flag. The non-executable + // allocations are used to allocate data structures that need to be close to the + // executable code due to memory addressing performance related reasons. + void* Commit(void* pStart, size_t size, bool isExecutable); + + // Release the executable memory block starting at the passed in address that was allocated + // by one of the ReserveXXX methods. + void Release(void* pRX); + + // Map the specified block of executable memory as RW + void* MapRW(void* pRX, size_t size); + + // Unmap the RW mapping at the specified address + void UnmapRW(void* pRW); +}; + // Holder class to map read-execute memory as read-write so that it can be modified without using read-write-execute mapping. // At the moment the implementation is dummy, returning the same addresses for both cases and expecting them to be read-write-execute. // The class uses the move semantics to ensure proper unmapping in case of re-assigning of the holder value. 
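The allocator declared above deliberately keeps two views of every executable region: the RX mapping whose address the rest of the runtime sees, and short-lived RW mappings used only while writing. A minimal sketch of the intended call sequence (not part of the patch; error handling and includes elided):

```cpp
// Emit a code blob under W^X using the ExecutableAllocator API declared above.
void* EmitCode(const BYTE* pSource, size_t cbCode)
{
    ExecutableAllocator* pAllocator = ExecutableAllocator::Instance();

    void* pRX = pAllocator->Reserve(cbCode);            // RX address range
    pAllocator->Commit(pRX, cbCode, true /* isExecutable */);

    void* pRW = pAllocator->MapRW(pRX, cbCode);         // temporary writable alias
    memcpy(pRW, pSource, cbCode);                       // write via RW only
    pAllocator->UnmapRW(pRW);                           // drop the writable view

    return pRX;                                         // only RX escapes
}
```

In runtime code the MapRW/UnmapRW pair is normally packaged in the ExecutableWriterHolder whose implementation changes below, so the writable view cannot outlive its scope.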
@@ -30,13 +215,17 @@ class ExecutableWriterHolder void Unmap() { +#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) if (m_addressRX != NULL) { - // TODO: mapping / unmapping for targets using double memory mapping will be added with the double mapped allocator addition -#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) PAL_JitWriteProtect(false); -#endif } +#else + if (m_addressRX != m_addressRW) + { + ExecutableAllocator::Instance()->UnmapRW((void*)m_addressRW); + } +#endif } public: @@ -62,9 +251,11 @@ class ExecutableWriterHolder ExecutableWriterHolder(T* addressRX, size_t size) { m_addressRX = addressRX; +#if defined(HOST_OSX) && defined(HOST_ARM64) m_addressRW = addressRX; -#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) PAL_JitWriteProtect(true); +#else + m_addressRW = (T *)ExecutableAllocator::Instance()->MapRW((void*)addressRX, size); #endif } @@ -79,3 +270,5 @@ class ExecutableWriterHolder return m_addressRW; } }; + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index fb65ea9fa613c..3c42f0850850b 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -302,12 +302,12 @@ #endif // !FEATURE_EH_FUNCLETS #ifdef TARGET_X86 - JITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, JIT_CheckedWriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, JIT_CheckedWriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h index a47034ee2e05c..77df9dfa94d2a 100644 --- a/src/coreclr/inc/utilcode.h +++ b/src/coreclr/inc/utilcode.h @@ -1014,35 +1014,6 @@ void SplitPath(__in SString const &path, #define CLRGetTickCount64() GetTickCount64() -// -// Use this function to initialize the s_CodeAllocHint -// during startup. base is runtime .dll base address, -// size is runtime .dll virtual size. -// -void InitCodeAllocHint(SIZE_T base, SIZE_T size, int randomPageOffset); - - -// -// Use this function to reset the s_CodeAllocHint -// after unloading an AppDomain -// -void ResetCodeAllocHint(); - -// -// Returns TRUE if p is located in near clr.dll that allows us -// to use rel32 IP-relative addressing modes. 
-// -BOOL IsPreferredExecutableRange(void * p); - -// -// Allocate free memory that will be used for executable code -// Handles the special requirements that we have on 64-bit platforms -// where we want the executable memory to be located near mscorwks -// -BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize, - DWORD flAllocationType, - DWORD flProtect); - // // Allocate free memory within the range [pMinAddr..pMaxAddr] using // ClrVirtualQuery to find free memory and ClrVirtualAlloc to allocate it. diff --git a/src/coreclr/minipal/CMakeLists.txt b/src/coreclr/minipal/CMakeLists.txt new file mode 100644 index 0000000000000..3096237d2a2fe --- /dev/null +++ b/src/coreclr/minipal/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories(.) +if (CLR_CMAKE_HOST_UNIX) + add_subdirectory(Unix) +else (CLR_CMAKE_HOST_UNIX) + add_subdirectory(Windows) +endif (CLR_CMAKE_HOST_UNIX) + diff --git a/src/coreclr/minipal/Unix/CMakeLists.txt b/src/coreclr/minipal/Unix/CMakeLists.txt new file mode 100644 index 0000000000000..b56b5017d375f --- /dev/null +++ b/src/coreclr/minipal/Unix/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(coreclrminipal + STATIC + doublemapping.cpp +) diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp new file mode 100644 index 0000000000000..a50b326861aad --- /dev/null +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -0,0 +1,211 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <assert.h> +#include <limits.h> +#include <stdlib.h> +#ifdef TARGET_LINUX +#include <sys/syscall.h> +#include <asm/unistd.h> // __NR_memfd_create +#endif // TARGET_LINUX +#include "minipal.h" + +#if defined(TARGET_OSX) && defined(TARGET_AMD64) +#include <mach/mach.h> +#endif // TARGET_OSX && TARGET_AMD64 + +#ifndef TARGET_OSX + +#ifdef TARGET_64BIT +static const off_t MaxDoubleMappedSize = 2048ULL*1024*1024*1024; +#else +static const off_t MaxDoubleMappedSize = UINT_MAX; +#endif + +#ifdef TARGET_LINUX +#define memfd_create(...) syscall(__NR_memfd_create, __VA_ARGS__) +#endif // TARGET_LINUX + +#endif // TARGET_OSX + +bool VMToOSInterface::CreateDoubleMemoryMapper(void** pHandle, size_t *pMaxExecutableCodeSize) +{ +#ifndef TARGET_OSX + +#ifdef TARGET_FREEBSD + int fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, S_IRWXU); +#else // TARGET_FREEBSD + int fd = memfd_create("doublemapper", MFD_CLOEXEC); +#endif // TARGET_FREEBSD + + if (fd == -1) + { + return false; + } + + if (ftruncate(fd, MaxDoubleMappedSize) == -1) + { + close(fd); + return false; + } + + *pMaxExecutableCodeSize = MaxDoubleMappedSize; + *pHandle = (void*)(size_t)fd; +#else // !TARGET_OSX + *pMaxExecutableCodeSize = SIZE_MAX; + *pHandle = NULL; +#endif // !TARGET_OSX + + return true; +} + +void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) +{ +#ifndef TARGET_OSX + close((int)(size_t)mapperHandle); +#endif +} + +extern "C" void* PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(const void* lpBeginAddress, const void* lpEndAddress, size_t dwSize); + +#ifdef TARGET_OSX +bool IsMapJitFlagNeeded() +{ + static volatile int isMapJitFlagNeeded = -1; + + if (isMapJitFlagNeeded == -1) + { + int mapJitFlagCheckResult = 0; + int pageSize = sysconf(_SC_PAGE_SIZE); + // Try to map a page with read-write-execute protection. It should fail on Mojave hardened runtime and higher. 
+ void* testPage = mmap(NULL, pageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (testPage == MAP_FAILED && (errno == EACCES)) + { + // The mapping has failed with EACCES, check if making the same mapping with MAP_JIT flag works + testPage = mmap(NULL, pageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE | MAP_JIT, -1, 0); + if (testPage != MAP_FAILED) + { + mapJitFlagCheckResult = 1; + } + } + + if (testPage != MAP_FAILED) + { + munmap(testPage, pageSize); + } + + isMapJitFlagNeeded = mapJitFlagCheckResult; + } + + return (bool)isMapJitFlagNeeded; +} +#endif // TARGET_OSX + +void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void* rangeEnd) +{ + int fd = (int)(size_t)mapperHandle; + + if (rangeStart != NULL || rangeEnd != NULL) + { + void* result = PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(rangeStart, rangeEnd, size); +#ifndef TARGET_OSX + if (result != NULL) + { + // Map the shared memory over the range reserved from the executable memory allocator. + result = mmap(result, size, PROT_NONE, MAP_SHARED | MAP_FIXED, fd, offset); + if (result == MAP_FAILED) + { + assert(false); + result = NULL; + } + } +#endif // TARGET_OSX + + return result; + } + +#ifndef TARGET_OSX + void* result = mmap(NULL, size, PROT_NONE, MAP_SHARED, fd, offset); +#else + int mmapFlags = MAP_ANON | MAP_PRIVATE; + if (IsMapJitFlagNeeded()) + { + mmapFlags |= MAP_JIT; + } + void* result = mmap(NULL, size, PROT_NONE, mmapFlags, -1, 0); +#endif + if (result == MAP_FAILED) + { + assert(false); + result = NULL; + } + return result; +} + +void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) +{ + if (mprotect(pStart, size, isExecutable ? 
(PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE)) == -1) + { + return NULL; + } + + return pStart; +} + +bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ +#ifndef TARGET_OSX + int fd = (int)(size_t)mapperHandle; + mmap(pStart, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, offset); + memset(pStart, 0, size); +#endif // TARGET_OSX + return munmap(pStart, size) != -1; +} + +void* VMToOSInterface::GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ +#ifndef TARGET_OSX + int fd = (int)(size_t)mapperHandle; + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); +#else // TARGET_OSX +#ifdef TARGET_AMD64 + vm_address_t startRW; + vm_prot_t curProtection, maxProtection; + kern_return_t kr = vm_remap(mach_task_self(), &startRW, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, + mach_task_self(), (vm_address_t)pStart, FALSE, &curProtection, &maxProtection, VM_INHERIT_NONE); + + if (kr != KERN_SUCCESS) + { + return NULL; + } + + int st = mprotect((void*)startRW, size, PROT_READ | PROT_WRITE); + if (st == -1) + { + munmap((void*)startRW, size); + return NULL; + } + + return (void*)startRW; +#else // TARGET_AMD64 + // This method should not be called on OSX ARM64 + assert(false); + return NULL; +#endif // TARGET_AMD64 +#endif // TARGET_OSX +} + +bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) +{ + return munmap(pStart, size) != -1; +} diff --git a/src/coreclr/minipal/Windows/CMakeLists.txt b/src/coreclr/minipal/Windows/CMakeLists.txt new file mode 100644 index 0000000000000..b56b5017d375f --- /dev/null +++ b/src/coreclr/minipal/Windows/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(coreclrminipal + STATIC + doublemapping.cpp +) diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp new file mode 100644 index 0000000000000..e265f1d139ad0 --- /dev/null +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -0,0 +1,205 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
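The Unix implementation above reduces to mapping the same file-descriptor-backed pages twice with different protections; the Windows version that follows uses CreateFileMapping and MapViewOfFile to the same end. A standalone Linux-only illustration of the mechanism (not part of the patch; assumes x86-64 and glibc 2.27+ for memfd_create, mmap error checks abbreviated):

```cpp
#include <cstdio>
#include <cstring>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

int main()
{
    size_t size = sysconf(_SC_PAGESIZE);
    int fd = memfd_create("demo", 0);                   // anonymous shared memory
    if (fd == -1 || ftruncate(fd, size) == -1)
        return 1;

    // Two views of the same physical pages: RW for writing, RX for executing.
    uint8_t* rw = (uint8_t*)mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    uint8_t* rx = (uint8_t*)mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);

    uint8_t code[] = { 0xB8, 0x2A, 0x00, 0x00, 0x00, 0xC3 }; // mov eax, 42; ret
    memcpy(rw, code, sizeof(code));                     // write through the RW view

    int (*fn)() = (int (*)())rx;                        // execute through the RX view
    printf("%d\n", fn());                               // prints 42
    return 0;
}
```

No mapping is ever writable and executable at the same time, which is exactly the property the EnableWriteXorExecute knob enforces.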
+// + +#include <windows.h> +#include <stdint.h> +#include <assert.h> +#include "minipal.h" + +#define HIDWORD(_qw) ((ULONG)((_qw) >> 32)) +#define LODWORD(_qw) ((ULONG)(_qw)) + +#ifdef TARGET_64BIT +static const uint64_t MaxDoubleMappedSize = 2048ULL*1024*1024*1024; +#else +static const uint64_t MaxDoubleMappedSize = UINT_MAX; +#endif + +#define VIRTUAL_ALLOC_RESERVE_GRANULARITY (64*1024) // 0x10000 (64 KB) +inline size_t ALIGN_UP( size_t val, size_t alignment ) +{ + // alignment must be a power of 2 for this implementation to work (need modulo otherwise) + assert( 0 == (alignment & (alignment - 1)) ); + size_t result = (val + (alignment - 1)) & ~(alignment - 1); + assert( result >= val ); // check for overflow + return result; +} + +template <typename T> inline T ALIGN_UP(T val, size_t alignment) +{ + return (T)ALIGN_UP((size_t)val, alignment); +} + +inline void *GetTopMemoryAddress(void) +{ + static void *result; // = NULL; + if( NULL == result ) + { + SYSTEM_INFO sysInfo; + GetSystemInfo( &sysInfo ); + result = sysInfo.lpMaximumApplicationAddress; + } + return result; +} + +inline void *GetBotMemoryAddress(void) +{ + static void *result; // = NULL; + if( NULL == result ) + { + SYSTEM_INFO sysInfo; + GetSystemInfo( &sysInfo ); + result = sysInfo.lpMinimumApplicationAddress; + } + return result; +} + +#define TOP_MEMORY (GetTopMemoryAddress()) +#define BOT_MEMORY (GetBotMemoryAddress()) + bool VMToOSInterface::CreateDoubleMemoryMapper(void **pHandle, size_t *pMaxExecutableCodeSize) +{ + *pMaxExecutableCodeSize = (size_t)MaxDoubleMappedSize; + *pHandle = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_EXECUTE_READWRITE | SEC_RESERVE, // read/write/execute access + HIDWORD(MaxDoubleMappedSize), // maximum object size (high-order DWORD) + LODWORD(MaxDoubleMappedSize), // maximum object size (low-order DWORD) + NULL); + + return *pHandle != NULL; +} + +void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) +{ + CloseHandle((HANDLE)mapperHandle); +} + +void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *pMinAddr, const void* pMaxAddr) +{ + BYTE *pResult = nullptr; // our return value + + if (size == 0) + { + return nullptr; + } + + // + // First lets normalize the pMinAddr and pMaxAddr values + // + // If pMinAddr is NULL then set it to BOT_MEMORY + if ((pMinAddr == 0) || (pMinAddr < (BYTE *) BOT_MEMORY)) + { + pMinAddr = (BYTE *) BOT_MEMORY; + } + + // If pMaxAddr is NULL then set it to TOP_MEMORY + if ((pMaxAddr == 0) || (pMaxAddr > (BYTE *) TOP_MEMORY)) + { + pMaxAddr = (BYTE *) TOP_MEMORY; + } + + // If pMaxAddr is not greater than pMinAddr we cannot make an allocation + if (pMaxAddr <= pMinAddr) + { + return nullptr; + } + + // If pMinAddr is BOT_MEMORY and pMaxAddr is TOP_MEMORY + // then we can just map the view at an arbitrary address + if ((pMinAddr == (BYTE *) BOT_MEMORY) && (pMaxAddr == (BYTE *) TOP_MEMORY)) + { + return (BYTE*)MapViewOfFile((HANDLE)mapperHandle, + FILE_MAP_EXECUTE | FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size); + } + + // We will do one scan from [pMinAddr .. pMaxAddr] + // First align the tryAddr up to next 64k base address. + // See docs for VirtualAllocEx and lpAddress and 64k alignment for reasons. 
+ // + BYTE * tryAddr = (BYTE *)ALIGN_UP((BYTE *)pMinAddr, VIRTUAL_ALLOC_RESERVE_GRANULARITY); + bool virtualQueryFailed = false; + bool faultInjected = false; + unsigned virtualQueryCount = 0; + + // Now scan memory and try to find a free block of the size requested. + while ((tryAddr + size) <= (BYTE *) pMaxAddr) + { + MEMORY_BASIC_INFORMATION mbInfo; + + // Use VirtualQuery to find out if this address is MEM_FREE + // + virtualQueryCount++; + if (!VirtualQuery((LPCVOID)tryAddr, &mbInfo, sizeof(mbInfo))) + { + // Exit and return nullptr if the VirtualQuery call fails. + virtualQueryFailed = true; + break; + } + + // Is there enough memory free from this start location? + // Note that for most versions of UNIX the mbInfo.RegionSize returned will always be 0 + if ((mbInfo.State == MEM_FREE) && + (mbInfo.RegionSize >= (SIZE_T) size || mbInfo.RegionSize == 0)) + { + // Try mapping the view at this address now + pResult = (BYTE*)MapViewOfFileEx((HANDLE)mapperHandle, + FILE_MAP_EXECUTE | FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size, + tryAddr); + + // Normally this will be successful + // + if (pResult != nullptr) + { + // return pResult + break; + } + + // We might fail in a race. So just move on to next region and continue trying + tryAddr = tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY; + } + else + { + // Try another section of memory + tryAddr = max(tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY, + (BYTE*) mbInfo.BaseAddress + mbInfo.RegionSize); + } + } + + return pResult; +} + +void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) +{ + return VirtualAlloc(pStart, size, MEM_COMMIT, isExecutable ? PAGE_EXECUTE_READ : PAGE_READWRITE); +} + +bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ + // Zero the memory before the unmapping + VirtualAlloc(pStart, size, MEM_COMMIT, PAGE_READWRITE); + memset(pStart, 0, size); + return UnmapViewOfFile(pStart); +} + +void* VMToOSInterface::GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ + return (BYTE*)MapViewOfFile((HANDLE)mapperHandle, + FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size); +} + +bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) +{ + return UnmapViewOfFile(pStart); +} diff --git a/src/coreclr/minipal/minipal.h b/src/coreclr/minipal/minipal.h new file mode 100644 index 0000000000000..39098f9bc1295 --- /dev/null +++ b/src/coreclr/minipal/minipal.h @@ -0,0 +1,78 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// +#include <stddef.h> + +// Interface between the runtime and platform specific functionality +class VMToOSInterface +{ +private: + ~VMToOSInterface() {} +public: + // Create double mapped memory mapper + // Parameters: + // pHandle - receives handle of the double mapped memory mapper + // pMaxExecutableCodeSize - receives the maximum executable memory size it can map + // Return: + // true if it succeeded, false if it failed + static bool CreateDoubleMemoryMapper(void **pHandle, size_t *pMaxExecutableCodeSize); + + // Destroy the double mapped memory mapper represented by the passed in handle + // Parameters: + // mapperHandle - handle of the double mapped memory mapper to destroy + static void DestroyDoubleMemoryMapper(void *mapperHandle); + + // Reserve a block of memory that can be double mapped. 
+ // Parameters: + // mapperHandle - handle of the double mapped memory mapper to use + // offset - offset in the underlying shared memory + // size - size of the block to reserve + // rangeStart + // rangeEnd - Requests reserving virtual memory in the specified range. + // Setting both rangeStart and rangeEnd to 0 means that the + // requested range is not limited. + // When a specific range is requested, it is obligatory. + // Return: + // starting virtual address of the reserved memory or NULL if it failed + static void* ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void* rangeEnd); + + // Commit a block of memory in the range previously reserved by the ReserveDoubleMappedMemory + // Parameters: + // pStart - start address of the virtual address range to commit + // size - size of the memory block to commit + // isExecutable - true means that the mapping should be RX, false means RW + // Return: + // Committed range start + static void* CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable); + + // Release a block of virtual memory previously commited by the CommitDoubleMappedMemory + // Parameters: + // mapperHandle - handle of the double mapped memory mapper to use + // pStart - start address of the virtual address range to release. It must be one + // that was previously returned by the CommitDoubleMappedMemory + // offset - offset in the underlying shared memory + // size - size of the memory block to release + // Return: + // true if it succeeded, false if it failed + static bool ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size); + + // Get a RW mapping for the RX block specified by the arguments + // Parameters: + // mapperHandle - handle of the double mapped memory mapper to use + // pStart - start address of the RX virtual address range. + // offset - offset in the underlying shared memory + // size - size of the memory block to map as RW + // Return: + // Starting virtual address of the RW mapping. + static void* GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size); + + // Release RW mapping of the block specified by the arguments + // Parameters: + // pStart - Start address of the RW virtual address range. It must be an address + // previously returned by the GetRWMapping. + // size - Size of the memory block to release. It must be the size previously + // passed to the GetRWMapping that returned the pStart. + // Return: + // true if it succeeded, false if it failed + static bool ReleaseRWMapping(void* pStart, size_t size); +}; diff --git a/src/coreclr/utilcode/CMakeLists.txt b/src/coreclr/utilcode/CMakeLists.txt index 1ae433adbfd89..8c57742cb6315 100644 --- a/src/coreclr/utilcode/CMakeLists.txt +++ b/src/coreclr/utilcode/CMakeLists.txt @@ -69,6 +69,7 @@ endif(CLR_CMAKE_TARGET_WIN32) set(UTILCODE_SOURCES ${UTILCODE_COMMON_SOURCES} + executableallocator.cpp ) set(UTILCODE_DAC_SOURCES diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp new file mode 100644 index 0000000000000..ac4c326c83784 --- /dev/null +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -0,0 +1,755 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "pedecoder.h" +#include "executableallocator.h" + +#if USE_UPPER_ADDRESS +// Preferred region to allocate the code in. 
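Read as a whole, the interface above is driven in a fixed sequence: one mapper per process, offsets carved out of the shared section, RX commits for execution, and short-lived RW aliases around writes. A hypothetical end-to-end driver (the `EmitCode` wrapper and its arguments are illustrative, assuming minipal.h and <string.h> are included; error handling elided):

```cpp
void EmitCode(const unsigned char* codeBytes, size_t codeSize)
{
    void* mapper;
    size_t maxSize;
    VMToOSInterface::CreateDoubleMemoryMapper(&mapper, &maxSize);

    size_t offset = 0;      // first free offset in the shared section
    size_t size = 0x10000;  // one allocation granule; codeSize <= size

    // RX side: reserve anywhere (no range constraint), commit as executable.
    void* rx = VMToOSInterface::ReserveDoubleMappedMemory(mapper, offset, size, 0, 0);
    VMToOSInterface::CommitDoubleMappedMemory(rx, size, /* isExecutable */ true);

    // RW side: temporary writeable alias of the same underlying pages.
    void* rw = VMToOSInterface::GetRWMapping(mapper, rx, offset, size);
    memcpy(rw, codeBytes, codeSize);             // write code through the alias
    VMToOSInterface::ReleaseRWMapping(rw, size); // drop the alias right away

    // ... execute from rx, and eventually:
    VMToOSInterface::ReleaseDoubleMappedMemory(mapper, rx, offset, size);
    VMToOSInterface::DestroyDoubleMemoryMapper(mapper);
}
```

The `ExecutableAllocator` that the rest of this file introduces layers policy on top of this mechanism: a preferred allocation region near the runtime image, offset recycling, and reference-counted RW mappings.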
+BYTE * ExecutableAllocator::g_codeMinAddr; +BYTE * ExecutableAllocator::g_codeMaxAddr; +BYTE * ExecutableAllocator::g_codeAllocStart; +// Next address to try to allocate for code in the preferred region. +BYTE * ExecutableAllocator::g_codeAllocHint; +#endif // USE_UPPER_ADDRESS + +bool ExecutableAllocator::g_isWXorXEnabled = false; + +ExecutableAllocator::FatalErrorHandler ExecutableAllocator::g_fatalErrorHandler = NULL; + +ExecutableAllocator* ExecutableAllocator::g_instance = NULL; + +bool ExecutableAllocator::IsDoubleMappingEnabled() +{ + LIMITED_METHOD_CONTRACT; + +#if defined(HOST_OSX) && defined(HOST_ARM64) + return false; +#else + return g_isWXorXEnabled; +#endif +} + +bool ExecutableAllocator::IsWXORXEnabled() +{ + LIMITED_METHOD_CONTRACT; + +#if defined(HOST_OSX) && defined(HOST_ARM64) + return true; +#else + return g_isWXorXEnabled; +#endif +} + +extern SYSTEM_INFO g_SystemInfo; + +size_t ExecutableAllocator::Granularity() +{ + LIMITED_METHOD_CONTRACT; + + return g_SystemInfo.dwAllocationGranularity; +} + +// Use this function to initialize the g_codeAllocHint +// during startup. base is runtime .dll base address, +// size is runtime .dll virtual size. +void ExecutableAllocator::InitCodeAllocHint(size_t base, size_t size, int randomPageOffset) +{ +#if USE_UPPER_ADDRESS + +#ifdef _DEBUG + // If GetForceRelocs is enabled we don't constrain the pMinAddr + if (PEDecoder::GetForceRelocs()) + return; +#endif + + // + // If we are using the UPPER_ADDRESS space (on Win64) + // then for any code heap that doesn't specify an address + // range using [pMinAddr..pMaxAddr] we place it in the + // upper address space + // This enables us to avoid having to use long JumpStubs + // to reach the code for our ngen-ed images. + // Which are also placed in the UPPER_ADDRESS space. + // + SIZE_T reach = 0x7FFF0000u; + + // We will choose the preferred code region based on the address of clr.dll. The JIT helpers + // in clr.dll are the most heavily called functions. + g_codeMinAddr = (base + size > reach) ? (BYTE *)(base + size - reach) : (BYTE *)0; + g_codeMaxAddr = (base + reach > base) ? (BYTE *)(base + reach) : (BYTE *)-1; + + BYTE * pStart; + + if (g_codeMinAddr <= (BYTE *)CODEHEAP_START_ADDRESS && + (BYTE *)CODEHEAP_START_ADDRESS < g_codeMaxAddr) + { + // clr.dll got loaded at its preferred base address? (OS without ASLR - pre-Vista) + // Use the code head start address that does not cause collisions with NGen images. + // This logic is coupled with scripts that we use to assign base addresses. + pStart = (BYTE *)CODEHEAP_START_ADDRESS; + } + else + if (base > UINT32_MAX) + { + // clr.dll got address assigned by ASLR? + // Try to occupy the space as far as possible to minimize collisions with other ASLR assigned + // addresses. Do not start at g_codeMinAddr exactly so that we can also reach common native images + // that can be placed at higher addresses than clr.dll. + pStart = g_codeMinAddr + (g_codeMaxAddr - g_codeMinAddr) / 8; + } + else + { + // clr.dll missed the base address? + // Try to occupy the space right after it. 
+ pStart = (BYTE *)(base + size); + } + + // Randomize the address space + pStart += GetOsPageSize() * randomPageOffset; + + g_codeAllocStart = pStart; + g_codeAllocHint = pStart; +#endif +} + +// Use this function to reset the g_codeAllocHint +// after unloading an AppDomain +void ExecutableAllocator::ResetCodeAllocHint() +{ + LIMITED_METHOD_CONTRACT; +#if USE_UPPER_ADDRESS + g_codeAllocHint = g_codeAllocStart; +#endif +} + +// Returns TRUE if p is located in near clr.dll that allows us +// to use rel32 IP-relative addressing modes. +bool ExecutableAllocator::IsPreferredExecutableRange(void * p) +{ + LIMITED_METHOD_CONTRACT; +#if USE_UPPER_ADDRESS + if (g_codeMinAddr <= (BYTE *)p && (BYTE *)p < g_codeMaxAddr) + return true; +#endif + return false; +} + +ExecutableAllocator* ExecutableAllocator::Instance() +{ + LIMITED_METHOD_CONTRACT; + return g_instance; +} + +ExecutableAllocator::~ExecutableAllocator() +{ + if (IsDoubleMappingEnabled()) + { + VMToOSInterface::DestroyDoubleMemoryMapper(m_doubleMemoryMapperHandle); + } +} + +HRESULT ExecutableAllocator::StaticInitialize(FatalErrorHandler fatalErrorHandler) +{ + LIMITED_METHOD_CONTRACT; + + g_fatalErrorHandler = fatalErrorHandler; + g_isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWriteXorExecute) != 0; + g_instance = new (nothrow) ExecutableAllocator(); + if (g_instance == NULL) + { + return E_OUTOFMEMORY; + } + + if (!g_instance->Initialize()) + { + return E_FAIL; + } + + return S_OK; +} + +bool ExecutableAllocator::Initialize() +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + if (!VMToOSInterface::CreateDoubleMemoryMapper(&m_doubleMemoryMapperHandle, &m_maxExecutableCodeSize)) + { + return false; + } + + m_CriticalSection = ClrCreateCriticalSection(CrstExecutableAllocatorLock,CrstFlags(CRST_UNSAFE_ANYMODE | CRST_DEBUGGER_THREAD)); + } + + return true; +} + +//#define ENABLE_CACHED_MAPPINGS + +void ExecutableAllocator::UpdateCachedMapping(BlockRW* pBlock) +{ + LIMITED_METHOD_CONTRACT; +#ifdef ENABLE_CACHED_MAPPINGS + if (m_cachedMapping == NULL) + { + m_cachedMapping = pBlock; + pBlock->refCount++; + } + else if (m_cachedMapping != pBlock) + { + void* unmapAddress = NULL; + size_t unmapSize; + + if (!RemoveRWBlock(m_cachedMapping->baseRW, &unmapAddress, &unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block to unmap was not found")); + } + if (unmapAddress && !VMToOSInterface::ReleaseRWMapping(unmapAddress, unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); + } + m_cachedMapping = pBlock; + pBlock->refCount++; + } +#endif // ENABLE_CACHED_MAPPINGS +} + +void* ExecutableAllocator::FindRWBlock(void* baseRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) + { + if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + pBlock->refCount++; + UpdateCachedMapping(pBlock); + + return (BYTE*)pBlock->baseRW + ((size_t)baseRX - (size_t)pBlock->baseRX); + } + } + + return NULL; +} + +bool ExecutableAllocator::AddRWBlock(void* baseRW, void* baseRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) + { + if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + break; + } + } + + // The new "nothrow" below failure is handled as fail fast since it is not recoverable + 
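`FindRWBlock`, `AddRWBlock`, and `RemoveRWBlock` make RW views shareable: `MapRW` (further down) always maps whole allocation granules, so a later request that falls inside an already-mapped view just bumps its reference count rather than creating a second mapping. A hypothetical trace of that behavior, assuming double mapping is active (sizes and offsets made up):

```cpp
void DemoSharedRWMappings()
{
    ExecutableAllocator* alloc = ExecutableAllocator::Instance();
    size_t gran = alloc->Granularity();

    BYTE* rx = (BYTE*)alloc->Reserve(gran);           // granularity-aligned RX block
    alloc->Commit(rx, gran, /* isExecutable */ true);

    void* rw1 = alloc->MapRW(rx, 0x100);
    // No covering view yet: GetRWMapping creates a granule-sized view and
    // AddRWBlock records it with refCount = 1.

    void* rw2 = alloc->MapRW(rx + 0x80, 0x20);
    // FindRWBlock sees the existing view covers [rx+0x80, rx+0xA0):
    // refCount = 2, and rw2 == (BYTE*)rw1 + 0x80 -- same view, adjusted offset.

    alloc->UnmapRW(rw2); // refCount = 1, the view stays mapped
    alloc->UnmapRW(rw1); // refCount = 0, RemoveRWBlock unlinks it; view unmapped
}
```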
PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); + + BlockRW* pBlockRW = new (nothrow) BlockRW(); + if (pBlockRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block metadata cannot be allocated")); + return false; + } + + pBlockRW->baseRW = baseRW; + pBlockRW->baseRX = baseRX; + pBlockRW->size = size; + pBlockRW->next = m_pFirstBlockRW; + pBlockRW->refCount = 1; + m_pFirstBlockRW = pBlockRW; + + UpdateCachedMapping(pBlockRW); + + return true; +} + +bool ExecutableAllocator::RemoveRWBlock(void* pRW, void** pUnmapAddress, size_t* pUnmapSize) +{ + LIMITED_METHOD_CONTRACT; + + BlockRW* pPrevBlockRW = NULL; + for (BlockRW* pBlockRW = m_pFirstBlockRW; pBlockRW != NULL; pBlockRW = pBlockRW->next) + { + if (pBlockRW->baseRW <= pRW && (size_t)pRW < ((size_t)pBlockRW->baseRW + pBlockRW->size)) + { + // found + pBlockRW->refCount--; + if (pBlockRW->refCount != 0) + { + *pUnmapAddress = NULL; + return true; + } + + if (pPrevBlockRW == NULL) + { + m_pFirstBlockRW = pBlockRW->next; + } + else + { + pPrevBlockRW->next = pBlockRW->next; + } + + *pUnmapAddress = pBlockRW->baseRW; + *pUnmapSize = pBlockRW->size; + + delete pBlockRW; + return true; + } + + pPrevBlockRW = pBlockRW; + } + + return false; +} + +bool ExecutableAllocator::AllocateOffset(size_t* pOffset, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + size_t offset = m_freeOffset; + size_t newFreeOffset = offset + size; + + if (newFreeOffset > m_maxExecutableCodeSize) + { + return false; + } + + m_freeOffset = newFreeOffset; + + *pOffset = offset; + + return true; +} + +void ExecutableAllocator::AddRXBlock(BlockRX* pBlock) +{ + LIMITED_METHOD_CONTRACT; + + pBlock->next = m_pFirstBlockRX; + m_pFirstBlockRX = pBlock; +} + +void* ExecutableAllocator::Commit(void* pStart, size_t size, bool isExecutable) +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + return VMToOSInterface::CommitDoubleMappedMemory(pStart, size, isExecutable); + } + else + { + return ClrVirtualAlloc(pStart, size, MEM_COMMIT, isExecutable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + } +} + +void ExecutableAllocator::Release(void* pRX) +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + // Locate the RX block corresponding to the pRX and remove it from the linked list + BlockRX* pBlock; + BlockRX* pPrevBlock = NULL; + + for (pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) + { + if (pRX == pBlock->baseRX) + { + if (pPrevBlock == NULL) + { + m_pFirstBlockRX = pBlock->next; + } + else + { + pPrevBlock->next = pBlock->next; + } + + break; + } + pPrevBlock = pBlock; + } + + if (pBlock != NULL) + { + VMToOSInterface::ReleaseDoubleMappedMemory(m_doubleMemoryMapperHandle, pRX, pBlock->offset, pBlock->size); + // Put the released block into the free block list + pBlock->baseRX = NULL; + pBlock->next = m_pFirstFreeBlockRX; + m_pFirstFreeBlockRX = pBlock; + } + else + { + // The block was not found, which should never happen. + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RX block to release was not found")); + } + } + else + { + ClrVirtualFree(pRX, 0, MEM_RELEASE); + } +} + +// Find a free block with the closest size >= the requested size. +// Returns NULL if no such block exists. 
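Offsets into the shared section are handed out by a bump pointer (`AllocateOffset`) and recycled through the free list that `FindBestFreeBlock`, directly below, scans for the smallest block that fits. A distilled model of that scheme (illustrative types, not the runtime's; the real code keeps the recycled block descriptor alive rather than deleting it):

```cpp
#include <cstddef>

struct Block { size_t offset; size_t size; Block* next; };

struct OffsetAllocator
{
    size_t freeOffset = 0;      // bump pointer    (m_freeOffset)
    size_t maxSize = 0;         // section size    (m_maxExecutableCodeSize)
    Block* freeList = nullptr;  // recycled blocks (m_pFirstFreeBlockRX)

    bool Allocate(size_t size, size_t* offset)
    {
        // Best fit: the smallest free block large enough for the request.
        Block** best = nullptr;
        for (Block** pp = &freeList; *pp != nullptr; pp = &(*pp)->next)
        {
            if ((*pp)->size >= size && (best == nullptr || (*pp)->size < (*best)->size))
                best = pp;
        }
        if (best != nullptr)
        {
            Block* b = *best;
            *best = b->next;        // unlink, as FindBestFreeBlock does
            *offset = b->offset;
            delete b;
            return true;
        }
        // No recyclable block: bump, failing once the section is exhausted.
        if (freeOffset + size > maxSize)
            return false;
        *offset = freeOffset;
        freeOffset += size;
        return true;
    }
};
```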
+ExecutableAllocator::BlockRX* ExecutableAllocator::FindBestFreeBlock(size_t size) +{ + LIMITED_METHOD_CONTRACT; + + BlockRX* pPrevBlock = NULL; + BlockRX* pPrevBestBlock = NULL; + BlockRX* pBestBlock = NULL; + BlockRX* pBlock = m_pFirstFreeBlockRX; + + while (pBlock != NULL) + { + if (pBlock->size >= size) + { + if (pBestBlock != NULL) + { + if (pBlock->size < pBestBlock->size) + { + pPrevBestBlock = pPrevBlock; + pBestBlock = pBlock; + } + } + else + { + pPrevBestBlock = pPrevBlock; + pBestBlock = pBlock; + } + } + pPrevBlock = pBlock; + pBlock = pBlock->next; + } + + if (pBestBlock != NULL) + { + if (pPrevBestBlock != NULL) + { + pPrevBestBlock->next = pBestBlock->next; + } + else + { + m_pFirstFreeBlockRX = pBestBlock->next; + } + + pBestBlock->next = NULL; + } + + return pBestBlock; +} + +// Allocate a new block of executable memory and the related descriptor structure. +// First try to get it from the free blocks and if there is no suitable free block, +// allocate a new one. +ExecutableAllocator::BlockRX* ExecutableAllocator::AllocateBlock(size_t size, bool* pIsFreeBlock) +{ + LIMITED_METHOD_CONTRACT; + + size_t offset; + BlockRX* block = FindBestFreeBlock(size); + *pIsFreeBlock = (block != NULL); + + if (block == NULL) + { + if (!AllocateOffset(&offset, size)) + { + return NULL; + } + + block = new (nothrow) BlockRX(); + if (block == NULL) + { + return NULL; + } + + block->offset = offset; + block->size = size; + } + + return block; +} + +// Backout a previously allocated block. The block is added to the free blocks list and +// reused for later allocation requests. +void ExecutableAllocator::BackoutBlock(BlockRX* pBlock, bool isFreeBlock) +{ + LIMITED_METHOD_CONTRACT; + + if (!isFreeBlock) + { + m_freeOffset -= pBlock->size; + delete pBlock; + } + else + { + pBlock->next = m_pFirstFreeBlockRX; + m_pFirstFreeBlockRX = pBlock; + } +} + +// Reserve executable memory within the specified virtual address space range. If it is not possible to +// reserve memory in that range, the method returns NULL and nothing is allocated. +void* ExecutableAllocator::ReserveWithinRange(size_t size, const void* loAddress, const void* hiAddress) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE((size & (Granularity() - 1)) == 0); + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(size, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + void *result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, loAddress, hiAddress); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + + return result; + } + else + { + DWORD allocationType = MEM_RESERVE; +#ifdef HOST_UNIX + // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. + // This will allow us to place JIT'ed code close to the coreclr library + // and thus improve performance by avoiding jump stubs in managed code. + allocationType |= MEM_RESERVE_EXECUTABLE; +#endif + return ClrVirtualAllocWithinRange((const BYTE*)loAddress, (const BYTE*)hiAddress, size, allocationType, PAGE_NOACCESS); + } +} + +// Reserve executable memory. On Windows it tries to use the allocation hints to +// allocate memory close to the previously allocated executable memory and loaded +// executable files. 
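The hint machinery exists because x64 rel32 call and jump displacements reach only ±2 GB: any code heap placed within `0x7FFF0000` bytes of the runtime image can be reached with a 32-bit displacement, avoiding jump stubs. A quick sanity check of the window `InitCodeAllocHint` computes, with a hypothetical load address and image size:

```cpp
#include <cstdint>
#include <cassert>

int main()
{
    const uint64_t reach = 0x7FFF0000;          // just under 2 GB, as above
    const uint64_t base  = 0x00007FFD12340000;  // hypothetical clr.dll base
    const uint64_t size  = 0x0000000000A00000;  // hypothetical image size

    const uint64_t minAddr = base + size - reach;   // g_codeMinAddr
    const uint64_t maxAddr = base + reach;          // g_codeMaxAddr

    // Every byte of [minAddr, maxAddr) is within a rel32 displacement of the
    // image, from both ends of it.
    assert((int64_t)(maxAddr - base) <= INT32_MAX);
    assert((int64_t)(base + size - minAddr) <= INT32_MAX);
    return 0;
}
```

`Reserve`, which follows, consumes the hint: it tries the region after the hint first, then before it, and only then falls back to an unconstrained reservation.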
+void* ExecutableAllocator::Reserve(size_t size) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE((size & (Granularity() - 1)) == 0); + + BYTE *result = NULL; + +#if USE_UPPER_ADDRESS + // + // If we are using the UPPER_ADDRESS space (on Win64) + // then for any heap that will contain executable code + // we will place it in the upper address space + // + // This enables us to avoid having to use JumpStubs + // to reach the code for our ngen-ed images on x64, + // since they are also placed in the UPPER_ADDRESS space. + // + BYTE * pHint = g_codeAllocHint; + + if (size <= (SIZE_T)(g_codeMaxAddr - g_codeMinAddr) && pHint != NULL) + { + // Try to allocate in the preferred region after the hint + result = (BYTE*)ReserveWithinRange(size, pHint, g_codeMaxAddr); + if (result != NULL) + { + g_codeAllocHint = result + size; + } + else + { + // Try to allocate in the preferred region before the hint + result = (BYTE*)ReserveWithinRange(size, g_codeMinAddr, pHint + size); + + if (result != NULL) + { + g_codeAllocHint = result + size; + } + + g_codeAllocHint = NULL; + } + } + + // Fall through to +#endif // USE_UPPER_ADDRESS + + if (result == NULL) + { + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(size, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + result = (BYTE*)VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, 0, 0); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + } + else + { + DWORD allocationType = MEM_RESERVE; +#ifdef HOST_UNIX + // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. + // This will allow us to place JIT'ed code close to the coreclr library + // and thus improve performance by avoiding jump stubs in managed code. + allocationType |= MEM_RESERVE_EXECUTABLE; +#endif + result = (BYTE*)ClrVirtualAlloc(NULL, size, allocationType, PAGE_NOACCESS); + } + } + + return result; +} + +// Reserve a block of executable memory at the specified virtual address. If it is not +// possible, the method returns NULL. +void* ExecutableAllocator::ReserveAt(void* baseAddressRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE((size & (Granularity() - 1)) == 0); + + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(size, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + void* result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, baseAddressRX, baseAddressRX); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + + return result; + } + else + { + return VirtualAlloc(baseAddressRX, size, MEM_RESERVE, PAGE_NOACCESS); + } +} + +// Map an executable memory block as writeable. If there is already a mapping +// covering the specified block, return that mapping instead of creating a new one. +// Return starting address of the writeable mapping. 
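`MapRW` and `UnmapRW` below are the primitives behind the `ExecutableWriterHolder` used throughout this change; the holder is essentially RAII over this pair. A reduced sketch of the pattern (an assumption-laden simplification; the real holder lives in the runtime's headers and carries more state):

```cpp
template <typename T>
class WriterHolderSketch
{
    T* m_rw;
public:
    WriterHolderSketch(T* rx, size_t size)
    {
        // Under W^X this returns a separate RW alias; with double mapping
        // disabled, MapRW just hands back the input pointer.
        m_rw = (T*)ExecutableAllocator::Instance()->MapRW(rx, size);
    }
    ~WriterHolderSketch()
    {
        ExecutableAllocator::Instance()->UnmapRW(m_rw);
    }
    T* GetRW() { return m_rw; }
};

// Usage shape: patch RX code without ever making the page writeable+executable.
// {
//     WriterHolderSketch<BYTE> holder(rxCode, cbPatch);
//     memcpy(holder.GetRW(), newBytes, cbPatch);
// }   // RW alias released here; rxCode stays RX the whole time
```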
+void* ExecutableAllocator::MapRW(void* pRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + if (!IsDoubleMappingEnabled()) + { + return pRX; + } + + CRITSEC_Holder csh(m_CriticalSection); + + void* result = FindRWBlock(pRX, size); + if (result != NULL) + { + return result; + } + + for (BlockRX* pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) + { + if (pRX >= pBlock->baseRX && ((size_t)pRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + // Offset of the RX address in the originally allocated block + size_t offset = (size_t)pRX - (size_t)pBlock->baseRX; + // Offset of the RX address that will start the newly mapped block + size_t mapOffset = ALIGN_DOWN(offset, Granularity()); + // Size of the block we will map + size_t mapSize = ALIGN_UP(offset - mapOffset + size, Granularity()); + void* pRW = VMToOSInterface::GetRWMapping(m_doubleMemoryMapperHandle, (BYTE*)pBlock->baseRX + mapOffset, pBlock->offset + mapOffset, mapSize); + + if (pRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Failed to create RW mapping for RX memory")); + } + + AddRWBlock(pRW, (BYTE*)pBlock->baseRX + mapOffset, mapSize); + + return (void*)((size_t)pRW + (offset - mapOffset)); + } + else if (pRX >= pBlock->baseRX && pRX < (void*)((size_t)pBlock->baseRX + pBlock->size)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Attempting to RW map a block that crosses the end of the allocated RX range")); + } + else if (pRX < pBlock->baseRX && (void*)((size_t)pRX + size) > pBlock->baseRX) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Attempting to map a block that crosses the beginning of the allocated range")); + } + } + + // The executable memory block was not found, so we cannot provide the writeable mapping. + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RX block to map as RW was not found")); + return NULL; +} + +// Unmap writeable mapping at the specified address. The address must be an address +// returned by the MapRW method. +void ExecutableAllocator::UnmapRW(void* pRW) +{ + LIMITED_METHOD_CONTRACT; + + if (!IsDoubleMappingEnabled()) + { + return; + } + + CRITSEC_Holder csh(m_CriticalSection); + _ASSERTE(pRW != NULL); + + void* unmapAddress = NULL; + size_t unmapSize; + + if (!RemoveRWBlock(pRW, &unmapAddress, &unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block to unmap was not found")); + } + + if (unmapAddress && !VMToOSInterface::ReleaseRWMapping(unmapAddress, unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); + } +} diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp index adaf07d8f5825..b3b381b2f9bef 100644 --- a/src/coreclr/utilcode/loaderheap.cpp +++ b/src/coreclr/utilcode/loaderheap.cpp @@ -695,15 +695,21 @@ size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHeap); struct LoaderHeapFreeBlock { public: - LoaderHeapFreeBlock *m_pNext; // Pointer to next block on free list - size_t m_dwSize; // Total size of this block (including this header) -//! Try not to grow the size of this structure. It places a minimum size on LoaderHeap allocations. 
+ LoaderHeapFreeBlock *m_pNext; // Pointer to next block on free list + size_t m_dwSize; // Total size of this block + void *m_pBlockAddress; // Virtual address of the block +#ifndef DACCESS_COMPILE static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap) { STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; + // The new "nothrow" below failure is handled in a non-fault way, so + // make sure that callers with FORBID_FAULT can call this method without + // firing the contract violation assert. + PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); + LOADER_HEAP_BEGIN_TRAP_FAULT // It's illegal to insert a free block that's smaller than the minimum sized allocation - @@ -722,19 +728,30 @@ struct LoaderHeapFreeBlock } #endif - INDEBUG(memset(pMem, 0xcc, dwTotalSize);) - LoaderHeapFreeBlock *pNewBlock = (LoaderHeapFreeBlock*)pMem; - pNewBlock->m_pNext = *ppHead; - pNewBlock->m_dwSize = dwTotalSize; - *ppHead = pNewBlock; + void* pMemRW = pMem; + ExecutableWriterHolder memWriterHolder; + if (pHeap->IsExecutable()) + { + memWriterHolder = ExecutableWriterHolder(pMem, dwTotalSize); + pMemRW = memWriterHolder.GetRW(); + } - MergeBlock(pNewBlock, pHeap); + INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);) + LoaderHeapFreeBlock *pNewBlock = new (nothrow) LoaderHeapFreeBlock; + // If we fail allocating the LoaderHeapFreeBlock, ignore the failure and don't insert the free block at all. + if (pNewBlock != NULL) + { + pNewBlock->m_pNext = *ppHead; + pNewBlock->m_dwSize = dwTotalSize; + pNewBlock->m_pBlockAddress = pMem; + *ppHead = pNewBlock; + MergeBlock(pNewBlock, pHeap); + } LOADER_HEAP_END_TRAP_FAULT } - - static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, BOOL fRemoveFromFreeList, UnlockedLoaderHeap *pHeap) + static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, UnlockedLoaderHeap *pHeap) { STATIC_CONTRACT_NOTHROW; STATIC_CONTRACT_GC_NOTRIGGER; @@ -751,23 +768,19 @@ struct LoaderHeapFreeBlock size_t dwCurSize = pCur->m_dwSize; if (dwCurSize == dwSize) { - pResult = pCur; + pResult = pCur->m_pBlockAddress; // Exact match. Hooray! - if (fRemoveFromFreeList) - { - *ppWalk = pCur->m_pNext; - } + *ppWalk = pCur->m_pNext; + delete pCur; break; } else if (dwCurSize > dwSize && (dwCurSize - dwSize) >= AllocMem_TotalSize(1, pHeap)) { // Partial match. Ok... - pResult = pCur; - if (fRemoveFromFreeList) - { - *ppWalk = pCur->m_pNext; - InsertFreeBlock(ppWalk, ((BYTE*)pCur) + dwSize, dwCurSize - dwSize, pHeap ); - } + pResult = pCur->m_pBlockAddress; + *ppWalk = pCur->m_pNext; + InsertFreeBlock(ppWalk, ((BYTE*)pCur->m_pBlockAddress) + dwSize, dwCurSize - dwSize, pHeap ); + delete pCur; break; } @@ -777,19 +790,22 @@ struct LoaderHeapFreeBlock ppWalk = &( pCur->m_pNext ); } - if (pResult && fRemoveFromFreeList) + if (pResult) { + void *pResultRW = pResult; + ExecutableWriterHolder resultWriterHolder; + if (pHeap->IsExecutable()) + { + resultWriterHolder = ExecutableWriterHolder(pResult, dwSize); + pResultRW = resultWriterHolder.GetRW(); + } // Callers of loaderheap assume allocated memory is zero-inited so we must preserve this invariant! - memset(pResult, 0, dwSize); + memset(pResultRW, 0, dwSize); } LOADER_HEAP_END_TRAP_FAULT return pResult; - - - } - private: // Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened. 
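The `LoaderHeapFreeBlock` rework in this hunk is a direct consequence of W^X: the free-list node used to be written into the freed block itself, which would now mean taking an RW mapping of possibly executable heap memory just to maintain the list. The node is instead an ordinary heap allocation referring to the block out-of-band, which also lets `AllocMem_TotalSize`, further down, drop the minimum-allocation padding. The shape of the change, as a sketch:

```cpp
#include <cstddef>

struct InBandNode       // old layout: the node lived inside the freed block,
{                       // so InsertFreeBlock wrote list links into loader-heap
    InBandNode* m_pNext;    // memory: (InBandNode*)pMem -- and pMem may be RX!
    size_t      m_dwSize;
};

struct OutOfBandNode    // new layout: a normal heap object describing the block
{
    OutOfBandNode* m_pNext;
    size_t         m_dwSize;
    void*          m_pBlockAddress; // the RX block is referenced, never written
};
```

This is also why `MergeBlock`, below, now compares `m_pBlockAddress` adjacency instead of the node addresses themselves.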
static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap) @@ -803,7 +819,7 @@ struct LoaderHeapFreeBlock LoaderHeapFreeBlock *pNextBlock = pFreeBlock->m_pNext; size_t dwSize = pFreeBlock->m_dwSize; - if (pNextBlock == NULL || ((BYTE*)pNextBlock) != (((BYTE*)pFreeBlock) + dwSize)) + if (pNextBlock == NULL || ((BYTE*)pNextBlock->m_pBlockAddress) != (((BYTE*)pFreeBlock->m_pBlockAddress) + dwSize)) { result = FALSE; } @@ -811,9 +827,17 @@ struct LoaderHeapFreeBlock { size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize; LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext; - INDEBUG(memset(pFreeBlock, 0xcc, dwCombinedSize);) + void *pMemRW = pFreeBlock->m_pBlockAddress; + ExecutableWriterHolder memWriterHolder; + if (pHeap->IsExecutable()) + { + memWriterHolder = ExecutableWriterHolder(pFreeBlock->m_pBlockAddress, dwCombinedSize); + pMemRW = memWriterHolder.GetRW(); + } + INDEBUG(memset(pMemRW, 0xcc, dwCombinedSize);) pFreeBlock->m_pNext = pNextNextBlock; pFreeBlock->m_dwSize = dwCombinedSize; + delete pNextBlock; result = TRUE; } @@ -822,7 +846,7 @@ struct LoaderHeapFreeBlock return result; } - +#endif // DACCESS_COMPILE }; @@ -840,8 +864,7 @@ struct LoaderHeapFreeBlock // - z bytes of pad (DEBUG-ONLY) (where "z" is just enough to pointer-align the following byte) // - a bytes of tag (DEBUG-ONLY) (where "a" is sizeof(LoaderHeapValidationTag) // -// - b bytes of pad (if total size after all this < sizeof(LoaderHeapFreeBlock), pad enough to make it the size of LoaderHeapFreeBlock) -// - c bytes of pad (where "c" is just enough to pointer-align the following byte) +// - b bytes of pad (where "b" is just enough to pointer-align the following byte) // // ==> Following address is always pointer-aligned //===================================================================================== @@ -862,10 +885,6 @@ inline size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHe #ifdef _DEBUG dwSize += sizeof(LoaderHeapValidationTag); #endif - if (dwSize < sizeof(LoaderHeapFreeBlock)) - { - dwSize = sizeof(LoaderHeapFreeBlock); - } } dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT)); @@ -977,9 +996,7 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() if (fReleaseMemory) { - BOOL fSuccess; - fSuccess = ClrVirtualFree(pVirtualAddress, 0, MEM_RELEASE); - _ASSERTE(fSuccess); + ExecutableAllocator::Instance()->Release(pVirtualAddress); } delete pSearch; @@ -987,9 +1004,7 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() if (m_reservedBlock.m_fReleaseMemory) { - BOOL fSuccess; - fSuccess = ClrVirtualFree(m_reservedBlock.pVirtualAddress, 0, MEM_RELEASE); - _ASSERTE(fSuccess); + ExecutableAllocator::Instance()->Release(m_reservedBlock.pVirtualAddress); } INDEBUG(s_dwNumInstancesOfLoaderHeaps --;) @@ -1058,7 +1073,7 @@ void ReleaseReservedMemory(BYTE* value) { if (value) { - ClrVirtualFree(value, 0, MEM_RELEASE); + ExecutableAllocator::Instance()->Release(value); } } @@ -1114,7 +1129,9 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) // Reserve pages // - pData = ClrVirtualAllocExecutable(dwSizeToReserve, MEM_RESERVE, PAGE_NOACCESS); + // Reserve the memory for even non-executable stuff close to the executable code, as it has profound effect + // on e.g. a static variable access performance. 
+ pData = (BYTE *)ExecutableAllocator::Instance()->Reserve(dwSizeToReserve); if (pData == NULL) { return FALSE; @@ -1140,7 +1157,7 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) } // Commit first set of pages, since it will contain the LoaderHeapBlock - void *pTemp = ClrVirtualAlloc(pData, dwSizeToCommit, MEM_COMMIT, (m_Options & LHF_EXECUTABLE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); if (pTemp == NULL) { //_ASSERTE(!"Unable to ClrVirtualAlloc commit in a loaderheap"); @@ -1213,7 +1230,7 @@ BOOL UnlockedLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); // Yes, so commit the desired number of reserved pages - void *pData = ClrVirtualAlloc(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, MEM_COMMIT, (m_Options & LHF_EXECUTABLE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + void *pData = ExecutableAllocator::Instance()->Commit(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); if (pData == NULL) return FALSE; @@ -1316,7 +1333,7 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize { // Any memory available on the free list? - void *pData = LoaderHeapFreeBlock::AllocFromFreeList(&m_pFirstFreeBlock, dwSize, TRUE /*fRemoveFromFreeList*/, this); + void *pData = LoaderHeapFreeBlock::AllocFromFreeList(&m_pFirstFreeBlock, dwSize, this); if (!pData) { // Enough bytes available in committed region? @@ -1518,8 +1535,6 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize )) { - // Cool. This was the last block allocated. We can just undo the allocation instead - // of going to the freelist. void *pMemRW = pMem; ExecutableWriterHolder memWriterHolder; if (m_Options & LHF_EXECUTABLE) @@ -1527,6 +1542,9 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, memWriterHolder = ExecutableWriterHolder(pMem, dwSize); pMemRW = memWriterHolder.GetRW(); } + + // Cool. This was the last block allocated. We can just undo the allocation instead + // of going to the freelist. memset(pMemRW, 0x00, dwSize); // Fill freed region with 0 m_pAllocPtr = (BYTE*)pMem; } @@ -1534,7 +1552,6 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem, { LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, dwSize, this); } - } diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index 0026d1f619f14..e7b1755b2b1c4 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -352,168 +352,6 @@ HRESULT FakeCoCreateInstanceEx(REFCLSID rclsid, return hr; } -#if USE_UPPER_ADDRESS -static BYTE * s_CodeMinAddr; // Preferred region to allocate the code in. -static BYTE * s_CodeMaxAddr; -static BYTE * s_CodeAllocStart; -static BYTE * s_CodeAllocHint; // Next address to try to allocate for code in the preferred region. -#endif - -// -// Use this function to initialize the s_CodeAllocHint -// during startup. base is runtime .dll base address, -// size is runtime .dll virtual size. 
-// -void InitCodeAllocHint(SIZE_T base, SIZE_T size, int randomPageOffset) -{ -#if USE_UPPER_ADDRESS - -#ifdef _DEBUG - // If GetForceRelocs is enabled we don't constrain the pMinAddr - if (PEDecoder::GetForceRelocs()) - return; -#endif - -// - // If we are using the UPPER_ADDRESS space (on Win64) - // then for any code heap that doesn't specify an address - // range using [pMinAddr..pMaxAddr] we place it in the - // upper address space - // This enables us to avoid having to use long JumpStubs - // to reach the code for our ngen-ed images. - // Which are also placed in the UPPER_ADDRESS space. - // - SIZE_T reach = 0x7FFF0000u; - - // We will choose the preferred code region based on the address of clr.dll. The JIT helpers - // in clr.dll are the most heavily called functions. - s_CodeMinAddr = (base + size > reach) ? (BYTE *)(base + size - reach) : (BYTE *)0; - s_CodeMaxAddr = (base + reach > base) ? (BYTE *)(base + reach) : (BYTE *)-1; - - BYTE * pStart; - - if (s_CodeMinAddr <= (BYTE *)CODEHEAP_START_ADDRESS && - (BYTE *)CODEHEAP_START_ADDRESS < s_CodeMaxAddr) - { - // clr.dll got loaded at its preferred base address? (OS without ASLR - pre-Vista) - // Use the code head start address that does not cause collisions with NGen images. - // This logic is coupled with scripts that we use to assign base addresses. - pStart = (BYTE *)CODEHEAP_START_ADDRESS; - } - else - if (base > UINT32_MAX) - { - // clr.dll got address assigned by ASLR? - // Try to occupy the space as far as possible to minimize collisions with other ASLR assigned - // addresses. Do not start at s_CodeMinAddr exactly so that we can also reach common native images - // that can be placed at higher addresses than clr.dll. - pStart = s_CodeMinAddr + (s_CodeMaxAddr - s_CodeMinAddr) / 8; - } - else - { - // clr.dll missed the base address? - // Try to occupy the space right after it. - pStart = (BYTE *)(base + size); - } - - // Randomize the address space - pStart += GetOsPageSize() * randomPageOffset; - - s_CodeAllocStart = pStart; - s_CodeAllocHint = pStart; -#endif -} - -// -// Use this function to reset the s_CodeAllocHint -// after unloading an AppDomain -// -void ResetCodeAllocHint() -{ - LIMITED_METHOD_CONTRACT; -#if USE_UPPER_ADDRESS - s_CodeAllocHint = s_CodeAllocStart; -#endif -} - -// -// Returns TRUE if p is located in near clr.dll that allows us -// to use rel32 IP-relative addressing modes. -// -BOOL IsPreferredExecutableRange(void * p) -{ - LIMITED_METHOD_CONTRACT; -#if USE_UPPER_ADDRESS - if (s_CodeMinAddr <= (BYTE *)p && (BYTE *)p < s_CodeMaxAddr) - return TRUE; -#endif - return FALSE; -} - -// -// Allocate free memory that will be used for executable code -// Handles the special requirements that we have on 64-bit platforms -// where we want the executable memory to be located near clr.dll -// -BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize, - DWORD flAllocationType, - DWORD flProtect) -{ - CONTRACTL - { - NOTHROW; - } - CONTRACTL_END; - -#if USE_UPPER_ADDRESS - // - // If we are using the UPPER_ADDRESS space (on Win64) - // then for any heap that will contain executable code - // we will place it in the upper address space - // - // This enables us to avoid having to use JumpStubs - // to reach the code for our ngen-ed images on x64, - // since they are also placed in the UPPER_ADDRESS space. 
- // - BYTE * pHint = s_CodeAllocHint; - - if (dwSize <= (SIZE_T)(s_CodeMaxAddr - s_CodeMinAddr) && pHint != NULL) - { - // Try to allocate in the preferred region after the hint - BYTE * pResult = ClrVirtualAllocWithinRange(pHint, s_CodeMaxAddr, dwSize, flAllocationType, flProtect); - - if (pResult != NULL) - { - s_CodeAllocHint = pResult + dwSize; - return pResult; - } - - // Try to allocate in the preferred region before the hint - pResult = ClrVirtualAllocWithinRange(s_CodeMinAddr, pHint + dwSize, dwSize, flAllocationType, flProtect); - - if (pResult != NULL) - { - s_CodeAllocHint = pResult + dwSize; - return pResult; - } - - s_CodeAllocHint = NULL; - } - - // Fall through to -#endif // USE_UPPER_ADDRESS - -#ifdef HOST_UNIX - // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. - // This will allow us to place JIT'ed code close to the coreclr library - // and thus improve performance by avoiding jump stubs in managed code. - flAllocationType |= MEM_RESERVE_EXECUTABLE; -#endif // HOST_UNIX - - return (BYTE *) ClrVirtualAlloc (NULL, dwSize, flAllocationType, flProtect); - -} - // // Allocate free memory with specific alignment. // diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 1d682d2a428bb..9c2cb3df0b7e9 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -833,7 +833,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/exceparm.cpp ${ARCH_SOURCES_DIR}/stubs.cpp - ${ARCH_SOURCES_DIR}/armsinglestepper.cpp ) set(VM_HEADERS_DAC_AND_WKS_ARCH @@ -844,6 +843,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/profiler.cpp + ${ARCH_SOURCES_DIR}/armsinglestepper.cpp exceptionhandling.cpp gcinfodecoder.cpp ) @@ -868,7 +868,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ) if(CLR_CMAKE_HOST_UNIX) - list(APPEND VM_SOURCES_DAC_AND_WKS_ARCH + list(APPEND VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/arm64singlestepper.cpp ) endif(CLR_CMAKE_HOST_UNIX) diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 82a301bb0cbd1..219597eb350c2 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -51,37 +51,6 @@ endif extern JIT_InternalThrow:proc -; There is an even more optimized version of these helpers possible which takes -; advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 -; that check (this is more significant in the JIT_WriteBarrier case). -; -; Additionally we can look into providing helpers which will take the src/dest from -; specific registers (like x86) which _could_ (??) make for easier register allocation -; for the JIT64, however it might lead to having to have some nasty code that treats -; these guys really special like... :(. -; -; Version that does the move, checks whether or not it's in the GC and whether or not -; it needs to have it's card updated -; -; void JIT_CheckedWriteBarrier(Object** dst, Object* src) -LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT - - ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference - ; but if it isn't then it will just return. 
- ; - ; See if this is in GCHeap - cmp rcx, [g_lowest_address] - jb NotInHeap - cmp rcx, [g_highest_address] - jnb NotInHeap - - jmp JIT_WriteBarrier - - NotInHeap: - ; See comment above about possible AV - mov [rcx], rdx - ret -LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT ; Mark start of the code region that we patch at runtime LEAF_ENTRY JIT_PatchedCodeStart, _TEXT @@ -99,7 +68,8 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT ifdef _DEBUG ; In debug builds, this just contains jump to the debug version of the write barrier by default - jmp JIT_WriteBarrier_Debug + mov rax, JIT_WriteBarrier_Debug + jmp rax endif ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -388,6 +358,51 @@ endif ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT +Section segment para 'DATA' + + align 16 + + public JIT_WriteBarrier_Loc +JIT_WriteBarrier_Loc: + dq 0 + +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + ; JIT_WriteBarrier(Object** dst, Object* src) + jmp QWORD PTR [JIT_WriteBarrier_Loc] +LEAF_END JIT_WriteBarrier_Callable, _TEXT + +; There is an even more optimized version of these helpers possible which takes +; advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 +; that check (this is more significant in the JIT_WriteBarrier case). +; +; Additionally we can look into providing helpers which will take the src/dest from +; specific registers (like x86) which _could_ (??) make for easier register allocation +; for the JIT64, however it might lead to having to have some nasty code that treats +; these guys really special like... :(. +; +; Version that does the move, checks whether or not it's in the GC and whether or not +; it needs to have it's card updated +; +; void JIT_CheckedWriteBarrier(Object** dst, Object* src) +LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT + + ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + ; but if it isn't then it will just return. + ; + ; See if this is in GCHeap + cmp rcx, [g_lowest_address] + jb NotInHeap + cmp rcx, [g_highest_address] + jnb NotInHeap + + jmp QWORD PTR [JIT_WriteBarrier_Loc] + + NotInHeap: + ; See comment above about possible AV + mov [rcx], rdx + ret +LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT + ; The following helper will access ("probe") a word on each page of the stack ; starting with the page right beneath rsp down to the one pointed to by r11. ; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index a13afb4878511..8109886d0c969 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -32,26 +32,14 @@ LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT // See if this is in GCHeap PREPARE_EXTERNAL_VAR g_lowest_address, rax cmp rdi, [rax] -#ifdef FEATURE_WRITEBARRIER_COPY // jb NotInHeap .byte 0x72, 0x12 -#else - // jb NotInHeap - .byte 0x72, 0x0e -#endif PREPARE_EXTERNAL_VAR g_highest_address, rax cmp rdi, [rax] -#ifdef FEATURE_WRITEBARRIER_COPY // jnb NotInHeap .byte 0x73, 0x06 jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] -#else - // jnb NotInHeap - .byte 0x73, 0x02 - // jmp C_FUNC(JIT_WriteBarrier) - .byte 0xeb, 0x05 -#endif NotInHeap: // See comment above about possible AV @@ -398,11 +386,17 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT -#ifdef FEATURE_WRITEBARRIER_COPY // When JIT_WriteBarrier is copied into an allocated page, // helpers use this global variable to jump to it. 
This variable is set in InitThreadManager. - .global _JIT_WriteBarrier_Loc - .zerofill __DATA,__common,_JIT_WriteBarrier_Loc,8,3 + .global C_FUNC(JIT_WriteBarrier_Loc) +#ifdef TARGET_OSX + .zerofill __DATA,__common,C_FUNC(JIT_WriteBarrier_Loc),8,3 +#else + .data + C_FUNC(JIT_WriteBarrier_Loc): + .quad 0 + .text +#endif // ------------------------------------------------------------------ // __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) @@ -412,8 +406,6 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] LEAF_END JIT_WriteBarrier_Callable, _TEXT -#endif // FEATURE_WRITEBARRIER_COPY - // The following helper will access ("probe") a word on each page of the stack // starting with the page right beneath rsp down to the one pointed to by r11. diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index 38bff78a54cb0..02b023777b8a9 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -293,7 +293,10 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, // the memcpy must come before the switch statment because the asserts inside the switch // are actually looking into the JIT_WriteBarrier buffer - memcpy(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); + { + ExecutableWriterHolder writeBarrierWriterHolder(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), GetCurrentWriteBarrierSize()); + memcpy(writeBarrierWriterHolder.GetRW(), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); + } switch (newWriteBarrier) { @@ -544,7 +547,8 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) // Change immediate if different from new g_ephermeral_high. if (*(UINT64*)m_pUpperBoundImmediate != (size_t)g_ephemeral_high) { - *(UINT64*)m_pUpperBoundImmediate = (size_t)g_ephemeral_high; + ExecutableWriterHolder upperBoundWriterHolder((UINT64*)m_pUpperBoundImmediate, sizeof(UINT64)); + *upperBoundWriterHolder.GetRW() = (size_t)g_ephemeral_high; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } } @@ -557,7 +561,8 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) // Change immediate if different from new g_ephermeral_low. 
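Both halves of the write-barrier work share one pattern: the barrier body lives in a runtime-allocated RX page reached through the `JIT_WriteBarrier_Loc` slot, and every constant baked into the body (the ephemeral bounds here, the card table pointers just below) is rewritten through a short-lived RW alias. A C-level model of the two operations (illustrative function names; `SWB_ICACHE_FLUSH`, the allocator, and the holder are the runtime's):

```cpp
typedef void (*WriteBarrierFn)(void** dst, void* ref);
WriteBarrierFn g_writeBarrierLoc;   // plays the role of JIT_WriteBarrier_Loc

// One-time install: copy the barrier into fresh RX memory through a RW alias,
// then publish its address for the 'jmp [JIT_WriteBarrier_Loc]' call sites.
void InstallBarrier(const void* barrierCode, size_t size)
{
    ExecutableAllocator* alloc = ExecutableAllocator::Instance();
    void* rx = alloc->Reserve(ALIGN_UP(size, alloc->Granularity()));
    alloc->Commit(rx, size, /* isExecutable */ true);

    ExecutableWriterHolder<void> barrierWriter(rx, size);
    memcpy(barrierWriter.GetRW(), barrierCode, size);
    g_writeBarrierLoc = (WriteBarrierFn)rx;
}

// Per-update repatching, the shape of each block above and below: rewrite an
// immediate inside the RX body and request one icache flush at the end.
int PatchImmediate(UINT64* pImmediateRX, UINT64 newValue, int actions)
{
    if (*pImmediateRX != newValue)  // reading straight from RX is fine
    {
        ExecutableWriterHolder<UINT64> immediateWriter(pImmediateRX, sizeof(UINT64));
        *immediateWriter.GetRW() = newValue;
        actions |= SWB_ICACHE_FLUSH;
    }
    return actions;
}
```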
if (*(UINT64*)m_pLowerBoundImmediate != (size_t)g_ephemeral_low) { - *(UINT64*)m_pLowerBoundImmediate = (size_t)g_ephemeral_low; + ExecutableWriterHolder lowerBoundImmediateWriterHolder((UINT64*)m_pLowerBoundImmediate, sizeof(UINT64)); + *lowerBoundImmediateWriterHolder.GetRW() = (size_t)g_ephemeral_low; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } break; @@ -609,7 +614,8 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus #endif // FEATURE_SVR_GC if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)g_sw_ww_table) { - *(UINT64*)m_pWriteWatchTableImmediate = (size_t)g_sw_ww_table; + ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pWriteWatchTableImmediate, sizeof(UINT64)); + *writeWatchTableImmediateWriterHolder.GetRW() = (size_t)g_sw_ww_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } break; @@ -621,14 +627,16 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table) { - *(UINT64*)m_pCardTableImmediate = (size_t)g_card_table; + ExecutableWriterHolder cardTableImmediateWriterHolder((UINT64*)m_pCardTableImmediate, sizeof(UINT64)); + *cardTableImmediateWriterHolder.GetRW() = (size_t)g_card_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES if (*(UINT64*)m_pCardBundleTableImmediate != (size_t)g_card_bundle_table) { - *(UINT64*)m_pCardBundleTableImmediate = (size_t)g_card_bundle_table; + ExecutableWriterHolder cardBundleTableImmediateWriterHolder((UINT64*)m_pCardBundleTableImmediate, sizeof(UINT64)); + *cardBundleTableImmediateWriterHolder.GetRW() = (size_t)g_card_bundle_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } #endif diff --git a/src/coreclr/vm/arm/armsinglestepper.cpp b/src/coreclr/vm/arm/armsinglestepper.cpp index 79317263b2223..f9e718ae5420e 100644 --- a/src/coreclr/vm/arm/armsinglestepper.cpp +++ b/src/coreclr/vm/arm/armsinglestepper.cpp @@ -97,11 +97,7 @@ ArmSingleStepper::ArmSingleStepper() ArmSingleStepper::~ArmSingleStepper() { #if !defined(DACCESS_COMPILE) -#ifdef TARGET_UNIX SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(WORD)); -#else - DeleteExecutable(m_rgCode); -#endif #endif } @@ -110,11 +106,7 @@ void ArmSingleStepper::Init() #if !defined(DACCESS_COMPILE) if (m_rgCode == NULL) { -#ifdef TARGET_UNIX m_rgCode = (WORD *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(WORD))); -#else - m_rgCode = new (executable) WORD[kMaxCodeBuffer]; -#endif } #endif } @@ -287,6 +279,8 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) DWORD idxNextInstruction = 0; + ExecutableWriterHolder codeWriterHolder(m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); + if (m_originalITState.InITBlock() && !ConditionHolds(pCtx, m_originalITState.CurrentCondition())) { LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper: Case 1: ITState::Clear;\n")); @@ -295,7 +289,7 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) // to execute. We'll put the correct value back during fixup. ITState::Clear(pCtx); m_fSkipIT = true; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } else if (TryEmulate(pCtx, opcode1, opcode2, false)) { @@ -308,8 +302,8 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) m_fEmulate = true; // Set breakpoints to stop the execution. This will get us right back here. 
- m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } else { @@ -323,24 +317,24 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) // guarantee one of them will be hit (we don't care which one -- the fixup code will update // the PC and IT state to make it look as though the CPU just executed the current // instruction). - m_rgCode[idxNextInstruction++] = opcode1; + codeWriterHolder.GetRW()[idxNextInstruction++] = opcode1; if (Is32BitInstruction(opcode1)) - m_rgCode[idxNextInstruction++] = opcode2; + codeWriterHolder.GetRW()[idxNextInstruction++] = opcode2; - m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } // Always terminate the redirection buffer with a breakpoint. - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; _ASSERTE(idxNextInstruction <= kMaxCodeBuffer); // Set the thread up so it will redirect to our buffer when execution resumes. pCtx->Pc = ((DWORD)(DWORD_PTR)m_rgCode) | THUMB_CODE; // Make sure the CPU sees the updated contents of the buffer. - FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + FlushInstructionCache(GetCurrentProcess(), m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); // Done, set the state. m_state = Applied; diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 930395b56dc7e..3faa8fe36846e 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -978,6 +978,16 @@ g_rgWriteBarrierDescriptors: .global g_rgWriteBarrierDescriptors +// ------------------------------------------------------------------ +// __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) + LEAF_ENTRY JIT_WriteBarrier_Callable + + // Branch to the write barrier + ldr r2, =JIT_WriteBarrier_Loc // or R3? See targetarm.h + ldr pc, [r2] + + LEAF_END JIT_WriteBarrier_Callable + #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog, _TEXT, NoHandler diff --git a/src/coreclr/vm/arm/asmhelpers.asm b/src/coreclr/vm/arm/asmhelpers.asm index d20540e62090e..82596e66693dc 100644 --- a/src/coreclr/vm/arm/asmhelpers.asm +++ b/src/coreclr/vm/arm/asmhelpers.asm @@ -1724,6 +1724,18 @@ tempReg SETS "$tmpReg" END_WRITE_BARRIERS + IMPORT JIT_WriteBarrier_Loc + +; ------------------------------------------------------------------ +; __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) + LEAF_ENTRY JIT_WriteBarrier_Callable + + ; Branch to the write barrier + ldr r2, =JIT_WriteBarrier_Loc ; or R3? 
See targetarm.h + ldr pc, [r2] + + LEAF_END + #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 88d0c6802b69d..425c286558432 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -1069,6 +1069,7 @@ struct StubPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -1095,6 +1096,7 @@ struct StubPrecode { return (TADDR)InterlockedCompareExchange( (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } +#endif // !DACCESS_COMPILE #ifdef FEATURE_PREJIT void Fixup(DataImage *image); @@ -1167,6 +1169,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); PCODE GetTarget() @@ -1175,6 +1184,7 @@ struct FixupPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -1201,6 +1211,7 @@ struct FixupPrecode { return (TADDR)InterlockedCompareExchange( (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } +#endif // !DACCESS_COMPILE static BOOL IsFixupPrecodeByASM(PCODE addr) { @@ -1256,6 +1267,7 @@ struct ThisPtrRetBufPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE BOOL SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL @@ -1268,6 +1280,7 @@ struct ThisPtrRetBufPrecode { ExecutableWriterHolder precodeWriterHolder(this, sizeof(ThisPtrRetBufPrecode)); return FastInterlockCompareExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected; } +#endif // !DACCESS_COMPILE }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index aac3e25b18146..6e62df2370338 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -329,16 +329,28 @@ void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength) { DWORD size = (PBYTE)JIT_PatchedWriteBarrierLast - (PBYTE)JIT_PatchedWriteBarrierStart; *ppbStart = (PBYTE)JIT_PatchedWriteBarrierStart; + if (IsWriteBarrierCopyEnabled()) + { + *ppbStart = GetWriteBarrierCodeLocation(*ppbStart); + } *pcbLength = size; } void CopyWriteBarrier(PCODE dstCode, PCODE srcCode, PCODE endCode) { - TADDR dst = PCODEToPINSTR(dstCode); + TADDR dst = (TADDR)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation((void*)dstCode)); TADDR src = PCODEToPINSTR(srcCode); TADDR end = PCODEToPINSTR(endCode); size_t size = (PBYTE)end - (PBYTE)src; + + ExecutableWriterHolder writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder = ExecutableWriterHolder((void*)dst, size); + dst = (TADDR)writeBarrierWriterHolder.GetRW(); + } + memcpy((PVOID)dst, (PVOID)src, size); } @@ -419,7 +431,7 @@ void UpdateGCWriteBarriers(bool postGrow = false) } #define GWB_PATCH_OFFSET(_global) \ if (pDesc->m_dw_##_global##_offset != 0xffff) \ - PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset - 1), (UINT32)(dac_cast(_global))); + PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset), (UINT32)(dac_cast(_global))); // Iterate through the write barrier patch table created in the .clrwb section // (see write barrier asm code) @@ -431,6 +443,13 @@ void UpdateGCWriteBarriers(bool postGrow = false) PBYTE to = FindWBMapping(pDesc->m_pFuncStart); if(to) { 
+ to = (PBYTE)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation(to)); + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(to, pDesc->m_pFuncEnd - pDesc->m_pFuncStart); + to = barrierWriterHolder.GetRW(); + } GWB_PATCH_OFFSET(g_lowest_address); GWB_PATCH_OFFSET(g_highest_address); GWB_PATCH_OFFSET(g_ephemeral_low); diff --git a/src/coreclr/vm/arm64/arm64singlestepper.cpp b/src/coreclr/vm/arm64/arm64singlestepper.cpp index d45925311a33e..6c1764647c9f2 100644 --- a/src/coreclr/vm/arm64/arm64singlestepper.cpp +++ b/src/coreclr/vm/arm64/arm64singlestepper.cpp @@ -46,11 +46,7 @@ Arm64SingleStepper::Arm64SingleStepper() Arm64SingleStepper::~Arm64SingleStepper() { #if !defined(DACCESS_COMPILE) -#ifdef TARGET_UNIX SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(uint32_t)); -#else - DeleteExecutable(m_rgCode); -#endif #endif } @@ -59,11 +55,7 @@ void Arm64SingleStepper::Init() #if !defined(DACCESS_COMPILE) if (m_rgCode == NULL) { -#ifdef TARGET_UNIX m_rgCode = (uint32_t *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(uint32_t))); -#else - m_rgCode = new (executable) uint32_t[kMaxCodeBuffer]; -#endif } #endif } @@ -207,7 +199,7 @@ void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) unsigned int idxNextInstruction = 0; - ExecutableWriterHolder codeWriterHolder(m_rgCode, sizeof(m_rgCode)); + ExecutableWriterHolder codeWriterHolder(m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); if (TryEmulate(pCtx, opcode, false)) { @@ -230,7 +222,7 @@ void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) pCtx->Pc = (uint64_t)m_rgCode; // Make sure the CPU sees the updated contents of the buffer. - FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + FlushInstructionCache(GetCurrentProcess(), m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); // Done, set the state. 
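The `FlushInstructionCache` corrections in both steppers also fix a classic C++ pitfall: `m_rgCode` is a pointer member, so `sizeof(m_rgCode)` evaluates to the size of the pointer, not of the buffer, and only the first 4 or 8 bytes were being flushed. A minimal illustration:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t  inlineBuf[16];
    uint32_t* heapBuf = new uint32_t[16];

    printf("%zu\n", sizeof(inlineBuf));        // 64: the whole array
    printf("%zu\n", sizeof(heapBuf));          // 8 on 64-bit: just the pointer!
    printf("%zu\n", 16 * sizeof(heapBuf[0]));  // 64: the intended span

    delete[] heapBuf;
    return 0;
}
```

Hence the explicit `kMaxCodeBuffer * sizeof(m_rgCode[0])` in the replacements.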
m_state = Applied; diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index e6b47d07b2b0c..8ef66586cd22c 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -270,13 +270,9 @@ LOCAL_LABEL(EphemeralCheckEnabled): ldr x7, [x12] // Update wbs state -#ifdef FEATURE_WRITEBARRIER_COPY PREPARE_EXTERNAL_VAR JIT_WriteBarrier_Table_Loc, x12 ldr x12, [x12] add x12, x12, x9 -#else // FEATURE_WRITEBARRIER_COPY - adr x12, LOCAL_LABEL(wbs_begin) -#endif // FEATURE_WRITEBARRIER_COPY stp x0, x1, [x12], 16 stp x2, x3, [x12], 16 @@ -295,16 +291,10 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT mov x14, x0 // x14 = dst mov x15, x1 // x15 = val -#ifdef FEATURE_WRITEBARRIER_COPY -LOCAL_LABEL(Branch_JIT_WriteBarrier_Copy): // Branch to the write barrier PREPARE_EXTERNAL_VAR JIT_WriteBarrier_Loc, x17 ldr x17, [x17] br x17 -#else // FEATURE_WRITEBARRIER_COPY - // Branch to the write barrier - b C_FUNC(JIT_WriteBarrier) -#endif // FEATURE_WRITEBARRIER_COPY LEAF_END JIT_WriteBarrier_Callable, _TEXT .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index ffbeb9fd1acb3..17d3a676940bd 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -61,6 +61,10 @@ #ifdef FEATURE_COMINTEROP IMPORT CLRToCOMWorker #endif // FEATURE_COMINTEROP + + IMPORT JIT_WriteBarrier_Table_Loc + IMPORT JIT_WriteBarrier_Loc + TEXTAREA ;; LPVOID __stdcall GetCurrentIP(void); @@ -308,6 +312,7 @@ ThePreStubPatchLabel ; x12 will be used for pointers mov x8, x0 + mov x9, x1 adrp x12, g_card_table ldr x0, [x12, g_card_table] @@ -346,7 +351,9 @@ EphemeralCheckEnabled ldr x7, [x12, g_highest_address] ; Update wbs state - adr x12, wbs_begin + adrp x12, JIT_WriteBarrier_Table_Loc + ldr x12, [x12, JIT_WriteBarrier_Table_Loc] + add x12, x12, x9 stp x0, x1, [x12], 16 stp x2, x3, [x12], 16 stp x4, x5, [x12], 16 @@ -355,9 +362,11 @@ EphemeralCheckEnabled EPILOG_RESTORE_REG_PAIR fp, lr, #16! 
EPILOG_RETURN + WRITE_BARRIER_END JIT_UpdateWriteBarrierState + ; Begin patchable literal pool ALIGN 64 ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line - + WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table wbs_begin wbs_card_table DCQ 0 @@ -375,14 +384,7 @@ wbs_lowest_address DCQ 0 wbs_highest_address DCQ 0 - - WRITE_BARRIER_END JIT_UpdateWriteBarrierState - -; ------------------------------------------------------------------ -; End of the writeable code region - LEAF_ENTRY JIT_PatchedCodeLast - ret lr - LEAF_END + WRITE_BARRIER_END JIT_WriteBarrier_Table ; void JIT_ByRefWriteBarrier ; On entry: @@ -546,6 +548,12 @@ Exit ret lr WRITE_BARRIER_END JIT_WriteBarrier +; ------------------------------------------------------------------ +; End of the writeable code region + LEAF_ENTRY JIT_PatchedCodeLast + ret lr + LEAF_END + #ifdef FEATURE_PREJIT ;------------------------------------------------ ; VirtualMethodFixupStub @@ -1417,9 +1425,10 @@ CallHelper2 mov x14, x0 ; x14 = dst mov x15, x1 ; x15 = val - ; Branch to the write barrier (which is already correctly overwritten with - ; single or multi-proc code based on the current CPU - b JIT_WriteBarrier + ; Branch to the write barrier + adrp x17, JIT_WriteBarrier_Loc + ldr x17, [x17, JIT_WriteBarrier_Loc] + br x17 LEAF_END diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 83e56cfb9f9b9..0641d89ff1a91 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -597,6 +597,7 @@ struct StubPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -623,6 +624,7 @@ struct StubPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE #ifdef FEATURE_PREJIT void Fixup(DataImage *image); @@ -715,6 +717,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); PCODE GetTarget() @@ -723,6 +732,7 @@ struct FixupPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -749,6 +759,7 @@ struct FixupPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE static BOOL IsFixupPrecodeByASM(PCODE addr) { @@ -797,6 +808,7 @@ struct ThisPtrRetBufPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE BOOL SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL @@ -810,6 +822,7 @@ struct ThisPtrRetBufPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 54cf1c4927548..12d56ddb9867e 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -1067,8 +1067,14 @@ extern "C" void STDCALL JIT_PatchedCodeLast(); static void UpdateWriteBarrierState(bool skipEphemeralCheck) { BYTE *writeBarrierCodeStart = GetWriteBarrierCodeLocation((void*)JIT_PatchedCodeStart); - ExecutableWriterHolder writeBarrierWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - 
(BYTE*)JIT_PatchedCodeStart); - JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierWriterHolder.GetRW() - writeBarrierCodeStart); + BYTE *writeBarrierCodeStartRW = writeBarrierCodeStart; + ExecutableWriterHolder writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder = ExecutableWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); + writeBarrierCodeStartRW = writeBarrierWriterHolder.GetRW(); + } + JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierCodeStartRW - writeBarrierCodeStart); } void InitJITHelpers1() diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index cdc5925234af9..b60aac924d2e2 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -607,6 +607,11 @@ void EESocketCleanupHelper(bool isExecutingOnAltStack) #endif // TARGET_UNIX #endif // CROSSGEN_COMPILE +void FatalErrorHandler(UINT errorCode, LPCWSTR pszMessage) +{ + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(errorCode, pszMessage); +} + void EEStartupHelper() { CONTRACTL @@ -670,6 +675,8 @@ void EEStartupHelper() // This needs to be done before the EE has started InitializeStartupFlags(); + IfFailGo(ExecutableAllocator::StaticInitialize(FatalErrorHandler)); + ThreadpoolMgr::StaticInitialize(); MethodDescBackpatchInfoTracker::StaticInitialize(); @@ -824,7 +831,7 @@ void EEStartupHelper() g_runtimeLoadedBaseAddress = (SIZE_T)pe.GetBase(); g_runtimeVirtualSize = (SIZE_T)pe.GetVirtualSize(); - InitCodeAllocHint(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); + ExecutableAllocator::InitCodeAllocHint(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); } #endif // !TARGET_UNIX diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index 02feec829a76b..5c5004f56860a 100644 --- a/src/coreclr/vm/class.cpp +++ b/src/coreclr/vm/class.cpp @@ -153,7 +153,9 @@ void EEClass::Destruct(MethodTable * pOwningMT) if (pDelegateEEClass->m_pStaticCallStub) { - BOOL fStubDeleted = pDelegateEEClass->m_pStaticCallStub->DecRef(); + ExecutableWriterHolder stubWriterHolder(pDelegateEEClass->m_pStaticCallStub, sizeof(Stub)); + BOOL fStubDeleted = stubWriterHolder.GetRW()->DecRef(); + if (fStubDeleted) { DelegateInvokeStubManager::g_pManager->RemoveStub(pDelegateEEClass->m_pStaticCallStub); @@ -167,7 +169,6 @@ void EEClass::Destruct(MethodTable * pOwningMT) // it is owned by the m_pMulticastStubCache, not by the class // - it is shared across classes. 
So we don't decrement // its ref count here - delete pDelegateEEClass->m_pUMThunkMarshInfo; } #ifdef FEATURE_COMINTEROP diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 37220786fedda..78721292a3e9f 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -2139,8 +2139,7 @@ VOID EEJitManager::EnsureJumpStubReserve(BYTE * pImageBase, SIZE_T imageSize, SI return; // Unable to allocate the reserve - give up } - pNewReserve->m_ptr = ClrVirtualAllocWithinRange(loAddrCurrent, hiAddrCurrent, - allocChunk, MEM_RESERVE, PAGE_NOACCESS); + pNewReserve->m_ptr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(allocChunk, loAddrCurrent, hiAddrCurrent); if (pNewReserve->m_ptr != NULL) break; @@ -2231,8 +2230,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap if (!pInfo->getThrowOnOutOfMemoryWithinRange() && PEDecoder::GetForceRelocs()) RETURN NULL; #endif - pBaseAddr = ClrVirtualAllocWithinRange(loAddr, hiAddr, - reserveSize, MEM_RESERVE, PAGE_NOACCESS); + pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(reserveSize, loAddr, hiAddr); if (!pBaseAddr) { @@ -2251,7 +2249,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap } else { - pBaseAddr = ClrVirtualAllocExecutable(reserveSize, MEM_RESERVE, PAGE_NOACCESS); + pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->Reserve(reserveSize); if (!pBaseAddr) ThrowOutOfMemory(); } @@ -2686,15 +2684,14 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo *pAllocatedSize = sizeof(CodeHeader) + totalSize; -#if defined(HOST_OSX) && defined(HOST_ARM64) -#define FEATURE_WXORX -#endif - -#ifdef FEATURE_WXORX - pCodeHdrRW = (CodeHeader *)new BYTE[*pAllocatedSize]; -#else - pCodeHdrRW = pCodeHdr; -#endif + if (ExecutableAllocator::IsWXORXEnabled()) + { + pCodeHdrRW = (CodeHeader *)new BYTE[*pAllocatedSize]; + } + else + { + pCodeHdrRW = pCodeHdr; + } #ifdef USE_INDIRECT_CODEHEADER if (requestInfo.IsDynamicDomain()) @@ -3347,7 +3344,7 @@ void EEJitManager::Unload(LoaderAllocator *pAllocator) } } - ResetCodeAllocHint(); + ExecutableAllocator::ResetCodeAllocHint(); } EEJitManager::DomainCodeHeapList::DomainCodeHeapList() diff --git a/src/coreclr/vm/comcallablewrapper.cpp b/src/coreclr/vm/comcallablewrapper.cpp index 8b95dac8cdd77..499880dc16dde 100644 --- a/src/coreclr/vm/comcallablewrapper.cpp +++ b/src/coreclr/vm/comcallablewrapper.cpp @@ -3183,12 +3183,11 @@ void ComMethodTable::Cleanup() if (m_pDispatchInfo) delete m_pDispatchInfo; - if (m_pMDescr) - DeleteExecutable(m_pMDescr); if (m_pITypeInfo && !g_fProcessDetach) SafeRelease(m_pITypeInfo); - DeleteExecutable(this); + // The m_pMDescr and the current instance is allocated from the related LoaderAllocator + // so no cleanup is needed here. 
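The deleted DeleteExecutable calls in class.cpp and comcallablewrapper.cpp are not leaks: those allocations now come from the owning LoaderAllocator's heaps, and arena-owned memory is reclaimed when the arena dies rather than object by object. A toy version of that ownership model (not the runtime's LoaderAllocator API):

#include <cstddef>
#include <vector>

class Arena {                       // stands in for a LoaderAllocator heap
    std::vector<char*> blocks;
public:
    void *Alloc(size_t n) { blocks.push_back(new char[n]); return blocks.back(); }
    ~Arena() { for (char *b : blocks) delete[] b; }   // everything freed here
};

int main()
{
    Arena arena;
    void *methodDescs = arena.Alloc(256);   // like m_pMDescr / a ComMethodTable
    (void)methodDescs;                      // note: no matching free at the use site
    return 0;                               // the arena destructor reclaims it all
}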
} @@ -3214,7 +3213,7 @@ void ComMethodTable::LayOutClassMethodTable() SLOT *pComVtable; unsigned cbPrevSlots = 0; unsigned cbAlloc = 0; - NewExecutableHolder pMDMemoryPtr = NULL; + AllocMemHolder pMDMemoryPtr; BYTE* pMethodDescMemory = NULL; size_t writeableOffset = 0; unsigned cbNumParentVirtualMethods = 0; @@ -3321,7 +3320,7 @@ void ComMethodTable::LayOutClassMethodTable() cbAlloc = cbMethodDescs; if (cbAlloc > 0) { - pMDMemoryPtr = (BYTE*) new (executable) BYTE[cbAlloc + sizeof(UINT_PTR)]; + pMDMemoryPtr = m_pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbAlloc + sizeof(UINT_PTR))); pMethodDescMemory = pMDMemoryPtr; methodDescMemoryWriteableHolder = ExecutableWriterHolder(pMethodDescMemory, cbAlloc + sizeof(UINT_PTR)); @@ -3703,7 +3702,6 @@ BOOL ComMethodTable::LayOutInterfaceMethodTable(MethodTable* pClsMT) // Method descs are at the end of the vtable // m_cbSlots interfaces methods + IUnk methods pMethodDescMemory = (BYTE *)&pComVtable[m_cbSlots]; - for (i = 0; i < cbSlots; i++) { ComCallMethodDesc* pNewMD = (ComCallMethodDesc *) (pMethodDescMemory + COMMETHOD_PREPAD); @@ -4495,13 +4493,12 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForClass(MethodTable if (cbToAlloc.IsOverflow()) ThrowHR(COR_E_OVERFLOW); - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc.Value()]; + AllocMemHolder pComMT(pClassMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc.Value()))); _ASSERTE(!cbNewSlots.IsOverflow() && !cbTotalSlots.IsOverflow() && !cbVtable.IsOverflow()); ExecutableWriterHolder comMTWriterHolder(pComMT, cbToAlloc.Value()); ComMethodTable* pComMTRW = comMTWriterHolder.GetRW(); - // set up the header pComMTRW->m_ptReserved = (SLOT)(size_t)0xDEADC0FF; // reserved pComMTRW->m_pMT = pClassMT; // pointer to the class method table @@ -4573,7 +4570,7 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForInterface(MethodT if (cbToAlloc.IsOverflow()) ThrowHR(COR_E_OVERFLOW); - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc.Value()]; + AllocMemHolder pComMT(pInterfaceMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc.Value()))); _ASSERTE(!cbVtable.IsOverflow() && !cbMethDescs.IsOverflow()); @@ -4639,7 +4636,8 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForBasic(MethodTable unsigned cbVtable = cbExtraSlots * sizeof(SLOT); unsigned cbToAlloc = sizeof(ComMethodTable) + cbVtable; - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc]; + AllocMemHolder pComMT(pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc))); + ExecutableWriterHolder comMTWriterHolder(pComMT, cbToAlloc); ComMethodTable* pComMTRW = comMTWriterHolder.GetRW(); diff --git a/src/coreclr/vm/comcallablewrapper.h b/src/coreclr/vm/comcallablewrapper.h index 2581ddf832fd5..0f1e4b878e4c9 100644 --- a/src/coreclr/vm/comcallablewrapper.h +++ b/src/coreclr/vm/comcallablewrapper.h @@ -499,6 +499,7 @@ struct ComMethodTable // Accessor for the IDispatch information. 
DispatchInfo* GetDispatchInfo(); +#ifndef DACCESS_COMPILE LONG AddRef() { LIMITED_METHOD_CONTRACT; @@ -527,6 +528,7 @@ struct ComMethodTable return cbRef; } +#endif // DACCESS_COMPILE CorIfaceAttr GetInterfaceType() { @@ -746,6 +748,7 @@ struct ComMethodTable } +#ifndef DACCESS_COMPILE inline REFIID GetIID() { // Cannot use a normal CONTRACT since the return type is ref type which @@ -768,6 +771,7 @@ struct ComMethodTable return m_IID; } +#endif // DACCESS_COMPILE void CheckParentComVisibility(BOOL fForIDispatch) { diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index b6c17260a1302..1b61e16dec5d3 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -1253,7 +1253,7 @@ LPVOID COMDelegate::ConvertToCallback(OBJECTREF pDelegateObj) { GCX_PREEMP(); - pUMThunkMarshInfo = new UMThunkMarshInfo(); + pUMThunkMarshInfo = (UMThunkMarshInfo*)(void*)pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(sizeof(UMThunkMarshInfo))); ExecutableWriterHolder uMThunkMarshInfoWriterHolder(pUMThunkMarshInfo, sizeof(UMThunkMarshInfo)); uMThunkMarshInfoWriterHolder.GetRW()->LoadTimeInit(pInvokeMeth); diff --git a/src/coreclr/vm/dllimportcallback.cpp b/src/coreclr/vm/dllimportcallback.cpp index 4a88f81df5210..4f3cf879d10a4 100644 --- a/src/coreclr/vm/dllimportcallback.cpp +++ b/src/coreclr/vm/dllimportcallback.cpp @@ -41,7 +41,7 @@ class UMEntryThunkFreeList { WRAPPER_NO_CONTRACT; - m_crst.Init(CrstLeafLock, CRST_UNSAFE_ANYMODE); + m_crst.Init(CrstUMEntryThunkFreeListLock, CRST_UNSAFE_ANYMODE); } UMEntryThunk *GetUMEntryThunk() diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index 9dae86aca9377..541d88dc16885 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -403,8 +403,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) if (pInfo->m_loAddr != NULL || pInfo->m_hiAddr != NULL) { - m_pBaseAddr = ClrVirtualAllocWithinRange(pInfo->m_loAddr, pInfo->m_hiAddr, - ReserveBlockSize, MEM_RESERVE, PAGE_NOACCESS); + m_pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(ReserveBlockSize, pInfo->m_loAddr, pInfo->m_hiAddr); if (!m_pBaseAddr) { if (pInfo->getThrowOnOutOfMemoryWithinRange()) @@ -417,7 +416,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) // top up the ReserveBlockSize to suggested minimum ReserveBlockSize = max(ReserveBlockSize, pInfo->getReserveSize()); - m_pBaseAddr = ClrVirtualAllocExecutable(ReserveBlockSize, MEM_RESERVE, PAGE_NOACCESS); + m_pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->Reserve(ReserveBlockSize); if (!m_pBaseAddr) ThrowOutOfMemory(); } @@ -749,7 +748,7 @@ HostCodeHeap::TrackAllocation* HostCodeHeap::AllocMemory_NoThrow(size_t header, if (m_pLastAvailableCommittedAddr + sizeToCommit <= m_pBaseAddr + m_TotalBytesAvailable) { - if (NULL == ClrVirtualAlloc(m_pLastAvailableCommittedAddr, sizeToCommit, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) + if (NULL == ExecutableAllocator::Instance()->Commit(m_pLastAvailableCommittedAddr, sizeToCommit, true /* isExecutable */)) { LOG((LF_BCL, LL_ERROR, "CodeHeap [0x%p] - VirtualAlloc failed\n", this)); return NULL; diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 1b192e683695a..55828b7c22b86 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6679,14 +6679,12 @@ AdjustContextForJITHelpers( PCODE ip = GetIP(pContext); -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(ip)) { // Pretend we were executing 
the barrier function at its original location so that the unwinder can unwind the frame ip = AdjustWriteBarrierIP(ip); SetIP(pContext, ip); } -#endif // FEATURE_WRITEBARRIER_COPY #ifdef FEATURE_DATABREAKPOINT diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 7fff234ca85ef..4af702fab1499 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ -4694,14 +4694,12 @@ VOID DECLSPEC_NORETURN UnwindManagedExceptionPass1(PAL_SEHException& ex, CONTEXT break; } -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame controlPc = AdjustWriteBarrierIP(controlPc); SetIP(frameContext, controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY UINT_PTR sp = GetSP(frameContext); @@ -5174,13 +5172,11 @@ BOOL IsSafeToHandleHardwareException(PCONTEXT contextRecord, PEXCEPTION_RECORD e { PCODE controlPc = GetIP(contextRecord); -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location controlPc = AdjustWriteBarrierIP(controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY return g_fEEStarted && ( exceptionRecord->ExceptionCode == STATUS_BREAKPOINT || @@ -5259,14 +5255,12 @@ BOOL HandleHardwareException(PAL_SEHException* ex) { GCX_COOP(); // Must be cooperative to modify frame chain. -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame controlPc = AdjustWriteBarrierIP(controlPc); SetIP(ex->GetContextRecord(), controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY if (IsIPInMarkedJitHelper(controlPc)) { diff --git a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp index be856dbe1a63a..9ce0cc676f7a7 100644 --- a/src/coreclr/vm/gccover.cpp +++ b/src/coreclr/vm/gccover.cpp @@ -1258,9 +1258,9 @@ void RemoveGcCoverageInterrupt(TADDR instrPtr, BYTE * savedInstrPtr, GCCoverageI { ExecutableWriterHolder instrPtrWriterHolder((void*)instrPtr, 4); #ifdef TARGET_ARM - if (GetARMInstructionLength(savedInstrPtr) == 2) + if (GetARMInstructionLength(savedInstrPtr) == 2) *(WORD *)instrPtrWriterHolder.GetRW() = *(WORD *)savedInstrPtr; - else + else *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; #elif defined(TARGET_ARM64) *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; diff --git a/src/coreclr/vm/i386/jithelp.S b/src/coreclr/vm/i386/jithelp.S index facce7cacd3ef..dc56da1d1779e 100644 --- a/src/coreclr/vm/i386/jithelp.S +++ b/src/coreclr/vm/i386/jithelp.S @@ -377,10 +377,27 @@ LEAF_ENTRY JIT_WriteBarrierGroup, _TEXT ret LEAF_END JIT_WriteBarrierGroup, _TEXT -#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS -// ******************************************************************************* -// Write barrier wrappers with fcall calling convention -// + .data + .align 4 + .global C_FUNC(JIT_WriteBarrierEAX_Loc) +C_FUNC(JIT_WriteBarrierEAX_Loc): + .word 0 + .text + +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + mov eax, edx + mov edx, ecx + push eax + call 1f +1: + pop eax +2: + add eax, offset _GLOBAL_OFFSET_TABLE_+1 // (2b - 1b) + mov eax, dword ptr [eax + C_FUNC(JIT_WriteBarrierEAX_Loc)@GOT] + xchg eax, dword ptr [esp] + ret +LEAF_END JIT_WriteBarrier_Callable, _TEXT + .macro UniversalWriteBarrierHelper name .align 4 @@ -392,6 +409,11 @@ LEAF_END JIT_\name, 
_TEXT .endm +#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS +// ******************************************************************************* +// Write barrier wrappers with fcall calling convention +// + // Only define these if we're using the ASM GC write barriers; if this flag is not defined, // we'll use C++ versions of these write barriers. UniversalWriteBarrierHelper CheckedWriteBarrier diff --git a/src/coreclr/vm/i386/jithelp.asm b/src/coreclr/vm/i386/jithelp.asm index 3743ac3cbe02f..3650b3f2afd6d 100644 --- a/src/coreclr/vm/i386/jithelp.asm +++ b/src/coreclr/vm/i386/jithelp.asm @@ -411,15 +411,13 @@ ENDM ;******************************************************************************* ; Write barrier wrappers with fcall calling convention ; -UniversalWriteBarrierHelper MACRO name + + .data ALIGN 4 -PUBLIC @JIT_&name&@8 -@JIT_&name&@8 PROC - mov eax,edx - mov edx,ecx - jmp _JIT_&name&EAX@0 -@JIT_&name&@8 ENDP -ENDM + public _JIT_WriteBarrierEAX_Loc +_JIT_WriteBarrierEAX_Loc dd 0 + + .code ; WriteBarrierStart and WriteBarrierEnd are used to determine bounds of ; WriteBarrier functions so can determine if got AV in them. @@ -429,6 +427,25 @@ _JIT_WriteBarrierGroup@0 PROC ret _JIT_WriteBarrierGroup@0 ENDP + ALIGN 4 +PUBLIC @JIT_WriteBarrier_Callable@8 +@JIT_WriteBarrier_Callable@8 PROC + mov eax,edx + mov edx,ecx + jmp DWORD PTR [_JIT_WriteBarrierEAX_Loc] + +@JIT_WriteBarrier_Callable@8 ENDP + +UniversalWriteBarrierHelper MACRO name + ALIGN 4 +PUBLIC @JIT_&name&@8 +@JIT_&name&@8 PROC + mov eax,edx + mov edx,ecx + jmp _JIT_&name&EAX@0 +@JIT_&name&@8 ENDP +ENDM + ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS ; Only define these if we're using the ASM GC write barriers; if this flag is not defined, ; we'll use C++ versions of these write barriers. @@ -1233,6 +1250,8 @@ fremloopd: ; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code. 
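The jithelp.asm/jithelp.S changes above cannot keep a fixed direct jump to a barrier that may now live in a movable copy, so they introduce a data word (_JIT_WriteBarrierEAX_Loc) holding the current barrier address and make JIT_WriteBarrier_Callable jump through it; retargeting the barrier then means storing one pointer. The same dispatch shape in C++ (names mirror the diff, the barrier bodies are stand-ins; the real thunk also swaps the argument registers for the fcall convention):

#include <cstdio>

static void barrier_v1(void **dst, void *ref) { *dst = ref; puts("v1"); }
static void barrier_v2(void **dst, void *ref) { *dst = ref; puts("v2"); }

// The _Loc global: a data word the runtime re-points when the barrier is
// copied or re-stomped; call sites always go through it and never change.
static void (*JIT_WriteBarrier_Loc)(void **, void *) = barrier_v1;

static void WriteBarrier_Callable(void **dst, void *ref)
{
    JIT_WriteBarrier_Loc(dst, ref);   // one indirection, patch-free call sites
}

int main()
{
    void *slot = nullptr;
    WriteBarrier_Callable(&slot, &slot);      // dispatches to v1
    JIT_WriteBarrier_Loc = barrier_v2;        // runtime retargets the barrier
    WriteBarrier_Callable(&slot, &slot);      // now dispatches to v2
    return 0;
}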
; + ALIGN 4 + _JIT_PatchedCodeStart@0 proc public ret _JIT_PatchedCodeStart@0 endp diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index cefe7ecadc5e9..c5ebf8e0cf15c 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -1039,10 +1039,18 @@ void InitJITHelpers1() { BYTE * pfunc = (BYTE *) JIT_WriteBarrierReg_PreGrow; - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); int reg = c_rgWriteBarrierRegs[iBarrier]; - memcpy(pBuf, pfunc, 34); + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 34); + pBufRW = barrierWriterHolder.GetRW(); + } + + memcpy(pBufRW, pfunc, 34); // assert the copied code ends in a ret to make sure we got the right length _ASSERTE(pBuf[33] == 0xC3); @@ -1058,24 +1066,24 @@ void InitJITHelpers1() _ASSERTE(pBuf[0] == 0x89); // Update the reg field (bits 3..5) of the ModR/M byte of this instruction - pBuf[1] &= 0xc7; - pBuf[1] |= reg << 3; + pBufRW[1] &= 0xc7; + pBufRW[1] |= reg << 3; // Second instruction to patch is cmp reg, imm32 (low bound) _ASSERTE(pBuf[2] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[3] &= 0xf8; - pBuf[3] |= reg; + pBufRW[3] &= 0xf8; + pBufRW[3] |= reg; #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization just jump to the old one // Use the slow one from time to time in a debug build because // there are some good asserts in the unoptimized one if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DEBUG_RANDOM_BARRIER_CHECK) { - pfunc = &pBuf[0]; + pfunc = &pBufRW[0]; *pfunc++ = 0xE9; // JMP c_rgDebugWriteBarriers[iBarrier] - *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (pfunc + sizeof(DWORD)); + *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (&pBuf[1] + sizeof(DWORD)); } #endif // WRITE_BARRIER_CHECK } @@ -1121,7 +1129,7 @@ void ValidateWriteBarrierHelpers() #endif // WRITE_BARRIER_CHECK // first validate the PreGrow helper - BYTE* pWriteBarrierFunc = reinterpret_cast(JIT_WriteBarrierEAX); + BYTE* pWriteBarrierFunc = GetWriteBarrierCodeLocation(reinterpret_cast(JIT_WriteBarrierEAX)); // ephemeral region DWORD* pLocation = reinterpret_cast(&pWriteBarrierFunc[AnyGrow_EphemeralLowerBound]); @@ -1159,7 +1167,7 @@ void ValidateWriteBarrierHelpers() #endif //CODECOVERAGE /*********************************************************************/ -#define WriteBarrierIsPreGrow() (((BYTE *)JIT_WriteBarrierEAX)[10] == 0xc1) +#define WriteBarrierIsPreGrow() ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[10] == 0xc1) /*********************************************************************/ @@ -1177,20 +1185,28 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization if we are checking write barrier - if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + if ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[0] == 0xE9) // we are using slow write barrier return stompWBCompleteActions; #endif // WRITE_BARRIER_CHECK // Update the lower bound. 
for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); + + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + pBufRW = barrierWriterHolder.GetRW(); + } // assert there is in fact a cmp r/m32, imm32 there _ASSERTE(pBuf[2] == 0x81); // Update the immediate which is the lower bound of the ephemeral generation - size_t *pfunc = (size_t *) &pBuf[AnyGrow_EphemeralLowerBound]; + size_t *pfunc = (size_t *) &pBufRW[AnyGrow_EphemeralLowerBound]; //avoid trivial self modifying code if (*pfunc != (size_t) g_ephemeral_low) { @@ -1203,7 +1219,7 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) _ASSERTE(pBuf[10] == 0x81); // Update the upper bound if we are using the PostGrow thunk. - pfunc = (size_t *) &pBuf[PostGrow_EphemeralUpperBound]; + pfunc = (size_t *) &pBufRW[PostGrow_EphemeralUpperBound]; //avoid trivial self modifying code if (*pfunc != (size_t) g_ephemeral_high) { @@ -1233,7 +1249,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization if we are checking write barrier - if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + if ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[0] == 0xE9) // we are using slow write barrier return stompWBCompleteActions; #endif // WRITE_BARRIER_CHECK @@ -1242,12 +1258,20 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); int reg = c_rgWriteBarrierRegs[iBarrier]; size_t *pfunc; - // Check if we are still using the pre-grow version of the write barrier. + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + pBufRW = barrierWriterHolder.GetRW(); + } + + // Check if we are still using the pre-grow version of the write barrier. if (bWriteBarrierIsPreGrow) { // Check if we need to use the upper bounds checking barrier stub. 
@@ -1260,7 +1284,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) } pfunc = (size_t *) JIT_WriteBarrierReg_PostGrow; - memcpy(pBuf, pfunc, 42); + memcpy(pBufRW, pfunc, 42); // assert the copied code ends in a ret to make sure we got the right length _ASSERTE(pBuf[41] == 0xC3); @@ -1276,35 +1300,35 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) _ASSERTE(pBuf[0] == 0x89); // Update the reg field (bits 3..5) of the ModR/M byte of this instruction - pBuf[1] &= 0xc7; - pBuf[1] |= reg << 3; + pBufRW[1] &= 0xc7; + pBufRW[1] |= reg << 3; // Second instruction to patch is cmp reg, imm32 (low bound) _ASSERTE(pBuf[2] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[3] &= 0xf8; - pBuf[3] |= reg; + pBufRW[3] &= 0xf8; + pBufRW[3] |= reg; // Third instruction to patch is another cmp reg, imm32 (high bound) _ASSERTE(pBuf[10] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[11] &= 0xf8; - pBuf[11] |= reg; + pBufRW[11] &= 0xf8; + pBufRW[11] |= reg; bStompWriteBarrierEphemeral = true; // What we're trying to update is the offset field of a // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[22] == 0x80); - pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[34] == 0xC6); - pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableSecondLocation]; } else @@ -1313,14 +1337,14 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[14] == 0x80); - pfunc = (size_t *) &pBuf[PreGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PreGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[26] == 0xC6); - pfunc = (size_t *) &pBuf[PreGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PreGrow_CardTableSecondLocation]; } } else @@ -1329,13 +1353,13 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[22] == 0x80); - pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[34] == 0xC6); - pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableSecondLocation]; } // Stick in the adjustment value. 
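All of the stomping code above follows one discipline: _ASSERTE reads go through pBuf (the RX address, which always reflects what will execute) while every store goes through pBufRW (the writable alias, identical to pBuf when the barrier copy is disabled). Condensed into a sketch (PatchImmediate and the buffer layout are hypothetical; assumes a little-endian target):

#include <cassert>
#include <cstring>
#include <cstdint>
#include <cstddef>

static void PatchImmediate(uint8_t *pBufRX, uint8_t *pBufRW,
                           size_t immOffset, uint32_t newImm)
{
    // Validate through the RX view: it is always mapped and shows reality...
    assert(pBufRX[immOffset - 2] == 0x81 && "expected cmp r/m32, imm32");
    // ...but perform the store only through the RW alias.
    memcpy(pBufRW + immOffset, &newImm, sizeof(newImm));
}

int main()
{
    uint8_t code[8] = { 0x81, 0xF9, 0, 0, 0, 0, 0xC3, 0x90 }; // cmp ecx, imm32; ret; nop
    PatchImmediate(code, code, 2, 0x12345678);  // RX == RW when W^X is off
    return code[2] == 0x78 ? 0 : 1;             // low byte first on little-endian
}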
diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 61c5dfd90cbfc..564363053fc6a 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -4829,7 +4829,7 @@ VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript) X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale); // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX) - X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0); + X86EmitCall(NewExternalCodeLabel((LPVOID) GetWriteBarrierCodeLocation(&JIT_WriteBarrierEAX)), 0); } else #else // TARGET_AMD64 diff --git a/src/coreclr/vm/i386/stublinkerx86.h b/src/coreclr/vm/i386/stublinkerx86.h index af5244d077193..564c999975e7c 100644 --- a/src/coreclr/vm/i386/stublinkerx86.h +++ b/src/coreclr/vm/i386/stublinkerx86.h @@ -536,7 +536,7 @@ struct StubPrecode { return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32)); } - +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -562,6 +562,7 @@ struct StubPrecode { ExecutableWriterHolder rel32Holder(&m_rel32, 4); return rel32SetInterlocked(&m_rel32, rel32Holder.GetRW(), target, expected, (MethodDesc*)GetMethodDesc()); } +#endif // !DACCESS_COMPILE }; IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);) IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);) @@ -646,6 +647,13 @@ struct FixupPrecode { return dac_cast<TADDR>(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast<TADDR>(this); + } + TADDR GetMethodDesc(); #else // HAS_FIXUP_PRECODE_CHUNKS TADDR GetMethodDesc() diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index a1e4d93d881de..882e2c29cef04 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11875,7 +11875,7 @@ WORD CEEJitInfo::getRelocTypeHint(void * target) if (m_fAllowRel32) { // The JIT calls this method for data addresses only. It always uses REL32s for direct code targets. - if (IsPreferredExecutableRange(target)) + if (ExecutableAllocator::IsPreferredExecutableRange(target)) return IMAGE_REL_BASED_REL32; } #endif // TARGET_AMD64 diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index ca9d03c2141d3..e071d0717d179 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -238,15 +238,10 @@ extern "C" FCDECL2(Object*, ChkCastAny_NoCacheLookup, CORINFO_CLASS_HANDLE type, Object* obj); extern "C" FCDECL2(Object*, IsInstanceOfAny_NoCacheLookup, CORINFO_CLASS_HANDLE type, Object* obj); extern "C" FCDECL2(LPVOID, Unbox_Helper, CORINFO_CLASS_HANDLE type, Object* obj); -#if defined(TARGET_ARM64) || defined(FEATURE_WRITEBARRIER_COPY) // ARM64 JIT_WriteBarrier uses a special ABI and thus is not callable directly // Copied write barriers must be called at a different location extern "C" FCDECL2(VOID, JIT_WriteBarrier_Callable, Object **dst, Object *ref); #define WriteBarrier_Helper JIT_WriteBarrier_Callable -#else -// in other cases the regular JIT helper is callable.
-#define WriteBarrier_Helper JIT_WriteBarrier -#endif extern "C" FCDECL1(void, JIT_InternalThrow, unsigned exceptNum); extern "C" FCDECL1(void*, JIT_InternalThrowFromHelper, unsigned exceptNum); @@ -344,28 +339,25 @@ EXTERN_C FCDECL2_VV(UINT64, JIT_LRsz, UINT64 num, int shift); #ifdef TARGET_X86 +#define ENUM_X86_WRITE_BARRIER_REGISTERS() \ + X86_WRITE_BARRIER_REGISTER(EAX) \ + X86_WRITE_BARRIER_REGISTER(ECX) \ + X86_WRITE_BARRIER_REGISTER(EBX) \ + X86_WRITE_BARRIER_REGISTER(ESI) \ + X86_WRITE_BARRIER_REGISTER(EDI) \ + X86_WRITE_BARRIER_REGISTER(EBP) + extern "C" { - void STDCALL JIT_CheckedWriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEBP(); // JIThelp.asm/JIThelp.s - - void STDCALL JIT_DebugWriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEBP(); // JIThelp.asm/JIThelp.s - - void STDCALL JIT_WriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEBP(); // JIThelp.asm/JIThelp.s + +// JIThelp.asm/JIThelp.s +#define X86_WRITE_BARRIER_REGISTER(reg) \ + void STDCALL JIT_CheckedWriteBarrier##reg(); \ + void STDCALL JIT_DebugWriteBarrier##reg(); \ + void STDCALL JIT_WriteBarrier##reg(); + + ENUM_X86_WRITE_BARRIER_REGISTERS() +#undef X86_WRITE_BARRIER_REGISTER void STDCALL JIT_WriteBarrierGroup(); void STDCALL JIT_WriteBarrierGroup_End(); diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 4f222be4a2c03..0a77e4445f06f 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -1137,7 +1137,7 @@ void LoaderAllocator::Init(BaseDomain *pDomain, BYTE *pExecutableHeapMemory) _ASSERTE(dwTotalReserveMemSize <= VIRTUAL_ALLOC_RESERVE_GRANULARITY); #endif - BYTE * initReservedMem = ClrVirtualAllocExecutable(dwTotalReserveMemSize, MEM_RESERVE, PAGE_NOACCESS); + BYTE * initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_InitialReservedMemForLoaderHeaps = initReservedMem; @@ -1672,18 +1672,25 @@ void AssemblyLoaderAllocator::SetCollectible() { CONTRACTL { - THROWS; + NOTHROW; } CONTRACTL_END; m_IsCollectible = true; -#ifndef DACCESS_COMPILE - m_pShuffleThunkCache = new ShuffleThunkCache(m_pStubHeap); -#endif } #ifndef DACCESS_COMPILE +void AssemblyLoaderAllocator::Init(AppDomain* pAppDomain) +{ + m_Id.Init(); + LoaderAllocator::Init((BaseDomain *)pAppDomain); + if (IsCollectible()) + { + m_pShuffleThunkCache = new ShuffleThunkCache(m_pStubHeap); + } +} + #ifndef CROSSGEN_COMPILE AssemblyLoaderAllocator::~AssemblyLoaderAllocator() diff --git a/src/coreclr/vm/loaderallocator.inl b/src/coreclr/vm/loaderallocator.inl index a826675ccc93c..993732d4010f8 100644 --- a/src/coreclr/vm/loaderallocator.inl +++ 
b/src/coreclr/vm/loaderallocator.inl @@ -21,12 +21,6 @@ inline void GlobalLoaderAllocator::Init(BaseDomain *pDomain) LoaderAllocator::Init(pDomain, m_ExecutableHeapInstance); } -inline void AssemblyLoaderAllocator::Init(AppDomain* pAppDomain) -{ - m_Id.Init(); - LoaderAllocator::Init((BaseDomain *)pAppDomain); -} - inline BOOL LoaderAllocatorID::Equals(LoaderAllocatorID *pId) { LIMITED_METHOD_CONTRACT; diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index bd3984d8697cd..db308ab208a8e 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -4188,46 +4188,6 @@ c_CentralJumpCode = { }; #include -#elif defined(TARGET_AMD64) - -#include -static const struct CentralJumpCode { - BYTE m_movzxRAX[4]; - BYTE m_shlEAX[4]; - BYTE m_movRAX[2]; - MethodDesc* m_pBaseMD; - BYTE m_addR10RAX[3]; - BYTE m_jmp[1]; - INT32 m_rel32; - - inline void Setup(CentralJumpCode* pCodeRX, MethodDesc* pMD, PCODE target, LoaderAllocator *pLoaderAllocator) { - WRAPPER_NO_CONTRACT; - m_pBaseMD = pMD; - m_rel32 = rel32UsingJumpStub(&pCodeRX->m_rel32, target, pMD, pLoaderAllocator); - } - - inline BOOL CheckTarget(TADDR target) { - WRAPPER_NO_CONTRACT; - TADDR addr = rel32Decode(PTR_HOST_MEMBER_TADDR(CentralJumpCode, this, m_rel32)); - if (*PTR_BYTE(addr) == 0x48 && - *PTR_BYTE(addr+1) == 0xB8 && - *PTR_BYTE(addr+10) == 0xFF && - *PTR_BYTE(addr+11) == 0xE0) - { - addr = *PTR_TADDR(addr+2); - } - return (addr == target); - } -} -c_CentralJumpCode = { - { 0x48, 0x0F, 0xB6, 0xC0 }, // movzx rax,al - { 0x48, 0xC1, 0xE0, MethodDesc::ALIGNMENT_SHIFT }, // shl rax, MethodDesc::ALIGNMENT_SHIFT - { 0x49, 0xBA }, NULL, // mov r10, pBaseMD - { 0x4C, 0x03, 0xD0 }, // add r10,rax - { 0xE9 }, 0 // jmp PreStub -}; -#include - #elif defined(TARGET_ARM) #include diff --git a/src/coreclr/vm/precode.cpp b/src/coreclr/vm/precode.cpp index 80731c191e737..0bd2bd657f9ad 100644 --- a/src/coreclr/vm/precode.cpp +++ b/src/coreclr/vm/precode.cpp @@ -480,7 +480,9 @@ void Precode::Reset() #ifdef HAS_FIXUP_PRECODE_CHUNKS if (t == PRECODE_FIXUP) { - size = sizeof(FixupPrecode) + sizeof(PTR_MethodDesc); + // The writeable size the Init method accesses is dynamic depending on + // the FixupPrecode members. + size = ((FixupPrecode*)this)->GetSizeRW(); } else #endif diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp index 0971334af4d31..e61802b984950 100644 --- a/src/coreclr/vm/stackwalk.cpp +++ b/src/coreclr/vm/stackwalk.cpp @@ -713,14 +713,12 @@ UINT_PTR Thread::VirtualUnwindToFirstManagedCallFrame(T_CONTEXT* pContext) // get our caller's PSP, or our caller's caller's SP. 
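Returning to the precode.cpp hunk above: Precode::Reset can no longer use a fixed size because the writable span of a FixupPrecode runs from the precode itself through the MethodDesc pointer stored past the end of its chunk, which depends on m_PrecodeChunkIndex. The GetSizeRW arithmetic, transplanted onto a simplified layout (fields other than m_PrecodeChunkIndex are placeholders):

#include <cstdint>
#include <cstdio>
#include <cstddef>

// Toy layout: a chunk of fixup precodes followed by one shared pointer slot.
struct FixupPrecode
{
    uint8_t m_code[6];
    uint8_t m_PrecodeChunkIndex;     // precodes remaining before the chunk base
    uint8_t m_MethodDescChunkIndex;

    uintptr_t GetBase() const
    {
        // The chunk base sits right after the last precode of this chunk.
        return (uintptr_t)this + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode);
    }
    size_t GetSizeRW() const
    {
        // Writable span: from this precode through the shared pointer slot.
        return GetBase() + sizeof(void *) - (uintptr_t)this;
    }
};

int main()
{
    FixupPrecode chunk[3] = {};
    chunk[0].m_PrecodeChunkIndex = 2;        // two more precodes before the base
    printf("%zu\n", chunk[0].GetSizeRW());   // 3 * sizeof(FixupPrecode) + 8 on 64-bit
    return 0;
}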
while (!ExecutionManager::IsManagedCode(uControlPc)) { -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(uControlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame uControlPc = AdjustWriteBarrierIP(uControlPc); SetIP(pContext, uControlPc); } -#endif // FEATURE_WRITEBARRIER_COPY #ifndef TARGET_UNIX uControlPc = VirtualUnwindCallFrame(pContext); diff --git a/src/coreclr/vm/stublink.cpp b/src/coreclr/vm/stublink.cpp index 04a33e3982613..304cb4fb35b44 100644 --- a/src/coreclr/vm/stublink.cpp +++ b/src/coreclr/vm/stublink.cpp @@ -846,7 +846,7 @@ Stub *StubLinker::Link(LoaderHeap *pHeap, DWORD flags) ); ASSERT(pStub != NULL); - bool fSuccess = EmitStub(pStub, globalsize, pHeap); + bool fSuccess = EmitStub(pStub, globalsize, size, pHeap); #ifdef STUBLINKER_GENERATES_UNWIND_INFO if (fSuccess) @@ -1007,13 +1007,13 @@ int StubLinker::CalculateSize(int* pGlobalSize) return globalsize + datasize; } -bool StubLinker::EmitStub(Stub* pStub, int globalsize, LoaderHeap* pHeap) +bool StubLinker::EmitStub(Stub* pStub, int globalsize, int totalSize, LoaderHeap* pHeap) { STANDARD_VM_CONTRACT; BYTE *pCode = (BYTE*)(pStub->GetBlob()); - ExecutableWriterHolder<Stub> stubWriterHolder(pStub, sizeof(Stub)); + ExecutableWriterHolder<Stub> stubWriterHolder(pStub, sizeof(Stub) + totalSize); Stub *pStubRW = stubWriterHolder.GetRW(); BYTE *pCodeRW = (BYTE*)(pStubRW->GetBlob()); @@ -2013,11 +2013,7 @@ VOID Stub::DeleteStub() FillMemory(this+1, m_numCodeBytes, 0xcc); #endif -#ifndef TARGET_UNIX - DeleteExecutable((BYTE*)GetAllocationBase()); -#else delete [] (BYTE*)GetAllocationBase(); -#endif } } @@ -2124,11 +2120,7 @@ Stub* Stub::NewStub(PTR_VOID pCode, DWORD flags) BYTE *pBlock; if (pHeap == NULL) { -#ifndef TARGET_UNIX - pBlock = new (executable) BYTE[totalSize]; -#else pBlock = new BYTE[totalSize]; -#endif } else { diff --git a/src/coreclr/vm/stublink.h b/src/coreclr/vm/stublink.h index 94326f9962ea7..9613fd48f687d 100644 --- a/src/coreclr/vm/stublink.h +++ b/src/coreclr/vm/stublink.h @@ -395,7 +395,7 @@ class StubLinker // Writes out the code element into memory following the // stub object.
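The stublink.cpp change above is another sizing fix: EmitStub writes the code bytes that live after the Stub header, so an ExecutableWriterHolder covering only sizeof(Stub) would leave those bytes outside the writable view; hence the new totalSize parameter. In miniature (MapWritable is a hypothetical stand-in that is the identity when W^X is off):

#include <cstddef>

struct StubHeader { unsigned m_refCount; unsigned m_numCodeBytes; };

// Identity here; on a W^X platform this would return a distinct writable
// alias of the same pages (hypothetical helper, not the real API).
static void *MapWritable(void *p, size_t) { return p; }

static void Emit(StubHeader *stub, size_t codeSize)
{
    // The writable view must span the header *and* the trailing code bytes;
    // mapping only sizeof(*stub) would leave the code outside the RW alias,
    // which is exactly what the totalSize parameter corrects.
    unsigned char *rw = (unsigned char *)MapWritable(stub, sizeof(*stub) + codeSize);
    rw[sizeof(*stub)] = 0xC3;   // first code byte (x86 'ret'), written via RW
}

int main()
{
    alignas(StubHeader) unsigned char block[sizeof(StubHeader) + 16] = {};
    Emit(reinterpret_cast<StubHeader *>(block), 16);
    return block[sizeof(StubHeader)] == 0xC3 ? 0 : 1;
}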
- bool EmitStub(Stub* pStub, int globalsize, LoaderHeap* pHeap); + bool EmitStub(Stub* pStub, int globalsize, int totalSize, LoaderHeap* pHeap); CodeRun *GetLastCodeRunIfAny(); diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 4dfa4a22b3fa4..c4a16d0b04484 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -1078,18 +1078,30 @@ DWORD_PTR Thread::OBJREF_HASH = OBJREF_TABSIZE; extern "C" void STDCALL JIT_PatchedCodeStart(); extern "C" void STDCALL JIT_PatchedCodeLast(); -#ifdef FEATURE_WRITEBARRIER_COPY - static void* s_barrierCopy = NULL; BYTE* GetWriteBarrierCodeLocation(VOID* barrier) { - return (BYTE*)s_barrierCopy + ((BYTE*)barrier - (BYTE*)JIT_PatchedCodeStart); + if (IsWriteBarrierCopyEnabled()) + { + return (BYTE*)PINSTRToPCODE((TADDR)s_barrierCopy + ((TADDR)barrier - (TADDR)JIT_PatchedCodeStart)); + } + else + { + return (BYTE*)barrier; + } } BOOL IsIPInWriteBarrierCodeCopy(PCODE controlPc) { - return (s_barrierCopy <= (void*)controlPc && (void*)controlPc < ((BYTE*)s_barrierCopy + ((BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart))); + if (IsWriteBarrierCopyEnabled()) + { + return (s_barrierCopy <= (void*)controlPc && (void*)controlPc < ((BYTE*)s_barrierCopy + ((BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart))); + } + else + { + return FALSE; + } } PCODE AdjustWriteBarrierIP(PCODE controlPc) @@ -1100,14 +1112,21 @@ PCODE AdjustWriteBarrierIP(PCODE controlPc) return (PCODE)JIT_PatchedCodeStart + (controlPc - (PCODE)s_barrierCopy); } +#ifdef TARGET_X86 +extern "C" void *JIT_WriteBarrierEAX_Loc; +#else extern "C" void *JIT_WriteBarrier_Loc; +#endif + #ifdef TARGET_ARM64 extern "C" void (*JIT_WriteBarrier_Table)(); extern "C" void *JIT_WriteBarrier_Loc = 0; extern "C" void *JIT_WriteBarrier_Table_Loc = 0; #endif // TARGET_ARM64 -#endif // FEATURE_WRITEBARRIER_COPY +#ifdef TARGET_ARM +extern "C" void *JIT_WriteBarrier_Loc = 0; +#endif // TARGET_ARM #ifndef TARGET_UNIX // g_TlsIndex is only used by the DAC. Disable optimizations around it to prevent it from getting optimized out. 
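With FEATURE_WRITEBARRIER_COPY gone, the threads.cpp hunk above and the IsWriteBarrierCopyEnabled inline further below (threads.h) key everything off a runtime check instead of a compile-time one: copying is always on for macOS hosts and otherwise follows ExecutableAllocator::IsWXORXEnabled(). The address translation this enables, reduced to its arithmetic (static buffers stand in for the patched-code region and its copy):

#include <cstdint>
#include <cstdio>

static bool    s_wxorxEnabled = true;   // ExecutableAllocator::IsWXORXEnabled() stand-in
static uint8_t s_patchedCode[64];       // stands in for JIT_PatchedCodeStart..Last
static uint8_t s_copy[64];              // stands in for s_barrierCopy

static bool IsWriteBarrierCopyEnabled() { return s_wxorxEnabled; }

static uint8_t *GetWriteBarrierCodeLocation(void *barrier)
{
    // Copying on: translate the original barrier address into the copy.
    // Copying off: the original address is the one that executes.
    return IsWriteBarrierCopyEnabled()
        ? s_copy + ((uint8_t *)barrier - s_patchedCode)
        : (uint8_t *)barrier;
}

int main()
{
    uint8_t *loc = GetWriteBarrierCodeLocation(&s_patchedCode[8]);
    printf("maps into copy: %d\n", loc == &s_copy[8]);       // 1
    s_wxorxEnabled = false;
    loc = GetWriteBarrierCodeLocation(&s_patchedCode[8]);
    printf("identity: %d\n", loc == &s_patchedCode[8]);      // 1
    return 0;
}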
@@ -1138,50 +1157,80 @@ void InitThreadManager() _ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart > (ptrdiff_t)0); _ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart < (ptrdiff_t)GetOsPageSize()); -#ifdef FEATURE_WRITEBARRIER_COPY - s_barrierCopy = ClrVirtualAlloc(NULL, g_SystemInfo.dwAllocationGranularity, MEM_COMMIT, PAGE_EXECUTE_READWRITE); - if (s_barrierCopy == NULL) + if (IsWriteBarrierCopyEnabled()) { - _ASSERTE(!"ClrVirtualAlloc of GC barrier code page failed"); - COMPlusThrowWin32(); - } + s_barrierCopy = ExecutableAllocator::Instance()->Reserve(g_SystemInfo.dwAllocationGranularity); + ExecutableAllocator::Instance()->Commit(s_barrierCopy, g_SystemInfo.dwAllocationGranularity, true); + if (s_barrierCopy == NULL) + { + _ASSERTE(!"Allocation of GC barrier code page failed"); + COMPlusThrowWin32(); + } - { - size_t writeBarrierSize = (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart; - ExecutableWriterHolder barrierWriterHolder(s_barrierCopy, writeBarrierSize); - memcpy(barrierWriterHolder.GetRW(), (BYTE*)JIT_PatchedCodeStart, writeBarrierSize); - } + { + size_t writeBarrierSize = (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart; + ExecutableWriterHolder barrierWriterHolder(s_barrierCopy, writeBarrierSize); + memcpy(barrierWriterHolder.GetRW(), (BYTE*)JIT_PatchedCodeStart, writeBarrierSize); + } - // Store the JIT_WriteBarrier copy location to a global variable so that helpers - // can jump to it. - JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier); + // Store the JIT_WriteBarrier copy location to a global variable so that helpers + // can jump to it. +#ifdef TARGET_X86 + JIT_WriteBarrierEAX_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX); - SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); +#define X86_WRITE_BARRIER_REGISTER(reg) \ + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_##reg, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier##reg)); \ + ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier##reg), W("@WriteBarrier" #reg)); -#ifdef TARGET_ARM64 - // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. - JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); + ENUM_X86_WRITE_BARRIER_REGISTERS() - SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); - SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); -#endif // TARGET_ARM64 +#undef X86_WRITE_BARRIER_REGISTER -#else // FEATURE_WRITEBARRIER_COPY +#else // TARGET_X86 + JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier); +#endif // TARGET_X86 + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); + ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), W("@WriteBarrier")); - // I am using virtual protect to cover the entire range that this code falls in. - // +#ifdef TARGET_ARM64 + // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. 
+ JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); +#endif // TARGET_ARM64 - // We could reset it to non-writeable inbetween GCs and such, but then we'd have to keep on re-writing back and forth, - // so instead we'll leave it writable from here forward. +#if defined(TARGET_ARM64) || defined(TARGET_ARM) + SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); + ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier), W("@CheckedWriteBarrier")); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); + ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier), W("@ByRefWriteBarrier")); +#endif // TARGET_ARM64 || TARGET_ARM - DWORD oldProt; - if (!ClrVirtualProtect((void *)JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart, - PAGE_EXECUTE_READWRITE, &oldProt)) + } + else { - _ASSERTE(!"ClrVirtualProtect of code page failed"); - COMPlusThrowWin32(); + // I am using virtual protect to cover the entire range that this code falls in. + // + + // We could reset it to non-writeable inbetween GCs and such, but then we'd have to keep on re-writing back and forth, + // so instead we'll leave it writable from here forward. + + DWORD oldProt; + if (!ClrVirtualProtect((void *)JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart, + PAGE_EXECUTE_READWRITE, &oldProt)) + { + _ASSERTE(!"ClrVirtualProtect of code page failed"); + COMPlusThrowWin32(); + } + +#ifdef TARGET_X86 + JIT_WriteBarrierEAX_Loc = (void*)JIT_WriteBarrierEAX; +#else + JIT_WriteBarrier_Loc = (void*)JIT_WriteBarrier; +#endif +#ifdef TARGET_ARM64 + // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. 
+ JIT_WriteBarrier_Table_Loc = (void*)&JIT_WriteBarrier_Table; +#endif // TARGET_ARM64 } -#endif // FEATURE_WRITEBARRIER_COPY #ifndef TARGET_UNIX _ASSERTE(GetThreadNULLOk() == NULL); diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 0aadbf40260ca..8a66c0555129d 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -6272,18 +6272,23 @@ class ThreadStateNCStackHolder BOOL Debug_IsLockedViaThreadSuspension(); -#ifdef FEATURE_WRITEBARRIER_COPY +inline BOOL IsWriteBarrierCopyEnabled() +{ +#ifdef DACCESS_COMPILE + return FALSE; +#else // DACCESS_COMPILE +#ifdef HOST_OSX + return TRUE; +#else + return ExecutableAllocator::IsWXORXEnabled(); +#endif +#endif // DACCESS_COMPILE +} BYTE* GetWriteBarrierCodeLocation(VOID* barrier); BOOL IsIPInWriteBarrierCodeCopy(PCODE controlPc); PCODE AdjustWriteBarrierIP(PCODE controlPc); -#else // FEATURE_WRITEBARRIER_COPY - -#define GetWriteBarrierCodeLocation(barrier) ((BYTE*)(barrier)) - -#endif // FEATURE_WRITEBARRIER_COPY - #if !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE) extern thread_local Thread* t_pStackWalkerWalkingThread; #define SET_THREAD_TYPE_STACKWALKER(pThread) t_pStackWalkerWalkingThread = pThread diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 95d568d641c73..3af4c52afc9bb 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -641,7 +641,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA dwTotalReserveMemSize); } - initReservedMem = ClrVirtualAllocExecutable (dwTotalReserveMemSize, MEM_RESERVE, PAGE_NOACCESS); + initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_initialReservedMemForHeaps = (BYTE *) initReservedMem; @@ -2766,11 +2766,7 @@ DispatchHolder *VirtualCallStubManager::GenerateDispatchStub(PCODE ad } #endif - ExecutableWriterHolder dispatchWriterHolder(holder, sizeof(DispatchHolder) -#ifdef TARGET_AMD64 - + sizeof(DispatchStubShort) -#endif - ); + ExecutableWriterHolder dispatchWriterHolder(holder, dispatchHolderSize); dispatchWriterHolder.GetRW()->Initialize(holder, addrOfCode, addrOfFail, (size_t)pMTExpected @@ -2833,9 +2829,9 @@ DispatchHolder *VirtualCallStubManager::GenerateDispatchStubLong(PCODE } CONTRACT_END; //allocate from the requisite heap and copy the template over it. - DispatchHolder * holder = (DispatchHolder*) (void*) - dispatch_heap->AllocAlignedMem(DispatchHolder::GetHolderSize(DispatchStub::e_TYPE_LONG), CODE_SIZE_ALIGN); - ExecutableWriterHolder dispatchWriterHolder(holder, sizeof(DispatchHolder) + sizeof(DispatchStubLong)); + size_t dispatchHolderSize = DispatchHolder::GetHolderSize(DispatchStub::e_TYPE_LONG); + DispatchHolder * holder = (DispatchHolder*) (void*)dispatch_heap->AllocAlignedMem(dispatchHolderSize, CODE_SIZE_ALIGN); + ExecutableWriterHolder dispatchWriterHolder(holder, dispatchHolderSize); dispatchWriterHolder.GetRW()->Initialize(holder, addrOfCode, addrOfFail,