From da1940139c5bea1e6185ec38cc56621d5c09be21 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Wed, 31 Mar 2021 16:11:47 +0200
Subject: [PATCH 1/8] W^X support

This change is the last part of enabling W^X support. It adds the actual executable allocator that handles all double-mapped memory allocations and creates the writable mappings. The platform-specific functionality is placed in a new minipal that will serve as a basis for the future removal of Windows API usage from the native runtime.

The final state of the change was tested on all supported platforms using coreclr pri 1 tests, with W^X both enabled and disabled via the COMPlus_EnableWXORX variable. The debugger changes were tested using the managed debugger test suite on Windows x64, x86 and on Apple Silicon so far; further testing on other platforms is in progress.

--- src/coreclr/CMakeLists.txt | 3 + src/coreclr/clrdefinitions.cmake | 4 - src/coreclr/debug/ee/arm64/arm64walker.cpp | 9 +- src/coreclr/debug/ee/controller.cpp | 47 +- src/coreclr/debug/ee/controller.h | 18 +- src/coreclr/debug/ee/debugger.cpp | 30 +- src/coreclr/debug/ee/debugger.h | 32 +- src/coreclr/debug/inc/amd64/primitives.h | 22 +- src/coreclr/debug/inc/arm/primitives.h | 13 +- src/coreclr/debug/inc/arm64/primitives.h | 6 +- src/coreclr/debug/inc/i386/primitives.h | 13 +- .../dlls/mscoree/coreclr/CMakeLists.txt | 1 + src/coreclr/inc/CrstTypes.def | 4 + src/coreclr/inc/clrconfigvalues.h | 4 + src/coreclr/inc/crsttypes.h | 175 ++-- src/coreclr/inc/executableallocator.h | 201 ++++- src/coreclr/inc/jithelpers.h | 12 +- src/coreclr/inc/utilcode.h | 29 - src/coreclr/minipal/CMakeLists.txt | 7 + src/coreclr/minipal/Unix/CMakeLists.txt | 4 + src/coreclr/minipal/Unix/doublemapping.cpp | 168 ++++ src/coreclr/minipal/Windows/CMakeLists.txt | 4 + src/coreclr/minipal/Windows/doublemapping.cpp | 246 ++++++ src/coreclr/minipal/minipal.h | 78 ++ src/coreclr/utilcode/CMakeLists.txt | 1 + src/coreclr/utilcode/executableallocator.cpp | 755 ++++++++++++++++++ src/coreclr/utilcode/loaderheap.cpp | 18 +- src/coreclr/utilcode/util.cpp | 162 ---- src/coreclr/vm/CMakeLists.txt | 4 +- src/coreclr/vm/amd64/JitHelpers_Fast.asm | 79 +- src/coreclr/vm/amd64/jithelpers_fast.S | 26 +- src/coreclr/vm/amd64/jitinterfaceamd64.cpp | 20 +- src/coreclr/vm/arm/armsinglestepper.cpp | 30 +- src/coreclr/vm/arm/asmhelpers.S | 10 + src/coreclr/vm/arm/asmhelpers.asm | 12 + src/coreclr/vm/arm/cgencpu.h | 13 + src/coreclr/vm/arm/stubs.cpp | 19 +- src/coreclr/vm/arm64/arm64singlestepper.cpp | 12 +- src/coreclr/vm/arm64/asmhelpers.S | 10 - src/coreclr/vm/arm64/asmhelpers.asm | 35 +- src/coreclr/vm/arm64/cgencpu.h | 13 + src/coreclr/vm/arm64/stubs.cpp | 10 +- src/coreclr/vm/ceemain.cpp | 9 +- src/coreclr/vm/class.cpp | 5 +- src/coreclr/vm/codeman.cpp | 27 +- src/coreclr/vm/comcallablewrapper.cpp | 18 +- src/coreclr/vm/comcallablewrapper.h | 4 + src/coreclr/vm/comdelegate.cpp | 2 +- src/coreclr/vm/dynamicmethod.cpp | 7 +- src/coreclr/vm/excep.cpp | 2 - src/coreclr/vm/exceptionhandling.cpp | 6 - src/coreclr/vm/gccover.cpp | 4 +- src/coreclr/vm/i386/jithelp.S | 30 +- src/coreclr/vm/i386/jithelp.asm | 35 +- src/coreclr/vm/i386/jitinterfacex86.cpp | 84 +- src/coreclr/vm/i386/stublinkerx86.cpp | 2 +- src/coreclr/vm/i386/stublinkerx86.h | 10 +- src/coreclr/vm/jitinterface.cpp | 2 +- src/coreclr/vm/jitinterface.h | 5 - src/coreclr/vm/loaderallocator.cpp | 17 +- src/coreclr/vm/loaderallocator.inl | 6 - src/coreclr/vm/method.cpp | 40 - src/coreclr/vm/precode.cpp | 4 +- src/coreclr/vm/stackwalk.cpp
| 2 - src/coreclr/vm/stublink.cpp | 14 +- src/coreclr/vm/stublink.h | 2 +- src/coreclr/vm/threads.cpp | 119 ++- src/coreclr/vm/threads.h | 19 +- src/coreclr/vm/virtualcallstub.cpp | 2 +- 69 files changed, 2139 insertions(+), 697 deletions(-) create mode 100644 src/coreclr/minipal/CMakeLists.txt create mode 100644 src/coreclr/minipal/Unix/CMakeLists.txt create mode 100644 src/coreclr/minipal/Unix/doublemapping.cpp create mode 100644 src/coreclr/minipal/Windows/CMakeLists.txt create mode 100644 src/coreclr/minipal/Windows/doublemapping.cpp create mode 100644 src/coreclr/minipal/minipal.h create mode 100644 src/coreclr/utilcode/executableallocator.cpp diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 78aa969473525..b4a4859342702 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -119,6 +119,8 @@ add_subdirectory(pal/prebuilt/inc) add_subdirectory(debug/debug-pal) +add_subdirectory(minipal) + if(CLR_CMAKE_TARGET_WIN32) add_subdirectory(gc/sample) endif() @@ -171,6 +173,7 @@ include_directories("classlibnative/cryptography") include_directories("classlibnative/inc") include_directories("${GENERATED_INCLUDE_DIR}") include_directories("hosts/inc") +include_directories("minipal") if(CLR_CMAKE_TARGET_WIN32 AND FEATURE_EVENT_TRACE) include_directories("${GENERATED_INCLUDE_DIR}/etw") diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index eeb421cac4c2f..0485ff99a99eb 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -224,10 +224,6 @@ if(CLR_CMAKE_TARGET_WIN32) endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) endif(CLR_CMAKE_TARGET_WIN32) -if(CLR_CMAKE_TARGET_OSX) - add_definitions(-DFEATURE_WRITEBARRIER_COPY) -endif(CLR_CMAKE_TARGET_OSX) - if (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) add_compile_definitions($<$>>:FEATURE_EH_FUNCLETS>) endif (NOT CLR_CMAKE_TARGET_ARCH_I386 OR NOT CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/debug/ee/arm64/arm64walker.cpp b/src/coreclr/debug/ee/arm64/arm64walker.cpp index ae6e8c1fc2933..6c4dee9349700 100644 --- a/src/coreclr/debug/ee/arm64/arm64walker.cpp +++ b/src/coreclr/debug/ee/arm64/arm64walker.cpp @@ -171,7 +171,14 @@ BYTE* NativeWalker::SetupOrSimulateInstructionForPatchSkip(T_CONTEXT * context, { CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, 0xd503201f); //Add Nop in buffer - m_pSharedPatchBypassBuffer->RipTargetFixup = ip; //Control Flow simulation alone is done DebuggerPatchSkip::TriggerExceptionHook +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder ripTargetFixupWriterHolder(&m_pSharedPatchBypassBuffer->RipTargetFixup, sizeof(UINT_PTR)); + UINT_PTR *pRipTargetFixupRW = ripTargetFixupWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + UINT_PTR *pRipTargetFixupRW = &m_pSharedPatchBypassBuffer->RipTargetFixup; +#endif // HOST_OSX && HOST_ARM64 + + *pRipTargetFixupRW = ip; //Control Flow simulation alone is done DebuggerPatchSkip::TriggerExceptionHook LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x is a Control Flow instr \n", opcode)); if (walk == WALK_CALL) //initialize Lr diff --git a/src/coreclr/debug/ee/controller.cpp b/src/coreclr/debug/ee/controller.cpp index b17ae8f115002..f9304d16ab070 100644 --- a/src/coreclr/debug/ee/controller.cpp +++ b/src/coreclr/debug/ee/controller.cpp @@ -84,8 +84,13 @@ SharedPatchBypassBuffer* DebuggerControllerPatch::GetOrCreateSharedPatchBypassBu if (m_pSharedPatchBypassBuffer == NULL) { void 
*pSharedPatchBypassBufferRX = g_pDebugger->GetInteropSafeExecutableHeap()->Alloc(sizeof(SharedPatchBypassBuffer)); +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder sharedPatchBypassBufferWriterHolder((SharedPatchBypassBuffer*)pSharedPatchBypassBufferRX, sizeof(SharedPatchBypassBuffer)); - new (sharedPatchBypassBufferWriterHolder.GetRW()) SharedPatchBypassBuffer(); + void *pSharedPatchBypassBufferRW = sharedPatchBypassBufferWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + void *pSharedPatchBypassBufferRW = pSharedPatchBypassBufferRX; +#endif // HOST_OSX && HOST_ARM64 + new (pSharedPatchBypassBufferRW) SharedPatchBypassBuffer(); m_pSharedPatchBypassBuffer = (SharedPatchBypassBuffer*)pSharedPatchBypassBufferRX; _ASSERTE(m_pSharedPatchBypassBuffer); @@ -4351,7 +4356,15 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // m_pSharedPatchBypassBuffer = patch->GetOrCreateSharedPatchBypassBuffer(); - BYTE* patchBypass = m_pSharedPatchBypassBuffer->PatchBypass; +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder sharedPatchBypassBufferWriterHolder((SharedPatchBypassBuffer*)m_pSharedPatchBypassBuffer, sizeof(SharedPatchBypassBuffer)); + SharedPatchBypassBuffer *pSharedPatchBypassBufferRW = sharedPatchBypassBufferWriterHolder.GetRW(); +#else // HOST_OSX && HOST_ARM64 + SharedPatchBypassBuffer *pSharedPatchBypassBufferRW = m_pSharedPatchBypassBuffer; +#endif // HOST_OSX && HOST_ARM64 + + BYTE* patchBypassRX = m_pSharedPatchBypassBuffer->PatchBypass; + BYTE* patchBypassRW = pSharedPatchBypassBufferRW->PatchBypass; LOG((LF_CORDB, LL_INFO10000, "DPS::DPS: Patch skip for opcode 0x%.4x at address %p buffer allocated at 0x%.8x\n", patch->opcode, patch->address, m_pSharedPatchBypassBuffer)); // Copy the instruction block over to the patch skip @@ -4367,19 +4380,19 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // the 2nd skip executes the new jump-stamp code and not the original method prologue code. Copying // the code every time ensures that we have the most up-to-date version of the code in the buffer. _ASSERTE( patch->IsBound() ); - CopyInstructionBlock(patchBypass, (const BYTE *)patch->address); + CopyInstructionBlock(patchBypassRW, (const BYTE *)patch->address); // Technically, we could create a patch skipper for an inactive patch, but we rely on the opcode being // set here. _ASSERTE( patch->IsActivated() ); - CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, patch->opcode); + CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypassRW, patch->opcode); LOG((LF_CORDB, LL_EVERYTHING, "SetInstruction was called\n")); // // Look at instruction to get some attributes // - NativeWalker::DecodeInstructionForPatchSkip(patchBypass, &(m_instrAttrib)); + NativeWalker::DecodeInstructionForPatchSkip(patchBypassRX, &(m_instrAttrib)); #if defined(TARGET_AMD64) @@ -4395,33 +4408,33 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, // Populate the RIP-relative buffer with the current value if needed // - BYTE* bufferBypass = m_pSharedPatchBypassBuffer->BypassBuffer; + BYTE* bufferBypassRW = pSharedPatchBypassBufferRW->BypassBuffer; // Overwrite the *signed* displacement. 
- int dwOldDisp = *(int*)(&patchBypass[m_instrAttrib.m_dwOffsetToDisp]); + int dwOldDisp = *(int*)(&patchBypassRX[m_instrAttrib.m_dwOffsetToDisp]); int dwNewDisp = offsetof(SharedPatchBypassBuffer, BypassBuffer) - (offsetof(SharedPatchBypassBuffer, PatchBypass) + m_instrAttrib.m_cbInstr); - *(int*)(&patchBypass[m_instrAttrib.m_dwOffsetToDisp]) = dwNewDisp; + *(int*)(&patchBypassRW[m_instrAttrib.m_dwOffsetToDisp]) = dwNewDisp; // This could be an LEA, which we'll just have to change into a MOV // and copy the original address - if (((patchBypass[0] == 0x4C) || (patchBypass[0] == 0x48)) && (patchBypass[1] == 0x8d)) + if (((patchBypassRX[0] == 0x4C) || (patchBypassRX[0] == 0x48)) && (patchBypassRX[1] == 0x8d)) { - patchBypass[1] = 0x8b; // MOV reg, mem + patchBypassRW[1] = 0x8b; // MOV reg, mem _ASSERTE((int)sizeof(void*) <= SharedPatchBypassBuffer::cbBufferBypass); - *(void**)bufferBypass = (void*)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); + *(void**)bufferBypassRW = (void*)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); } else { _ASSERTE(m_instrAttrib.m_cOperandSize <= SharedPatchBypassBuffer::cbBufferBypass); // Copy the data into our buffer. - memcpy(bufferBypass, patch->address + m_instrAttrib.m_cbInstr + dwOldDisp, m_instrAttrib.m_cOperandSize); + memcpy(bufferBypassRW, patch->address + m_instrAttrib.m_cbInstr + dwOldDisp, m_instrAttrib.m_cOperandSize); if (m_instrAttrib.m_fIsWrite) { // save the actual destination address and size so when we TriggerSingleStep() we can update the value - m_pSharedPatchBypassBuffer->RipTargetFixup = (UINT_PTR)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); - m_pSharedPatchBypassBuffer->RipTargetFixupSize = m_instrAttrib.m_cOperandSize; + pSharedPatchBypassBufferRW->RipTargetFixup = (UINT_PTR)(patch->address + m_instrAttrib.m_cbInstr + dwOldDisp); + pSharedPatchBypassBufferRW->RipTargetFixupSize = m_instrAttrib.m_cOperandSize; } } } @@ -4490,17 +4503,17 @@ DebuggerPatchSkip::DebuggerPatchSkip(Thread *thread, #else // FEATURE_EMULATE_SINGLESTEP #ifdef TARGET_ARM64 - patchBypass = NativeWalker::SetupOrSimulateInstructionForPatchSkip(context, m_pSharedPatchBypassBuffer, (const BYTE *)patch->address, patch->opcode); + patchBypassRX = NativeWalker::SetupOrSimulateInstructionForPatchSkip(context, m_pSharedPatchBypassBuffer, (const BYTE *)patch->address, patch->opcode); #endif //TARGET_ARM64 //set eip to point to buffer... 
- SetIP(context, (PCODE)patchBypass); + SetIP(context, (PCODE)patchBypassRX); if (context ==(T_CONTEXT*) &c) thread->SetThreadContext(&c); - LOG((LF_CORDB, LL_INFO10000, "DPS::DPS Bypass at 0x%p for opcode %p \n", patchBypass, patch->opcode)); + LOG((LF_CORDB, LL_INFO10000, "DPS::DPS Bypass at 0x%p for opcode %p \n", patchBypassRX, patch->opcode)); // // Turn on single step (if the platform supports it) so we can diff --git a/src/coreclr/debug/ee/controller.h b/src/coreclr/debug/ee/controller.h index 12b1106f7a4b2..6996439c31fba 100644 --- a/src/coreclr/debug/ee/controller.h +++ b/src/coreclr/debug/ee/controller.h @@ -266,14 +266,28 @@ class SharedPatchBypassBuffer LONG AddRef() { - LONG newRefCount = InterlockedIncrement(&m_refCount); +#if !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<LONG> refCountWriterHolder(&m_refCount, sizeof(LONG)); + LONG *pRefCountRW = refCountWriterHolder.GetRW(); +#else // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + LONG *pRefCountRW = &m_refCount; +#endif // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + + LONG newRefCount = InterlockedIncrement(pRefCountRW); _ASSERTE(newRefCount > 0); return newRefCount; } LONG Release() { - LONG newRefCount = InterlockedDecrement(&m_refCount); +#if !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder<LONG> refCountWriterHolder(&m_refCount, sizeof(LONG)); + LONG *pRefCountRW = refCountWriterHolder.GetRW(); +#else // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + LONG *pRefCountRW = &m_refCount; +#endif // !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + + LONG newRefCount = InterlockedDecrement(pRefCountRW); _ASSERTE(newRefCount >= 0); if (newRefCount == 0) diff --git a/src/coreclr/debug/ee/debugger.cpp b/src/coreclr/debug/ee/debugger.cpp index 53ee5555ace43..e4563a31757f4 100644 --- a/src/coreclr/debug/ee/debugger.cpp +++ b/src/coreclr/debug/ee/debugger.cpp @@ -1317,13 +1317,19 @@ DebuggerEval::DebuggerEval(CONTEXT * pContext, DebuggerIPCE_FuncEvalInfo * pEval // Allocate the breakpoint instruction info in executable memory. void *bpInfoSegmentRX = g_pDebugger->GetInteropSafeExecutableHeap()->Alloc(sizeof(DebuggerEvalBreakpointInfoSegment)); + +#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder<DebuggerEvalBreakpointInfoSegment> bpInfoSegmentWriterHolder((DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX, sizeof(DebuggerEvalBreakpointInfoSegment)); - new (bpInfoSegmentWriterHolder.GetRW()) DebuggerEvalBreakpointInfoSegment(this); + DebuggerEvalBreakpointInfoSegment *bpInfoSegmentRW = bpInfoSegmentWriterHolder.GetRW(); +#else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + DebuggerEvalBreakpointInfoSegment *bpInfoSegmentRW = (DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX; +#endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX && HOST_ARM64 + new (bpInfoSegmentRW) DebuggerEvalBreakpointInfoSegment(this); m_bpInfoSegment = (DebuggerEvalBreakpointInfoSegment*)bpInfoSegmentRX; // This must be non-zero so that the saved opcode is non-zero, and on IA64 we want it to be 0x16 // so that we can have a breakpoint instruction in any slot in the bundle.
- bpInfoSegmentWriterHolder.GetRW()->m_breakpointInstruction[0] = 0x16; + bpInfoSegmentRW->m_breakpointInstruction[0] = 0x16; #if defined(TARGET_ARM) USHORT *bp = (USHORT*)&m_bpInfoSegment->m_breakpointInstruction; *bp = CORDbg_BREAK_INSTRUCTION; @@ -16234,6 +16240,7 @@ void Debugger::ReleaseDebuggerDataLock(Debugger *pDebugger) } #endif // DACCESS_COMPILE +#ifndef DACCESS_COMPILE /* ------------------------------------------------------------------------ * * Functions for DebuggerHeap executable memory allocations * ------------------------------------------------------------------------ */ @@ -16378,6 +16385,7 @@ void* DebuggerHeapExecutableMemoryAllocator::GetPointerToChunkWithUsageUpdate(De return page->GetPointerToChunk(chunkNumber); } +#endif // DACCESS_COMPILE /* ------------------------------------------------------------------------ * * DebuggerHeap impl @@ -16412,7 +16420,7 @@ void DebuggerHeap::Destroy() m_hHeap = NULL; } #endif -#ifndef HOST_WINDOWS +#if !defined(HOST_WINDOWS) && !defined(DACCESS_COMPILE) if (m_execMemAllocator != NULL) { delete m_execMemAllocator; @@ -16439,6 +16447,8 @@ HRESULT DebuggerHeap::Init(BOOL fExecutable) } CONTRACTL_END; +#ifndef DACCESS_COMPILE + // Have knob catch if we don't want to lazy init the debugger. _ASSERTE(!g_DbgShouldntUseDebugger); m_fExecutable = fExecutable; @@ -16472,7 +16482,9 @@ HRESULT DebuggerHeap::Init(BOOL fExecutable) return E_OUTOFMEMORY; } } -#endif +#endif + +#endif // !DACCESS_COMPILE return S_OK; } @@ -16549,7 +16561,10 @@ void *DebuggerHeap::Alloc(DWORD size) size += sizeof(InteropHeapCanary); #endif - void *ret; + void *ret = NULL; + +#ifndef DACCESS_COMPILE + #ifdef USE_INTEROPSAFE_HEAP _ASSERTE(m_hHeap != NULL); ret = ::HeapAlloc(m_hHeap, HEAP_ZERO_MEMORY, size); @@ -16585,7 +16600,7 @@ void *DebuggerHeap::Alloc(DWORD size) InteropHeapCanary * pCanary = InteropHeapCanary::GetFromRawAddr(ret); ret = pCanary->GetUserAddr(); #endif - +#endif // !DACCESS_COMPILE return ret; } @@ -16638,6 +16653,8 @@ void DebuggerHeap::Free(void *pMem) } CONTRACTL_END; +#ifndef DACCESS_COMPILE + #ifdef USE_INTEROPSAFE_CANARY // Check for canary @@ -16673,6 +16690,7 @@ void DebuggerHeap::Free(void *pMem) #endif // HOST_WINDOWS } #endif +#endif // !DACCESS_COMPILE } #ifndef DACCESS_COMPILE diff --git a/src/coreclr/debug/ee/debugger.h b/src/coreclr/debug/ee/debugger.h index f16f8cd6d9d9d..5503de2459099 100644 --- a/src/coreclr/debug/ee/debugger.h +++ b/src/coreclr/debug/ee/debugger.h @@ -1054,6 +1054,8 @@ constexpr uint64_t CHUNKS_PER_DEBUGGERHEAP=(DEBUGGERHEAP_PAGESIZE / EXPECTED_CHU constexpr uint64_t MAX_CHUNK_MASK=((1ull << CHUNKS_PER_DEBUGGERHEAP) - 1); constexpr uint64_t BOOKKEEPING_CHUNK_MASK (1ull << (CHUNKS_PER_DEBUGGERHEAP - 1)); +#ifndef DACCESS_COMPILE + // Forward declaration struct DebuggerHeapExecutableMemoryPage; @@ -1110,8 +1112,13 @@ struct DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage inline void SetNextPage(DebuggerHeapExecutableMemoryPage* nextPage) { +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - debuggerHeapPageWriterHolder.GetRW()->chunks[0].bookkeeping.nextPage = nextPage; + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif + pHeapPageRW->chunks[0].bookkeeping.nextPage = nextPage; } inline uint64_t GetPageOccupancy() const @@ -1124,8 +1131,13 @@ struct 
DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage // Can't unset the bookmark chunk! ASSERT((newOccupancy & BOOKKEEPING_CHUNK_MASK) != 0); ASSERT(newOccupancy <= MAX_CHUNK_MASK); +#if defined(HOST_OSX) && defined(HOST_ARM64) ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - debuggerHeapPageWriterHolder.GetRW()->chunks[0].bookkeeping.pageOccupancy = newOccupancy; + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif + pHeapPageRW->chunks[0].bookkeeping.pageOccupancy = newOccupancy; } inline void* GetPointerToChunk(int chunkNum) const @@ -1136,14 +1148,18 @@ struct DECLSPEC_ALIGN(DEBUGGERHEAP_PAGESIZE) DebuggerHeapExecutableMemoryPage DebuggerHeapExecutableMemoryPage() { - ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); - SetPageOccupancy(BOOKKEEPING_CHUNK_MASK); // only the first bit is set. +#if defined(HOST_OSX) && defined(HOST_ARM64) + ExecutableWriterHolder debuggerHeapPageWriterHolder(this, sizeof(DebuggerHeapExecutableMemoryPage)); + DebuggerHeapExecutableMemoryPage *pHeapPageRW = debuggerHeapPageWriterHolder.GetRW(); +#else + DebuggerHeapExecutableMemoryPage *pHeapPageRW = this; +#endif for (uint8_t i = 1; i < CHUNKS_PER_DEBUGGERHEAP; i++) { ASSERT(i != 0); - debuggerHeapPageWriterHolder.GetRW()->chunks[i].data.startOfPage = this; - debuggerHeapPageWriterHolder.GetRW()->chunks[i].data.chunkNumber = i; + pHeapPageRW->chunks[i].data.startOfPage = this; + pHeapPageRW->chunks[i].data.chunkNumber = i; } } @@ -1190,6 +1206,8 @@ class DebuggerHeapExecutableMemoryAllocator Crst m_execMemAllocMutex; }; +#endif // DACCESS_COMPILE + // ------------------------------------------------------------------------ * // DebuggerHeap class // For interop debugging, we need a heap that: @@ -1201,6 +1219,8 @@ class DebuggerHeapExecutableMemoryAllocator #define USE_INTEROPSAFE_HEAP #endif +class DebuggerHeapExecutableMemoryAllocator; + class DebuggerHeap { public: diff --git a/src/coreclr/debug/inc/amd64/primitives.h b/src/coreclr/debug/inc/amd64/primitives.h index d8d14b24b5425..9d363938519c7 100644 --- a/src/coreclr/debug/inc/amd64/primitives.h +++ b/src/coreclr/debug/inc/amd64/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - #ifndef CORDB_ADDRESS_TYPE typedef const BYTE CORDB_ADDRESS_TYPE; typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; @@ -191,14 +187,7 @@ inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address) { LIMITED_METHOD_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder breakpointWriterHolder(address, CORDbg_BREAK_INSTRUCTION_SIZE); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = breakpointWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = 0xCC; // int 3 (single byte patch) + *((unsigned char*)address) = 0xCC; // int 3 (single byte patch) FlushInstructionCache(GetCurrentProcess(), address, 1); } @@ -209,14 +198,7 @@ inline void CORDbgSetInstruction(UNALIGNED CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. 
LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder instructionWriterHolder(address, sizeof(unsigned char)); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = instructionWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = + *((unsigned char*)address) = (unsigned char) instruction; // setting one byte is important FlushInstructionCache(GetCurrentProcess(), address, 1); diff --git a/src/coreclr/debug/inc/arm/primitives.h b/src/coreclr/debug/inc/arm/primitives.h index c4e2d28602e56..269281eb006be 100644 --- a/src/coreclr/debug/inc/arm/primitives.h +++ b/src/coreclr/debug/inc/arm/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - #ifndef THUMB_CODE #define THUMB_CODE 1 #endif @@ -163,14 +159,7 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder instructionWriterHolder(address, sizeof(PRD_TYPE)); - CORDB_ADDRESS_TYPE* addressRW = instructionWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - CORDB_ADDRESS ptraddr = (CORDB_ADDRESS)addressRW; + CORDB_ADDRESS ptraddr = (CORDB_ADDRESS)address; _ASSERTE(ptraddr & THUMB_CODE); ptraddr &= ~THUMB_CODE; diff --git a/src/coreclr/debug/inc/arm64/primitives.h b/src/coreclr/debug/inc/arm64/primitives.h index 4f4c3f7bcd8f2..05c03c7b3094f 100644 --- a/src/coreclr/debug/inc/arm64/primitives.h +++ b/src/coreclr/debug/inc/arm64/primitives.h @@ -150,13 +150,13 @@ inline void CORDbgSetInstruction(CORDB_ADDRESS_TYPE* address, // In a DAC build, this function assumes the input is an host address. 
LIMITED_METHOD_DAC_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) +#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) && defined(HOST_OSX) ExecutableWriterHolder instructionWriterHolder((LPVOID)address, sizeof(PRD_TYPE)); ULONGLONG ptraddr = dac_cast(instructionWriterHolder.GetRW()); -#else // !DBI_COMPILE && !DACCESS_COMPILE +#else // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX ULONGLONG ptraddr = dac_cast(address); -#endif // !DBI_COMPILE && !DACCESS_COMPILE +#endif // !DBI_COMPILE && !DACCESS_COMPILE && HOST_OSX *(PRD_TYPE *)ptraddr = instruction; FlushInstructionCache(GetCurrentProcess(), address, diff --git a/src/coreclr/debug/inc/i386/primitives.h b/src/coreclr/debug/inc/i386/primitives.h index 313b42c5a1970..2f228b3a3a9a1 100644 --- a/src/coreclr/debug/inc/i386/primitives.h +++ b/src/coreclr/debug/inc/i386/primitives.h @@ -12,10 +12,6 @@ #ifndef PRIMITIVES_H_ #define PRIMITIVES_H_ -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) -#include "executableallocator.h" -#endif - typedef const BYTE CORDB_ADDRESS_TYPE; typedef DPTR(CORDB_ADDRESS_TYPE) PTR_CORDB_ADDRESS_TYPE; @@ -151,14 +147,7 @@ inline void CORDbgInsertBreakpoint(UNALIGNED CORDB_ADDRESS_TYPE *address) { LIMITED_METHOD_CONTRACT; -#if !defined(DBI_COMPILE) && !defined(DACCESS_COMPILE) - ExecutableWriterHolder breakpointWriterHolder(address, CORDbg_BREAK_INSTRUCTION_SIZE); - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = breakpointWriterHolder.GetRW(); -#else // !DBI_COMPILE && !DACCESS_COMPILE - UNALIGNED CORDB_ADDRESS_TYPE* addressRW = address; -#endif // !DBI_COMPILE && !DACCESS_COMPILE - - *((unsigned char*)addressRW) = 0xCC; // int 3 (single byte patch) + *((unsigned char*)address) = 0xCC; // int 3 (single byte patch) FlushInstructionCache(GetCurrentProcess(), address, 1); } diff --git a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt index fae55ecdc3ea5..9b8e4b649864d 100644 --- a/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt @@ -109,6 +109,7 @@ set(CORECLR_LIBRARIES v3binder System.Globalization.Native-Static interop + coreclrminipal ) if(CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index c48872a0b9424..3b67b14834e29 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -201,6 +201,10 @@ End Crst Exception End +Crst ExecutableAllocatorLock + Unordered +End + Crst ExecuteManRangeLock End diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 3f21e41dfa369..1c57796cb2e39 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -735,6 +735,10 @@ RETAIL_CONFIG_STRING_INFO(EXTERNAL_DOTNET_DiagnosticPorts, W("DiagnosticPorts"), RETAIL_CONFIG_STRING_INFO(INTERNAL_LTTngConfig, W("LTTngConfig"), "Configuration for LTTng.") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If COMPlus_LTTng is set to 0, this will prevent the LTTng library from being loaded at runtime") +// +// Executable code +// +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWXORX, W("EnableWXORX"), 1, "Enable W^X for executable memory."); #ifdef FEATURE_GDBJIT /// diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h index a1bab2ecb906c..462a654a62c5f 100644 --- a/src/coreclr/inc/crsttypes.h +++ b/src/coreclr/inc/crsttypes.h @@ -49,92 +49,93 @@ enum CrstType CrstEventPipe = 31, CrstEventStore = 32, CrstException = 33, - CrstExecuteManRangeLock = 34, - 
CrstExternalObjectContextCache = 35, - CrstFCall = 36, - CrstFuncPtrStubs = 37, - CrstFusionAppCtx = 38, - CrstGCCover = 39, - CrstGlobalStrLiteralMap = 40, - CrstHandleTable = 41, - CrstHostAssemblyMap = 42, - CrstHostAssemblyMapAdd = 43, - CrstIbcProfile = 44, - CrstIJWFixupData = 45, - CrstIJWHash = 46, - CrstILStubGen = 47, - CrstInlineTrackingMap = 48, - CrstInstMethodHashTable = 49, - CrstInterop = 50, - CrstInteropData = 51, - CrstIsJMCMethod = 52, - CrstISymUnmanagedReader = 53, - CrstJit = 54, - CrstJitGenericHandleCache = 55, - CrstJitInlineTrackingMap = 56, - CrstJitPatchpoint = 57, - CrstJitPerf = 58, - CrstJumpStubCache = 59, - CrstLeafLock = 60, - CrstListLock = 61, - CrstLoaderAllocator = 62, - CrstLoaderAllocatorReferences = 63, - CrstLoaderHeap = 64, - CrstManagedObjectWrapperMap = 65, - CrstMethodDescBackpatchInfoTracker = 66, - CrstModule = 67, - CrstModuleFixup = 68, - CrstModuleLookupTable = 69, - CrstMulticoreJitHash = 70, - CrstMulticoreJitManager = 71, - CrstNativeImageEagerFixups = 72, - CrstNativeImageLoad = 73, - CrstNls = 74, - CrstNotifyGdb = 75, - CrstObjectList = 76, - CrstPEImage = 77, - CrstPendingTypeLoadEntry = 78, - CrstPgoData = 79, - CrstPinnedByrefValidation = 80, - CrstProfilerGCRefDataFreeList = 81, - CrstProfilingAPIStatus = 82, - CrstRCWCache = 83, - CrstRCWCleanupList = 84, - CrstReadyToRunEntryPointToMethodDescMap = 85, - CrstReflection = 86, - CrstReJITGlobalRequest = 87, - CrstRetThunkCache = 88, - CrstSavedExceptionInfo = 89, - CrstSaveModuleProfileData = 90, - CrstSecurityStackwalkCache = 91, - CrstSigConvert = 92, - CrstSingleUseLock = 93, - CrstSpecialStatics = 94, - CrstStackSampler = 95, - CrstStressLog = 96, - CrstStubCache = 97, - CrstStubDispatchCache = 98, - CrstStubUnwindInfoHeapSegments = 99, - CrstSyncBlockCache = 100, - CrstSyncHashLock = 101, - CrstSystemBaseDomain = 102, - CrstSystemDomain = 103, - CrstSystemDomainDelayedUnloadList = 104, - CrstThreadIdDispenser = 105, - CrstThreadpoolTimerQueue = 106, - CrstThreadpoolWaitThreads = 107, - CrstThreadpoolWorker = 108, - CrstThreadStore = 109, - CrstTieredCompilation = 110, - CrstTypeEquivalenceMap = 111, - CrstTypeIDMap = 112, - CrstUMEntryThunkCache = 113, - CrstUniqueStack = 114, - CrstUnresolvedClassLock = 115, - CrstUnwindInfoTableLock = 116, - CrstVSDIndirectionCellLock = 117, - CrstWrapperTemplate = 118, - kNumberOfCrstTypes = 119 + CrstExecutableAllocatorLock = 34, + CrstExecuteManRangeLock = 35, + CrstExternalObjectContextCache = 36, + CrstFCall = 37, + CrstFuncPtrStubs = 38, + CrstFusionAppCtx = 39, + CrstGCCover = 40, + CrstGlobalStrLiteralMap = 41, + CrstHandleTable = 42, + CrstHostAssemblyMap = 43, + CrstHostAssemblyMapAdd = 44, + CrstIbcProfile = 45, + CrstIJWFixupData = 46, + CrstIJWHash = 47, + CrstILStubGen = 48, + CrstInlineTrackingMap = 49, + CrstInstMethodHashTable = 50, + CrstInterop = 51, + CrstInteropData = 52, + CrstIsJMCMethod = 53, + CrstISymUnmanagedReader = 54, + CrstJit = 55, + CrstJitGenericHandleCache = 56, + CrstJitInlineTrackingMap = 57, + CrstJitPatchpoint = 58, + CrstJitPerf = 59, + CrstJumpStubCache = 60, + CrstLeafLock = 61, + CrstListLock = 62, + CrstLoaderAllocator = 63, + CrstLoaderAllocatorReferences = 64, + CrstLoaderHeap = 65, + CrstManagedObjectWrapperMap = 66, + CrstMethodDescBackpatchInfoTracker = 67, + CrstModule = 68, + CrstModuleFixup = 69, + CrstModuleLookupTable = 70, + CrstMulticoreJitHash = 71, + CrstMulticoreJitManager = 72, + CrstNativeImageEagerFixups = 73, + CrstNativeImageLoad = 74, + CrstNls = 75, + CrstNotifyGdb = 76, + 
CrstObjectList = 77, + CrstPEImage = 78, + CrstPendingTypeLoadEntry = 79, + CrstPgoData = 80, + CrstPinnedByrefValidation = 81, + CrstProfilerGCRefDataFreeList = 82, + CrstProfilingAPIStatus = 83, + CrstRCWCache = 84, + CrstRCWCleanupList = 85, + CrstReadyToRunEntryPointToMethodDescMap = 86, + CrstReflection = 87, + CrstReJITGlobalRequest = 88, + CrstRetThunkCache = 89, + CrstSavedExceptionInfo = 90, + CrstSaveModuleProfileData = 91, + CrstSecurityStackwalkCache = 92, + CrstSigConvert = 93, + CrstSingleUseLock = 94, + CrstSpecialStatics = 95, + CrstStackSampler = 96, + CrstStressLog = 97, + CrstStubCache = 98, + CrstStubDispatchCache = 99, + CrstStubUnwindInfoHeapSegments = 100, + CrstSyncBlockCache = 101, + CrstSyncHashLock = 102, + CrstSystemBaseDomain = 103, + CrstSystemDomain = 104, + CrstSystemDomainDelayedUnloadList = 105, + CrstThreadIdDispenser = 106, + CrstThreadpoolTimerQueue = 107, + CrstThreadpoolWaitThreads = 108, + CrstThreadpoolWorker = 109, + CrstThreadStore = 110, + CrstTieredCompilation = 111, + CrstTypeEquivalenceMap = 112, + CrstTypeIDMap = 113, + CrstUMEntryThunkCache = 114, + CrstUniqueStack = 115, + CrstUnresolvedClassLock = 116, + CrstUnwindInfoTableLock = 117, + CrstVSDIndirectionCellLock = 118, + CrstWrapperTemplate = 119, + kNumberOfCrstTypes = 120 }; #endif // __CRST_TYPES_INCLUDED @@ -179,6 +180,7 @@ int g_rgCrstLevelMap[] = 18, // CrstEventPipe 0, // CrstEventStore 0, // CrstException + -1, // CrstExecutableAllocatorLock 0, // CrstExecuteManRangeLock 0, // CrstExternalObjectContextCache 3, // CrstFCall @@ -303,6 +305,7 @@ LPCSTR g_rgCrstNameMap[] = "CrstEventPipe", "CrstEventStore", "CrstException", + "CrstExecutableAllocatorLock", "CrstExecuteManRangeLock", "CrstExternalObjectContextCache", "CrstFCall", diff --git a/src/coreclr/inc/executableallocator.h b/src/coreclr/inc/executableallocator.h index ce0c6c22f890e..101178f9a4ef0 100644 --- a/src/coreclr/inc/executableallocator.h +++ b/src/coreclr/inc/executableallocator.h @@ -11,6 +11,191 @@ #include "utilcode.h" #include "ex.h" +#include "minipal.h" + +#ifndef DACCESS_COMPILE + +// This class is responsible for allocation of all the executable memory in the runtime. +class ExecutableAllocator +{ + // RX address range block descriptor + struct BlockRX + { + // Next block in a linked list + BlockRX* next; + // Base address of the block + void* baseRX; + // Size of the block + size_t size; + // Offset of the block in the shared memory + size_t offset; + }; + + // RW address range block descriptor + struct BlockRW + { + // Next block in a linked list + BlockRW* next; + // Base address of the RW mapping of the block + void* baseRW; + // Base address of the RX mapping of the block + void* baseRX; + // Size of the block + size_t size; + // Usage reference count of the RW block. RW blocks can be reused + // when multiple mappings overlap in the VA space at the same time + // (even from multiple threads) + size_t refCount; + }; + + typedef void (*FatalErrorHandler)(UINT errorCode, LPCWSTR pszMessage); + + // Instance of the allocator + static ExecutableAllocator* g_instance; + + // Callback to the runtime to report fatal errors + static FatalErrorHandler g_fatalErrorHandler; + +#if USE_UPPER_ADDRESS + // Preferred region to allocate the code in. + static BYTE* g_codeMinAddr; + static BYTE* g_codeMaxAddr; + static BYTE* g_codeAllocStart; + // Next address to try to allocate for code in the preferred region. 
+ static BYTE* g_codeAllocHint; +#endif // USE_UPPER_ADDRESS + + // Caches the COMPlus_EnableWXORX setting + static bool g_isWXorXEnabled; + + // Head of the linked list of all RX blocks that were allocated by this allocator + BlockRX* m_pFirstBlockRX = NULL; + + // Head of the linked list of free RX blocks that were allocated by this allocator and then backed out + BlockRX* m_pFirstFreeBlockRX = NULL; + + // Head of the linked list of currently mapped RW blocks + BlockRW* m_pFirstBlockRW = NULL; + + // Handle of the double mapped memory mapper + void *m_doubleMemoryMapperHandle = NULL; + + // Maximum size of executable memory this allocator can allocate + size_t m_maxExecutableCodeSize; + + // First free offset in the underlying shared memory. It is not used + // for platforms that don't use shared memory. + size_t m_freeOffset = 0; + + // Last RW mapping cached so that it can be reused for the next mapping + // request if it goes into the same range. + BlockRW* m_cachedMapping = NULL; + + // Synchronization of the public allocator methods + CRITSEC_COOKIE m_CriticalSection; + + // Update currently cached mapping. If the passed in block is the same as the one + // in the cache, it keeps it cached. Otherwise it destroys the currently cached one + // and replaces it by the passed in one. + void UpdateCachedMapping(BlockRW *pBlock); + + // Find existing RW block that maps the whole specified range of RX memory. + // Return NULL if no such block exists. + void* FindRWBlock(void* baseRX, size_t size); + + // Add RW block to the list of existing RW blocks + bool AddRWBlock(void* baseRW, void* baseRX, size_t size); + + // Remove RW block from the list of existing RW blocks and return the base + // address and size the underlying memory was mapped at. + // Return false if no existing RW block contains the passed in address. + bool RemoveRWBlock(void* pRW, void** pUnmapAddress, size_t* pUnmapSize); + + // Find a free block with the closest size >= the requested size. + // Returns NULL if no such block exists. + BlockRX* FindBestFreeBlock(size_t size); + + // Return memory mapping granularity. + static size_t Granularity(); + + // Allocate a block of executable memory of the specified size. + // It doesn't acquire the actual virtual memory, just the + // range of the underlying shared memory. + BlockRX* AllocateBlock(size_t size, bool* pIsFreeBlock); + + // Backout the block allocated by AllocateBlock in case of an + // error. + void BackoutBlock(BlockRX* pBlock, bool isFreeBlock); + + // Allocate range of offsets in the underlying shared memory + bool AllocateOffset(size_t* pOffset, size_t size); + + // Add RX block to the linked list of existing blocks + void AddRXBlock(BlockRX *pBlock); + + // Return true if double mapping is enabled. + static bool IsDoubleMappingEnabled(); + + // Initialize the allocator instance + bool Initialize(); + +public: + + // Return the ExecuteAllocator singleton instance + static ExecutableAllocator* Instance(); + + // Initialize the static members of the Executable allocator and allocate + // and initialize the instance of it. + static HRESULT StaticInitialize(FatalErrorHandler fatalErrorHandler); + + // Destroy the allocator + ~ExecutableAllocator(); + + // Return true if W^X is enabled + static bool IsWXORXEnabled(); + + // Use this function to initialize the g_codeAllocHint + // during startup. base is runtime .dll base address, + // size is runtime .dll virtual size. 
+ static void InitCodeAllocHint(size_t base, size_t size, int randomPageOffset); + + // Use this function to reset the g_codeAllocHint + // after unloading an AppDomain + static void ResetCodeAllocHint(); + + // Returns TRUE if p is located in near clr.dll that allows us + // to use rel32 IP-relative addressing modes. + static bool IsPreferredExecutableRange(void* p); + + // Reserve the specified amount of virtual address space for executable mapping. + void* Reserve(size_t size); + + // Reserve the specified amount of virtual address space for executable mapping. + // The reserved range must be within the loAddress and hiAddress. If it is not + // possible to reserve memory in such range, the method returns NULL. + void* ReserveWithinRange(size_t size, const void* loAddress, const void* hiAddress); + + // Reserve the specified amount of virtual address space for executable mapping + // exactly at the given address. + void* ReserveAt(void* baseAddressRX, size_t size); + + // Commit the specified range of memory. The memory can be committed as executable (RX) + // or non-executable (RW) based on the passed in isExecutable flag. The non-executable + // allocations are used to allocate data structures that need to be close to the + // executable code due to memory addressing performance related reasons. + void* Commit(void* pStart, size_t size, bool isExecutable); + + // Release the executable memory block starting at the passed in address that was allocated + // by one of the ReserveXXX methods. + void Release(void* pRX); + + // Map the specified block of executable memory as RW + void* MapRW(void* pRX, size_t size); + + // Unmap the RW mapping at the specified address + void UnmapRW(void* pRW); +}; + // Holder class to map read-execute memory as read-write so that it can be modified without using read-write-execute mapping. // At the moment the implementation is dummy, returning the same addresses for both cases and expecting them to be read-write-execute. // The class uses the move semantics to ensure proper unmapping in case of re-assigning of the holder value. 
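[Annotation, not part of the patch] To make the allocator contract above concrete, here is a minimal usage sketch of how a runtime component would emit code under W^X with this allocator. EmitStubSketch is a hypothetical helper, error handling beyond the reserve step is elided, and the ExecutableAllocator and ExecutableWriterHolder declarations above are assumed to be in scope:

    // Hypothetical helper showing the Reserve -> Commit -> scoped-RW-write flow.
    void* EmitStubSketch(const BYTE* code, size_t codeSize)
    {
        ExecutableAllocator* pAllocator = ExecutableAllocator::Instance();

        // Reserve and commit the range as RX; this is the address that executes.
        void* pRX = pAllocator->Reserve(codeSize);
        if (pRX == NULL)
            return NULL;
        pRX = pAllocator->Commit(pRX, codeSize, true /* isExecutable */);

        // Open a transient RW view over the RX range, write through it, drop it.
        {
            ExecutableWriterHolder<BYTE> writerHolder((BYTE*)pRX, codeSize);
            memcpy(writerHolder.GetRW(), code, codeSize);
        } // the holder's destructor unmaps the RW view

        return pRX; // callers only ever see and call the RX address
    }

The point of the holder is that the writable alias is transient: no page is writable and executable at the same time, and the RW view disappears as soon as the write completes.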
@@ -30,13 +215,17 @@ class ExecutableWriterHolder void Unmap() { +#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) if (m_addressRX != NULL) { - // TODO: mapping / unmapping for targets using double memory mapping will be added with the double mapped allocator addition -#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) PAL_JitWriteProtect(false); -#endif } +#else + if (m_addressRX != m_addressRW) + { + ExecutableAllocator::Instance()->UnmapRW((void*)m_addressRW); + } +#endif } public: @@ -62,9 +251,11 @@ class ExecutableWriterHolder ExecutableWriterHolder(T* addressRX, size_t size) { m_addressRX = addressRX; +#if defined(HOST_OSX) && defined(HOST_ARM64) m_addressRW = addressRX; -#if defined(HOST_OSX) && defined(HOST_ARM64) && !defined(DACCESS_COMPILE) PAL_JitWriteProtect(true); +#else + m_addressRW = (T *)ExecutableAllocator::Instance()->MapRW((void*)addressRX, size); #endif } @@ -79,3 +270,5 @@ class ExecutableWriterHolder return m_addressRW; } }; + +#endif // !DACCESS_COMPILE diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index fb65ea9fa613c..3c42f0850850b 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -302,12 +302,12 @@ #endif // !FEATURE_EH_FUNCLETS #ifdef TARGET_X86 - JITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, CORINFO_HELP_SIG_NO_ALIGN_STUB) - JITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EAX, JIT_WriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBX, JIT_WriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ECX, JIT_WriteBarrierECX, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_ESI, JIT_WriteBarrierESI, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EDI, JIT_WriteBarrierEDI, CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_REF_EBP, JIT_WriteBarrierEBP, CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, JIT_CheckedWriteBarrierEAX, CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, JIT_CheckedWriteBarrierEBX, CORINFO_HELP_SIG_NO_ALIGN_STUB) diff --git a/src/coreclr/inc/utilcode.h b/src/coreclr/inc/utilcode.h index a47034ee2e05c..77df9dfa94d2a 100644 --- a/src/coreclr/inc/utilcode.h +++ b/src/coreclr/inc/utilcode.h @@ -1014,35 +1014,6 @@ void SplitPath(__in SString const &path, #define CLRGetTickCount64() GetTickCount64() -// -// Use this function to initialize the s_CodeAllocHint -// during startup. base is runtime .dll base address, -// size is runtime .dll virtual size. -// -void InitCodeAllocHint(SIZE_T base, SIZE_T size, int randomPageOffset); - - -// -// Use this function to reset the s_CodeAllocHint -// after unloading an AppDomain -// -void ResetCodeAllocHint(); - -// -// Returns TRUE if p is located in near clr.dll that allows us -// to use rel32 IP-relative addressing modes. 
-// -BOOL IsPreferredExecutableRange(void * p); - -// -// Allocate free memory that will be used for executable code -// Handles the special requirements that we have on 64-bit platforms -// where we want the executable memory to be located near mscorwks -// -BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize, - DWORD flAllocationType, - DWORD flProtect); - // // Allocate free memory within the range [pMinAddr..pMaxAddr] using // ClrVirtualQuery to find free memory and ClrVirtualAlloc to allocate it. diff --git a/src/coreclr/minipal/CMakeLists.txt b/src/coreclr/minipal/CMakeLists.txt new file mode 100644 index 0000000000000..3096237d2a2fe --- /dev/null +++ b/src/coreclr/minipal/CMakeLists.txt @@ -0,0 +1,7 @@ +include_directories(.) +if (CLR_CMAKE_HOST_UNIX) + add_subdirectory(Unix) +else (CLR_CMAKE_HOST_UNIX) + add_subdirectory(Windows) +endif (CLR_CMAKE_HOST_UNIX) + diff --git a/src/coreclr/minipal/Unix/CMakeLists.txt b/src/coreclr/minipal/Unix/CMakeLists.txt new file mode 100644 index 0000000000000..b56b5017d375f --- /dev/null +++ b/src/coreclr/minipal/Unix/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(coreclrminipal + STATIC + doublemapping.cpp +) diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp new file mode 100644 index 0000000000000..52f3809efb868 --- /dev/null +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -0,0 +1,168 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// + +#include <stddef.h> +#include <stdint.h> +#include <limits.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#ifdef TARGET_LINUX +#include <linux/memfd.h> +#include <sys/syscall.h> // __NR_memfd_create +#endif // TARGET_LINUX +#include "minipal.h" + +#if defined(TARGET_OSX) && defined(TARGET_AMD64) +#include <mach/mach.h> +#endif // TARGET_OSX && TARGET_AMD64 + +#ifndef TARGET_OSX + +#ifdef TARGET_64BIT +static const off_t MaxDoubleMappedSize = 2048ULL*1024*1024*1024; +#else +static const off_t MaxDoubleMappedSize = UINT_MAX; +#endif + +#ifdef TARGET_LINUX +#define memfd_create(...) syscall(__NR_memfd_create, __VA_ARGS__) +#endif // TARGET_LINUX + +#endif // TARGET_OSX + +bool VMToOSInterface::CreateDoubleMemoryMapper(void** pHandle, size_t *pMaxExecutableCodeSize) +{ +#ifndef TARGET_OSX + + int fd = memfd_create("doublemapper", MFD_CLOEXEC); + + if (fd == -1) + { + return false; + } + + if (ftruncate(fd, MaxDoubleMappedSize) == -1) + { + close(fd); + return false; + } + + *pMaxExecutableCodeSize = MaxDoubleMappedSize; + *pHandle = (void*)(size_t)fd; +#else // !TARGET_OSX + *pMaxExecutableCodeSize = SIZE_MAX; + *pHandle = NULL; +#endif // !TARGET_OSX + + return true; +} + +void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) +{ +#ifndef TARGET_OSX + close((int)(size_t)mapperHandle); +#endif +} + +extern "C" void* PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(const void* lpBeginAddress, const void* lpEndAddress, size_t dwSize); + +void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void* rangeEnd) +{ + int fd = (int)(size_t)mapperHandle; + + if (rangeStart != NULL || rangeEnd != NULL) + { + void* result = PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(rangeStart, rangeEnd, size); +#ifndef TARGET_OSX + if (result != NULL) + { + // Map the shared memory over the range reserved from the executable memory allocator.
+ result = mmap(result, size, PROT_NONE, MAP_SHARED | MAP_FIXED, fd, offset); + if (result == MAP_FAILED) + { + assert(false); + result = NULL; + } + } +#endif // TARGET_OSX + + return result; + } + +#ifndef TARGET_OSX + void* result = mmap(NULL, size, PROT_NONE, MAP_SHARED, fd, offset); +#else + void* result = mmap(NULL, size, PROT_NONE, MAP_JIT | MAP_ANON | MAP_PRIVATE, -1, 0); +#endif + if (result == MAP_FAILED) + { + assert(false); + result = NULL; + } + return result; +} + +void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) +{ + if (mprotect(pStart, size, isExecutable ? (PROT_READ | PROT_EXEC) : (PROT_READ | PROT_WRITE)) == -1) + { + return NULL; + } + + return pStart; +} + +bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ +#ifndef TARGET_OSX + int fd = (int)(size_t)mapperHandle; + mmap(pStart, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, offset); + memset(pStart, 0, size); +#endif // TARGET_OSX + return munmap(pStart, size) != -1; +} + +void* VMToOSInterface::GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size) +{ +#ifndef TARGET_OSX + int fd = (int)(size_t)mapperHandle; + return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); +#else // TARGET_OSX +#ifdef TARGET_AMD64 + vm_address_t startRW; + vm_prot_t curProtection, maxProtection; + kern_return_t kr = vm_remap(mach_task_self(), &startRW, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, + mach_task_self(), (vm_address_t)pStart, FALSE, &curProtection, &maxProtection, VM_INHERIT_NONE); + + if (kr != KERN_SUCCESS) + { + return NULL; + } + + int st = mprotect((void*)startRW, size, PROT_READ | PROT_WRITE); + if (st == -1) + { + munmap((void*)startRW, size); + return NULL; + } + + return (void*)startRW; +#else // TARGET_AMD64 + // This method should not be called on OSX ARM64 + assert(false); + return NULL; +#endif // TARGET_AMD64 +#endif // TARGET_OSX +} + +bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size) +{ + return munmap(pStart, size) != -1; +} diff --git a/src/coreclr/minipal/Windows/CMakeLists.txt b/src/coreclr/minipal/Windows/CMakeLists.txt new file mode 100644 index 0000000000000..b56b5017d375f --- /dev/null +++ b/src/coreclr/minipal/Windows/CMakeLists.txt @@ -0,0 +1,4 @@ +add_library(coreclrminipal + STATIC + doublemapping.cpp +) diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp new file mode 100644 index 0000000000000..5edda681f2598 --- /dev/null +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -0,0 +1,246 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
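[Annotation, not part of the patch] As context for the Windows implementation that follows: the core technique is one pagefile-backed section mapped into the address space twice, so that writes made through an RW view become visible through an RX view without any page ever being writable and executable at the same time. A self-contained sketch of just that primitive, assuming x86/x64 (the stub byte 0xC3 is an x86 'ret'):

    #include <windows.h>
    #include <string.h>
    #include <assert.h>

    int main()
    {
        const DWORD size = 0x10000; // one 64 KB allocation granule

        // One pagefile-backed section object backs both views.
        HANDLE hSection = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
                                            PAGE_EXECUTE_READWRITE | SEC_COMMIT,
                                            0, size, NULL);
        assert(hSection != NULL);

        // Map the same section twice: RX view for execution, RW view for writing.
        void* pRX = MapViewOfFile(hSection, FILE_MAP_READ | FILE_MAP_EXECUTE, 0, 0, size);
        void* pRW = MapViewOfFile(hSection, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, size);
        assert(pRX != NULL && pRW != NULL);

        unsigned char ret = 0xC3;
        memcpy(pRW, &ret, sizeof(ret));        // write the code through the RW view
        reinterpret_cast<void (*)()>(pRX)();   // execute it through the RX view

        UnmapViewOfFile(pRW);
        UnmapViewOfFile(pRX);
        CloseHandle(hSection);
        return 0;
    }

The VMToOSInterface implementation below is this same idea plus offset bookkeeping, SEC_RESERVE-based commit, and placement constrained to a requested address range.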
+// +#include <windows.h> +#include <stdint.h> +#include <assert.h> +#include "minipal.h" + +#define HIDWORD(_qw) ((ULONG)((_qw) >> 32)) +#define LODWORD(_qw) ((ULONG)(_qw)) + +#ifdef TARGET_64BIT +static const uint64_t MaxDoubleMappedSize = 2048ULL*1024*1024*1024; +#else +static const uint64_t MaxDoubleMappedSize = UINT_MAX; +#endif + +#define VIRTUAL_ALLOC_RESERVE_GRANULARITY (64*1024) // 0x10000 (64 KB) +inline size_t ALIGN_UP( size_t val, size_t alignment ) +{ + // alignment must be a power of 2 for this implementation to work (need modulo otherwise) + assert( 0 == (alignment & (alignment - 1)) ); + size_t result = (val + (alignment - 1)) & ~(alignment - 1); + assert( result >= val ); // check for overflow + return result; +} + +template <typename T> inline T ALIGN_UP(T val, size_t alignment) +{ + return (T)ALIGN_UP((size_t)val, alignment); +} + +inline void *GetTopMemoryAddress(void) +{ + static void *result; // = NULL; + if( NULL == result ) + { + SYSTEM_INFO sysInfo; + GetSystemInfo( &sysInfo ); + result = sysInfo.lpMaximumApplicationAddress; + } + return result; +} + +inline void *GetBotMemoryAddress(void) +{ + static void *result; // = NULL; + if( NULL == result ) + { + SYSTEM_INFO sysInfo; + GetSystemInfo( &sysInfo ); + result = sysInfo.lpMinimumApplicationAddress; + } + return result; +} + +#define TOP_MEMORY (GetTopMemoryAddress()) +#define BOT_MEMORY (GetBotMemoryAddress()) + +bool VMToOSInterface::CreateDoubleMemoryMapper(void **pHandle, size_t *pMaxExecutableCodeSize) +{ + *pMaxExecutableCodeSize = (size_t)MaxDoubleMappedSize; + *pHandle = CreateFileMapping( + INVALID_HANDLE_VALUE, // use paging file + NULL, // default security + PAGE_EXECUTE_READWRITE | SEC_RESERVE, // read/write/execute access + HIDWORD(MaxDoubleMappedSize), // maximum object size (high-order DWORD) + LODWORD(MaxDoubleMappedSize), // maximum object size (low-order DWORD) + NULL); + + return *pHandle != NULL; +} + +void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) +{ + CloseHandle((HANDLE)mapperHandle); +} + +void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *pMinAddr, const void* pMaxAddr) +{ + BYTE *pResult = nullptr; // our return value; + + if (size == 0) + { + return nullptr; + } + + // + // First let's normalize the pMinAddr and pMaxAddr values + // + // If pMinAddr is NULL then set it to BOT_MEMORY + if ((pMinAddr == 0) || (pMinAddr < (BYTE *) BOT_MEMORY)) + { + pMinAddr = (BYTE *) BOT_MEMORY; + } + + // If pMaxAddr is NULL then set it to TOP_MEMORY + if ((pMaxAddr == 0) || (pMaxAddr > (BYTE *) TOP_MEMORY)) + { + pMaxAddr = (BYTE *) TOP_MEMORY; + } + + // If pMaxAddr is not greater than pMinAddr we can not make an allocation + if (pMaxAddr <= pMinAddr) + { + return nullptr; + } + + // If pMinAddr is BOT_MEMORY and pMaxAddr is TOP_MEMORY + // then we can map the view at any address + if ((pMinAddr == (BYTE *) BOT_MEMORY) && (pMaxAddr == (BYTE *) TOP_MEMORY)) + { + return (BYTE*)MapViewOfFile((HANDLE)mapperHandle, + FILE_MAP_EXECUTE | FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size); + } + + // We will do one scan from [pMinAddr .. pMaxAddr] + // First align the tryAddr up to next 64k base address. + // See docs for VirtualAllocEx and lpAddress and 64k alignment for reasons.
+ // + BYTE * tryAddr = (BYTE *)ALIGN_UP((BYTE *)pMinAddr, VIRTUAL_ALLOC_RESERVE_GRANULARITY); + bool virtualQueryFailed = false; + bool faultInjected = false; + unsigned virtualQueryCount = 0; + + // Now scan memory and try to find a free block of the size requested. + while ((tryAddr + size) <= (BYTE *) pMaxAddr) + { + MEMORY_BASIC_INFORMATION mbInfo; + + // Use VirtualQuery to find out if this address is MEM_FREE + // + virtualQueryCount++; + if (!VirtualQuery((LPCVOID)tryAddr, &mbInfo, sizeof(mbInfo))) + { + // Exit and return nullptr if the VirtualQuery call fails. + virtualQueryFailed = true; + break; + } + + // Is there enough memory free from this start location? + // Note that for most versions of UNIX the mbInfo.RegionSize returned will always be 0 + if ((mbInfo.State == MEM_FREE) && + (mbInfo.RegionSize >= (SIZE_T) size || mbInfo.RegionSize == 0)) + { + // Try reserving the memory using VirtualAlloc now + pResult = (BYTE*)MapViewOfFileEx((HANDLE)mapperHandle, + FILE_MAP_EXECUTE | FILE_MAP_READ | FILE_MAP_WRITE, + HIDWORD((int64_t)offset), + LODWORD((int64_t)offset), + size, + tryAddr); + + // Normally this will be successful + // + if (pResult != nullptr) + { + // return pResult + break; + } + +#ifdef _DEBUG + // if (ShouldInjectFaultInRange()) + // { + // // return nullptr (failure) + // faultInjected = true; + // break; + // } +#endif // _DEBUG + + // On UNIX we can also fail if our request size 'dwSize' is larger than 64K and + // and our tryAddr is pointing at a small MEM_FREE region (smaller than 'dwSize') + // However we can't distinguish between this and the race case. + + // We might fail in a race. So just move on to next region and continue trying + tryAddr = tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY; + } + else + { + // Try another section of memory + tryAddr = max(tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY, + (BYTE*) mbInfo.BaseAddress + mbInfo.RegionSize); + } + } + + // STRESS_LOG7(LF_JIT, LL_INFO100, + // "ClrVirtualAllocWithinRange request #%u for %08x bytes in [ %p .. %p ], query count was %u - returned %s: %p\n", + // countOfCalls, (DWORD)dwSize, pMinAddr, pMaxAddr, + // virtualQueryCount, (pResult != nullptr) ? "success" : "failure", pResult); + + // If we failed this call the process will typically be terminated + // so we log any additional reason for failing this call. + // + if (pResult == nullptr) + { + // if ((tryAddr + dwSize) > (BYTE *)pMaxAddr) + // { + // // Our tryAddr reached pMaxAddr + // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: Address space exhausted.\n"); + // } + + // if (virtualQueryFailed) + // { + // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: VirtualQuery operation failed.\n"); + // } + + // if (faultInjected) + // { + // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: fault injected.\n"); + // } + } + + return pResult; + +} + +void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) +{ + return VirtualAlloc(pStart, size, MEM_COMMIT, isExecutable ? 
+}
+
+bool VMToOSInterface::ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size)
+{
+    // Zero the memory before the unmapping
+    VirtualAlloc(pStart, size, MEM_COMMIT, PAGE_READWRITE);
+    memset(pStart, 0, size);
+    return UnmapViewOfFile(pStart);
+}
+
+void* VMToOSInterface::GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size)
+{
+    return (BYTE*)MapViewOfFile((HANDLE)mapperHandle,
+                                FILE_MAP_READ | FILE_MAP_WRITE,
+                                HIDWORD((int64_t)offset),
+                                LODWORD((int64_t)offset),
+                                size);
+}
+
+bool VMToOSInterface::ReleaseRWMapping(void* pStart, size_t size)
+{
+    return UnmapViewOfFile(pStart);
+}
diff --git a/src/coreclr/minipal/minipal.h b/src/coreclr/minipal/minipal.h
new file mode 100644
index 0000000000000..39098f9bc1295
--- /dev/null
+++ b/src/coreclr/minipal/minipal.h
@@ -0,0 +1,78 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+//
+#include <stddef.h>
+
+// Interface between the runtime and platform specific functionality
+class VMToOSInterface
+{
+private:
+    ~VMToOSInterface() {}
+public:
+    // Create double mapped memory mapper
+    // Parameters:
+    //  pHandle - receives handle of the double mapped memory mapper
+    //  pMaxExecutableCodeSize - receives the maximum executable memory size it can map
+    // Return:
+    //  true if it succeeded, false if it failed
+    static bool CreateDoubleMemoryMapper(void **pHandle, size_t *pMaxExecutableCodeSize);
+
+    // Destroy the double mapped memory mapper represented by the passed in handle
+    // Parameters:
+    //  mapperHandle - handle of the double mapped memory mapper to destroy
+    static void DestroyDoubleMemoryMapper(void *mapperHandle);
+
+    // Reserve a block of memory that can be double mapped.
+    // Parameters:
+    //  mapperHandle - handle of the double mapped memory mapper to use
+    //  offset - offset in the underlying shared memory
+    //  size - size of the block to reserve
+    //  rangeStart
+    //  rangeEnd - Requests reserving virtual memory in the specified range.
+    //             Setting both rangeStart and rangeEnd to 0 means that the
+    //             requested range is not limited.
+    //             When a specific range is requested, the memory must be
+    //             reserved in that range; otherwise the call fails.
+    // Return:
+    //  starting virtual address of the reserved memory or NULL if it failed
+    static void* ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void *rangeEnd);
+
+    // Commit a block of memory in the range previously reserved by the ReserveDoubleMappedMemory
+    // Parameters:
+    //  pStart - start address of the virtual address range to commit
+    //  size - size of the memory block to commit
+    //  isExecutable - true means that the mapping should be RX, false means RW
+    // Return:
+    //  Committed range start
+    static void* CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable);
+
+    // Release a block of virtual memory previously committed by the CommitDoubleMappedMemory
+    // Parameters:
+    //  mapperHandle - handle of the double mapped memory mapper to use
+    //  pStart - start address of the virtual address range to release.
It must be one + // that was previously returned by the CommitDoubleMappedMemory + // offset - offset in the underlying shared memory + // size - size of the memory block to release + // Return: + // true if it succeeded, false if it failed + static bool ReleaseDoubleMappedMemory(void *mapperHandle, void* pStart, size_t offset, size_t size); + + // Get a RW mapping for the RX block specified by the arguments + // Parameters: + // mapperHandle - handle of the double mapped memory mapper to use + // pStart - start address of the RX virtual address range. + // offset - offset in the underlying shared memory + // size - size of the memory block to map as RW + // Return: + // Starting virtual address of the RW mapping. + static void* GetRWMapping(void *mapperHandle, void* pStart, size_t offset, size_t size); + + // Release RW mapping of the block specified by the arguments + // Parameters: + // pStart - Start address of the RW virtual address range. It must be an address + // previously returned by the GetRWMapping. + // size - Size of the memory block to release. It must be the size previously + // passed to the GetRWMapping that returned the pStart. + // Return: + // true if it succeeded, false if it failed + static bool ReleaseRWMapping(void* pStart, size_t size); +}; diff --git a/src/coreclr/utilcode/CMakeLists.txt b/src/coreclr/utilcode/CMakeLists.txt index 1ae433adbfd89..8c57742cb6315 100644 --- a/src/coreclr/utilcode/CMakeLists.txt +++ b/src/coreclr/utilcode/CMakeLists.txt @@ -69,6 +69,7 @@ endif(CLR_CMAKE_TARGET_WIN32) set(UTILCODE_SOURCES ${UTILCODE_COMMON_SOURCES} + executableallocator.cpp ) set(UTILCODE_DAC_SOURCES diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp new file mode 100644 index 0000000000000..4d461e66e7e51 --- /dev/null +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -0,0 +1,755 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "pedecoder.h" +#include "executableallocator.h" + +#if USE_UPPER_ADDRESS +// Preferred region to allocate the code in. +BYTE * ExecutableAllocator::g_codeMinAddr; +BYTE * ExecutableAllocator::g_codeMaxAddr; +BYTE * ExecutableAllocator::g_codeAllocStart; +// Next address to try to allocate for code in the preferred region. +BYTE * ExecutableAllocator::g_codeAllocHint; +#endif // USE_UPPER_ADDRESS + +bool ExecutableAllocator::g_isWXorXEnabled = false; + +ExecutableAllocator::FatalErrorHandler ExecutableAllocator::g_fatalErrorHandler = NULL; + +ExecutableAllocator* ExecutableAllocator::g_instance = NULL; + +bool ExecutableAllocator::IsDoubleMappingEnabled() +{ + LIMITED_METHOD_CONTRACT; + +#if defined(HOST_OSX) && defined(HOST_ARM64) + return false; +#else + return g_isWXorXEnabled; +#endif +} + +bool ExecutableAllocator::IsWXORXEnabled() +{ + LIMITED_METHOD_CONTRACT; + +#if defined(HOST_OSX) && defined(HOST_ARM64) + return true; +#else + return g_isWXorXEnabled; +#endif +} + +extern SYSTEM_INFO g_SystemInfo; + +size_t ExecutableAllocator::Granularity() +{ + LIMITED_METHOD_CONTRACT; + + return g_SystemInfo.dwAllocationGranularity; +} + +// Use this function to initialize the g_codeAllocHint +// during startup. base is runtime .dll base address, +// size is runtime .dll virtual size. 
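+// randomPageOffset is a small random page count (the runtime passes GetRandomInt(64),
+// see ceemain.cpp below) used to slide the chosen start so that the code region
+// start address is not fully predictable.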
+void ExecutableAllocator::InitCodeAllocHint(size_t base, size_t size, int randomPageOffset)
+{
+#if USE_UPPER_ADDRESS
+
+#ifdef _DEBUG
+    // If GetForceRelocs is enabled we don't constrain the pMinAddr
+    if (PEDecoder::GetForceRelocs())
+        return;
+#endif
+
+    //
+    // If we are using the UPPER_ADDRESS space (on Win64),
+    // then for any code heap that doesn't specify an address
+    // range using [pMinAddr..pMaxAddr] we place it in the
+    // upper address space.
+    // This enables us to avoid having to use long JumpStubs
+    // to reach the code for our ngen-ed images, which are
+    // also placed in the UPPER_ADDRESS space.
+    //
+    SIZE_T reach = 0x7FFF0000u;
+
+    // We will choose the preferred code region based on the address of clr.dll. The JIT helpers
+    // in clr.dll are the most heavily called functions.
+    g_codeMinAddr = (base + size > reach) ? (BYTE *)(base + size - reach) : (BYTE *)0;
+    g_codeMaxAddr = (base + reach > base) ? (BYTE *)(base + reach) : (BYTE *)-1;
+
+    BYTE * pStart;
+
+    if (g_codeMinAddr <= (BYTE *)CODEHEAP_START_ADDRESS &&
+        (BYTE *)CODEHEAP_START_ADDRESS < g_codeMaxAddr)
+    {
+        // clr.dll got loaded at its preferred base address? (OS without ASLR - pre-Vista)
+        // Use the code heap start address that does not cause collisions with NGen images.
+        // This logic is coupled with scripts that we use to assign base addresses.
+        pStart = (BYTE *)CODEHEAP_START_ADDRESS;
+    }
+    else
+    if (base > UINT32_MAX)
+    {
+        // clr.dll got address assigned by ASLR?
+        // Try to occupy the space as far as possible to minimize collisions with other ASLR assigned
+        // addresses. Do not start at g_codeMinAddr exactly so that we can also reach common native images
+        // that can be placed at higher addresses than clr.dll.
+        pStart = g_codeMinAddr + (g_codeMaxAddr - g_codeMinAddr) / 8;
+    }
+    else
+    {
+        // clr.dll missed the base address?
+        // Try to occupy the space right after it.
+        pStart = (BYTE *)(base + size);
+    }
+
+    // Randomize the address space
+    pStart += GetOsPageSize() * randomPageOffset;
+
+    g_codeAllocStart = pStart;
+    g_codeAllocHint = pStart;
+#endif
+}
+
+// Use this function to reset the g_codeAllocHint
+// after unloading an AppDomain
+void ExecutableAllocator::ResetCodeAllocHint()
+{
+    LIMITED_METHOD_CONTRACT;
+#if USE_UPPER_ADDRESS
+    g_codeAllocHint = g_codeAllocStart;
+#endif
+}
+
+// Returns true if p is located near clr.dll, which allows us
+// to use rel32 IP-relative addressing modes.
+bool ExecutableAllocator::IsPreferredExecutableRange(void * p) +{ + LIMITED_METHOD_CONTRACT; +#if USE_UPPER_ADDRESS + if (g_codeMinAddr <= (BYTE *)p && (BYTE *)p < g_codeMaxAddr) + return true; +#endif + return false; +} + +ExecutableAllocator* ExecutableAllocator::Instance() +{ + LIMITED_METHOD_CONTRACT; + return g_instance; +} + +ExecutableAllocator::~ExecutableAllocator() +{ + if (IsDoubleMappingEnabled()) + { + VMToOSInterface::DestroyDoubleMemoryMapper(m_doubleMemoryMapperHandle); + } +} + +HRESULT ExecutableAllocator::StaticInitialize(FatalErrorHandler fatalErrorHandler) +{ + LIMITED_METHOD_CONTRACT; + + g_fatalErrorHandler = fatalErrorHandler; + g_isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWXORX) != 0; + g_instance = new (nothrow) ExecutableAllocator(); + if (g_instance == NULL) + { + return E_OUTOFMEMORY; + } + + if (!g_instance->Initialize()) + { + return E_FAIL; + } + + return S_OK; +} + +bool ExecutableAllocator::Initialize() +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + if (!VMToOSInterface::CreateDoubleMemoryMapper(&m_doubleMemoryMapperHandle, &m_maxExecutableCodeSize)) + { + return false; + } + + m_CriticalSection = ClrCreateCriticalSection(CrstExecutableAllocatorLock,CrstFlags(CRST_UNSAFE_ANYMODE | CRST_DEBUGGER_THREAD)); + } + + return true; +} + +//#define ENABLE_CACHED_MAPPINGS + +void ExecutableAllocator::UpdateCachedMapping(BlockRW* pBlock) +{ + LIMITED_METHOD_CONTRACT; +#ifdef ENABLE_CACHED_MAPPINGS + if (m_cachedMapping == NULL) + { + m_cachedMapping = pBlock; + pBlock->refCount++; + } + else if (m_cachedMapping != pBlock) + { + void* unmapAddress = NULL; + size_t unmapSize; + + if (!RemoveRWBlock(m_cachedMapping->baseRW, &unmapAddress, &unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block to unmap was not found")); + } + if (unmapAddress && !VMToOSInterface::ReleaseRWMapping(unmapAddress, unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); + } + m_cachedMapping = pBlock; + pBlock->refCount++; + } +#endif // ENABLE_CACHED_MAPPINGS +} + +void* ExecutableAllocator::FindRWBlock(void* baseRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) + { + if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + pBlock->refCount++; + UpdateCachedMapping(pBlock); + + return (BYTE*)pBlock->baseRW + ((size_t)baseRX - (size_t)pBlock->baseRX); + } + } + + return NULL; +} + +bool ExecutableAllocator::AddRWBlock(void* baseRW, void* baseRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + for (BlockRW* pBlock = m_pFirstBlockRW; pBlock != NULL; pBlock = pBlock->next) + { + if (pBlock->baseRX <= baseRX && ((size_t)baseRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + break; + } + } + + // The new "nothrow" below failure is handled as fail fast since it is not recoverable + PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure); + + BlockRW* pBlockRW = new (nothrow) BlockRW(); + if (pBlockRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block metadata cannot be allocated")); + return false; + } + + pBlockRW->baseRW = baseRW; + pBlockRW->baseRX = baseRX; + pBlockRW->size = size; + pBlockRW->next = m_pFirstBlockRW; + pBlockRW->refCount = 1; + m_pFirstBlockRW = pBlockRW; + + UpdateCachedMapping(pBlockRW); + + return true; +} + +bool ExecutableAllocator::RemoveRWBlock(void* pRW, void** pUnmapAddress, 
size_t* pUnmapSize) +{ + LIMITED_METHOD_CONTRACT; + + BlockRW* pPrevBlockRW = NULL; + for (BlockRW* pBlockRW = m_pFirstBlockRW; pBlockRW != NULL; pBlockRW = pBlockRW->next) + { + if (pBlockRW->baseRW <= pRW && (size_t)pRW < ((size_t)pBlockRW->baseRW + pBlockRW->size)) + { + // found + pBlockRW->refCount--; + if (pBlockRW->refCount != 0) + { + *pUnmapAddress = NULL; + return true; + } + + if (pPrevBlockRW == NULL) + { + m_pFirstBlockRW = pBlockRW->next; + } + else + { + pPrevBlockRW->next = pBlockRW->next; + } + + *pUnmapAddress = pBlockRW->baseRW; + *pUnmapSize = pBlockRW->size; + + delete pBlockRW; + return true; + } + + pPrevBlockRW = pBlockRW; + } + + return false; +} + +bool ExecutableAllocator::AllocateOffset(size_t* pOffset, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + size_t offset = m_freeOffset; + size_t newFreeOffset = offset + size; + + if (newFreeOffset > m_maxExecutableCodeSize) + { + return false; + } + + m_freeOffset = newFreeOffset; + + *pOffset = offset; + + return true; +} + +void ExecutableAllocator::AddRXBlock(BlockRX* pBlock) +{ + LIMITED_METHOD_CONTRACT; + + pBlock->next = m_pFirstBlockRX; + m_pFirstBlockRX = pBlock; +} + +void* ExecutableAllocator::Commit(void* pStart, size_t size, bool isExecutable) +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + return VMToOSInterface::CommitDoubleMappedMemory(pStart, size, isExecutable); + } + else + { + return ClrVirtualAlloc(pStart, size, MEM_COMMIT, isExecutable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + } +} + +void ExecutableAllocator::Release(void* pRX) +{ + LIMITED_METHOD_CONTRACT; + + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + // Locate the RX block corresponding to the pRX and remove it from the linked list + BlockRX* pBlock; + BlockRX* pPrevBlock = NULL; + + for (pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) + { + if (pRX == pBlock->baseRX) + { + if (pPrevBlock == NULL) + { + m_pFirstBlockRX = pBlock->next; + } + else + { + pPrevBlock->next = pBlock->next; + } + + break; + } + pPrevBlock = pBlock; + } + + if (pBlock != NULL) + { + VMToOSInterface::ReleaseDoubleMappedMemory(m_doubleMemoryMapperHandle, pRX, pBlock->offset, pBlock->size); + // Put the released block into the free block list + pBlock->baseRX = NULL; + pBlock->next = m_pFirstFreeBlockRX; + m_pFirstFreeBlockRX = pBlock; + } + else + { + // The block was not found, which should never happen. + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RX block to release was not found")); + } + } + else + { + ClrVirtualFree(pRX, 0, MEM_RELEASE); + } +} + +// Find a free block with the closest size >= the requested size. +// Returns NULL if no such block exists. 
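+// The search is a linear best-fit walk over the singly linked free block list;
+// the chosen block is unlinked from the list before it is returned.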
+ExecutableAllocator::BlockRX* ExecutableAllocator::FindBestFreeBlock(size_t size) +{ + LIMITED_METHOD_CONTRACT; + + BlockRX* pPrevBlock = NULL; + BlockRX* pPrevBestBlock = NULL; + BlockRX* pBestBlock = NULL; + BlockRX* pBlock = m_pFirstFreeBlockRX; + + while (pBlock != NULL) + { + if (pBlock->size >= size) + { + if (pBestBlock != NULL) + { + if (pBlock->size < pBestBlock->size) + { + pPrevBestBlock = pPrevBlock; + pBestBlock = pBlock; + } + } + else + { + pPrevBestBlock = pPrevBlock; + pBestBlock = pBlock; + } + } + pPrevBlock = pBlock; + pBlock = pBlock->next; + } + + if (pBestBlock != NULL) + { + if (pPrevBestBlock != NULL) + { + pPrevBestBlock->next = pBestBlock->next; + } + else + { + m_pFirstFreeBlockRX = pBestBlock->next; + } + + pBestBlock->next = NULL; + } + + return pBestBlock; +} + +// Allocate a new block of executable memory and the related descriptor structure. +// First try to get it from the free blocks and if there is no suitable free block, +// allocate a new one. +ExecutableAllocator::BlockRX* ExecutableAllocator::AllocateBlock(size_t size, bool* pIsFreeBlock) +{ + LIMITED_METHOD_CONTRACT; + + size_t offset; + BlockRX* block = FindBestFreeBlock(size); + *pIsFreeBlock = (block != NULL); + + if (block == NULL) + { + if (!AllocateOffset(&offset, size)) + { + return NULL; + } + + block = new (nothrow) BlockRX(); + if (block == NULL) + { + return NULL; + } + + block->offset = offset; + block->size = size; + } + + return block; +} + +// Backout a previously allocated block. The block is added to the free blocks list and +// reused for later allocation requests. +void ExecutableAllocator::BackoutBlock(BlockRX* pBlock, bool isFreeBlock) +{ + LIMITED_METHOD_CONTRACT; + + if (!isFreeBlock) + { + m_freeOffset -= pBlock->size; + delete pBlock; + } + else + { + pBlock->next = m_pFirstFreeBlockRX; + m_pFirstFreeBlockRX = pBlock; + } +} + +// Reserve executable memory within the specified virtual address space range. If it is not possible to +// reserve memory in that range, the method returns NULL and nothing is allocated. +void* ExecutableAllocator::ReserveWithinRange(size_t size, const void* loAddress, const void* hiAddress) +{ + LIMITED_METHOD_CONTRACT; + + _ASSERTE((size & (Granularity() - 1)) == 0); + if (IsDoubleMappingEnabled()) + { + CRITSEC_Holder csh(m_CriticalSection); + + bool isFreeBlock; + BlockRX* block = AllocateBlock(size, &isFreeBlock); + if (block == NULL) + { + return NULL; + } + + void *result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, loAddress, hiAddress); + + if (result != NULL) + { + block->baseRX = result; + AddRXBlock(block); + } + else + { + BackoutBlock(block, isFreeBlock); + } + + return result; + } + else + { + DWORD allocationType = MEM_RESERVE; +#ifdef HOST_UNIX + // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. + // This will allow us to place JIT'ed code close to the coreclr library + // and thus improve performance by avoiding jump stubs in managed code. + allocationType |= MEM_RESERVE_EXECUTABLE; +#endif + return ClrVirtualAllocWithinRange((const BYTE*)loAddress, (const BYTE*)hiAddress, size, allocationType, PAGE_NOACCESS); + } +} + +// Reserve executable memory. On Windows it tries to use the allocation hints to +// allocate memory close to the previously allocated executable memory and loaded +// executable files. 
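+// The order of attempts is: the preferred upper-address region around the
+// allocation hint (USE_UPPER_ADDRESS only), then, depending on whether W^X is
+// enabled, either an arbitrary address in the double mapped region or a plain
+// virtual memory reservation.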
+void* ExecutableAllocator::Reserve(size_t size)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    _ASSERTE((size & (Granularity() - 1)) == 0);
+
+    BYTE *result = NULL;
+
+#if USE_UPPER_ADDRESS
+    //
+    // If we are using the UPPER_ADDRESS space (on Win64)
+    // then for any heap that will contain executable code
+    // we will place it in the upper address space
+    //
+    // This enables us to avoid having to use JumpStubs
+    // to reach the code for our ngen-ed images on x64,
+    // since they are also placed in the UPPER_ADDRESS space.
+    //
+    BYTE * pHint = g_codeAllocHint;
+
+    if (size <= (SIZE_T)(g_codeMaxAddr - g_codeMinAddr) && pHint != NULL)
+    {
+        // Try to allocate in the preferred region after the hint
+        result = (BYTE*)ReserveWithinRange(size, pHint, g_codeMaxAddr);
+        if (result != NULL)
+        {
+            g_codeAllocHint = result + size;
+        }
+        else
+        {
+            // Try to allocate in the preferred region before the hint
+            result = (BYTE*)ReserveWithinRange(size, g_codeMinAddr, pHint + size);
+
+            if (result != NULL)
+            {
+                g_codeAllocHint = result + size;
+            }
+            else
+            {
+                // The preferred region is exhausted; stop trying to use it.
+                g_codeAllocHint = NULL;
+            }
+        }
+    }
+
+    // Fall through to the regular reservation path below.
+#endif // USE_UPPER_ADDRESS
+
+    if (result == NULL)
+    {
+        if (IsDoubleMappingEnabled())
+        {
+            CRITSEC_Holder csh(m_CriticalSection);
+
+            bool isFreeBlock;
+            BlockRX* block = AllocateBlock(size, &isFreeBlock);
+            if (block == NULL)
+            {
+                return NULL;
+            }
+
+            result = (BYTE*)VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, 0, 0);
+
+            if (result != NULL)
+            {
+                block->baseRX = result;
+                AddRXBlock(block);
+            }
+            else
+            {
+                BackoutBlock(block, isFreeBlock);
+            }
+        }
+        else
+        {
+            DWORD allocationType = MEM_RESERVE;
+#ifdef HOST_UNIX
+            // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory.
+            // This will allow us to place JIT'ed code close to the coreclr library
+            // and thus improve performance by avoiding jump stubs in managed code.
+            allocationType |= MEM_RESERVE_EXECUTABLE;
+#endif
+            result = (BYTE*)ClrVirtualAlloc(NULL, size, allocationType, PAGE_NOACCESS);
+        }
+    }
+
+    return result;
+}
+
+// Reserve a block of executable memory at the specified virtual address. If it is not
+// possible, the method returns NULL.
+void* ExecutableAllocator::ReserveAt(void* baseAddressRX, size_t size)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    _ASSERTE((size & (Granularity() - 1)) == 0);
+
+    if (IsDoubleMappingEnabled())
+    {
+        CRITSEC_Holder csh(m_CriticalSection);
+
+        bool isFreeBlock;
+        BlockRX* block = AllocateBlock(size, &isFreeBlock);
+        if (block == NULL)
+        {
+            return NULL;
+        }
+
+        void* result = VMToOSInterface::ReserveDoubleMappedMemory(m_doubleMemoryMapperHandle, block->offset, size, baseAddressRX, baseAddressRX);
+
+        if (result != NULL)
+        {
+            block->baseRX = result;
+            AddRXBlock(block);
+        }
+        else
+        {
+            BackoutBlock(block, isFreeBlock);
+        }
+
+        return result;
+    }
+    else
+    {
+        return VirtualAlloc(baseAddressRX, size, MEM_RESERVE, PAGE_NOACCESS);
+    }
+}
+
+// Map an executable memory block as writeable. If there is already a mapping
+// covering the specified block, return that mapping instead of creating a new one.
+// Return starting address of the writeable mapping.
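+//
+// A worked example of the arithmetic below, assuming a 64 KB (0x10000) granularity:
+// a request for 0x20 bytes at offset 0x1FFF0 into an RX block yields
+// mapOffset = 0x10000 and mapSize = 0x20000, and the returned RW address is
+// pRW + 0xFFF0, so the caller sees exactly the bytes it asked for.
+//
+// A typical caller-side use is the ExecutableWriterHolder pattern used throughout
+// this change (a sketch, not code from this file):
+//
+//     ExecutableWriterHolder<StubPrecode> holder(pPrecode, sizeof(StubPrecode));
+//     holder.GetRW()->m_pTarget = target;  // write through the RW view
+//     // the RW mapping is unmapped when the holder goes out of scope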
+void* ExecutableAllocator::MapRW(void* pRX, size_t size) +{ + LIMITED_METHOD_CONTRACT; + + if (!IsDoubleMappingEnabled()) + { + return pRX; + } + + CRITSEC_Holder csh(m_CriticalSection); + + void* result = FindRWBlock(pRX, size); + if (result != NULL) + { + return result; + } + + for (BlockRX* pBlock = m_pFirstBlockRX; pBlock != NULL; pBlock = pBlock->next) + { + if (pRX >= pBlock->baseRX && ((size_t)pRX + size) <= ((size_t)pBlock->baseRX + pBlock->size)) + { + // Offset of the RX address in the originally allocated block + size_t offset = (size_t)pRX - (size_t)pBlock->baseRX; + // Offset of the RX address that will start the newly mapped block + size_t mapOffset = ALIGN_DOWN(offset, Granularity()); + // Size of the block we will map + size_t mapSize = ALIGN_UP(offset - mapOffset + size, Granularity()); + void* pRW = VMToOSInterface::GetRWMapping(m_doubleMemoryMapperHandle, (BYTE*)pBlock->baseRX + mapOffset, pBlock->offset + mapOffset, mapSize); + + if (pRW == NULL) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Failed to create RW mapping for RX memory")); + } + + AddRWBlock(pRW, (BYTE*)pBlock->baseRX + mapOffset, mapSize); + + return (void*)((size_t)pRW + (offset - mapOffset)); + } + else if (pRX >= pBlock->baseRX && pRX < (void*)((size_t)pBlock->baseRX + pBlock->size)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Attempting to RW map a block that crosses the end of the allocated RX range")); + } + else if (pRX < pBlock->baseRX && (void*)((size_t)pRX + size) > pBlock->baseRX) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Attempting to map a block that crosses the beginning of the allocated range")); + } + } + + // The executable memory block was not found, so we cannot provide the writeable mapping. + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RX block to map as RW was not found")); + return NULL; +} + +// Unmap writeable mapping at the specified address. The address must be an address +// returned by the MapRW method. 
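+// The RW mappings are reference counted (see RemoveRWBlock), so the underlying
+// view is released only when its last user unmaps it.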
+void ExecutableAllocator::UnmapRW(void* pRW) +{ + LIMITED_METHOD_CONTRACT; + + if (!IsDoubleMappingEnabled()) + { + return; + } + + CRITSEC_Holder csh(m_CriticalSection); + _ASSERTE(pRW != NULL); + + void* unmapAddress = NULL; + size_t unmapSize; + + if (!RemoveRWBlock(pRW, &unmapAddress, &unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("The RW block to unmap was not found")); + } + + if (unmapAddress && !VMToOSInterface::ReleaseRWMapping(unmapAddress, unmapSize)) + { + g_fatalErrorHandler(COR_E_EXECUTIONENGINE, W("Releasing the RW mapping failed")); + } +} diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp index adaf07d8f5825..5828763f512f2 100644 --- a/src/coreclr/utilcode/loaderheap.cpp +++ b/src/coreclr/utilcode/loaderheap.cpp @@ -977,9 +977,7 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() if (fReleaseMemory) { - BOOL fSuccess; - fSuccess = ClrVirtualFree(pVirtualAddress, 0, MEM_RELEASE); - _ASSERTE(fSuccess); + ExecutableAllocator::Instance()->Release(pVirtualAddress); } delete pSearch; @@ -987,9 +985,7 @@ UnlockedLoaderHeap::~UnlockedLoaderHeap() if (m_reservedBlock.m_fReleaseMemory) { - BOOL fSuccess; - fSuccess = ClrVirtualFree(m_reservedBlock.pVirtualAddress, 0, MEM_RELEASE); - _ASSERTE(fSuccess); + ExecutableAllocator::Instance()->Release(m_reservedBlock.pVirtualAddress); } INDEBUG(s_dwNumInstancesOfLoaderHeaps --;) @@ -1058,7 +1054,7 @@ void ReleaseReservedMemory(BYTE* value) { if (value) { - ClrVirtualFree(value, 0, MEM_RELEASE); + ExecutableAllocator::Instance()->Release(value); } } @@ -1114,7 +1110,9 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) // Reserve pages // - pData = ClrVirtualAllocExecutable(dwSizeToReserve, MEM_RESERVE, PAGE_NOACCESS); + // Reserve the memory for even non-executable stuff close to the executable code, as it has profound effect + // on e.g. a static variable access performance. + pData = (BYTE *)ExecutableAllocator::Instance()->Reserve(dwSizeToReserve); if (pData == NULL) { return FALSE; @@ -1140,7 +1138,7 @@ BOOL UnlockedLoaderHeap::UnlockedReservePages(size_t dwSizeToCommit) } // Commit first set of pages, since it will contain the LoaderHeapBlock - void *pTemp = ClrVirtualAlloc(pData, dwSizeToCommit, MEM_COMMIT, (m_Options & LHF_EXECUTABLE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + void *pTemp = ExecutableAllocator::Instance()->Commit(pData, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); if (pTemp == NULL) { //_ASSERTE(!"Unable to ClrVirtualAlloc commit in a loaderheap"); @@ -1213,7 +1211,7 @@ BOOL UnlockedLoaderHeap::GetMoreCommittedPages(size_t dwMinSize) dwSizeToCommit = ALIGN_UP(dwSizeToCommit, GetOsPageSize()); // Yes, so commit the desired number of reserved pages - void *pData = ClrVirtualAlloc(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, MEM_COMMIT, (m_Options & LHF_EXECUTABLE) ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); + void *pData = ExecutableAllocator::Instance()->Commit(m_pPtrToEndOfCommittedRegion, dwSizeToCommit, (m_Options & LHF_EXECUTABLE)); if (pData == NULL) return FALSE; diff --git a/src/coreclr/utilcode/util.cpp b/src/coreclr/utilcode/util.cpp index 0026d1f619f14..e7b1755b2b1c4 100644 --- a/src/coreclr/utilcode/util.cpp +++ b/src/coreclr/utilcode/util.cpp @@ -352,168 +352,6 @@ HRESULT FakeCoCreateInstanceEx(REFCLSID rclsid, return hr; } -#if USE_UPPER_ADDRESS -static BYTE * s_CodeMinAddr; // Preferred region to allocate the code in. 
-static BYTE * s_CodeMaxAddr; -static BYTE * s_CodeAllocStart; -static BYTE * s_CodeAllocHint; // Next address to try to allocate for code in the preferred region. -#endif - -// -// Use this function to initialize the s_CodeAllocHint -// during startup. base is runtime .dll base address, -// size is runtime .dll virtual size. -// -void InitCodeAllocHint(SIZE_T base, SIZE_T size, int randomPageOffset) -{ -#if USE_UPPER_ADDRESS - -#ifdef _DEBUG - // If GetForceRelocs is enabled we don't constrain the pMinAddr - if (PEDecoder::GetForceRelocs()) - return; -#endif - -// - // If we are using the UPPER_ADDRESS space (on Win64) - // then for any code heap that doesn't specify an address - // range using [pMinAddr..pMaxAddr] we place it in the - // upper address space - // This enables us to avoid having to use long JumpStubs - // to reach the code for our ngen-ed images. - // Which are also placed in the UPPER_ADDRESS space. - // - SIZE_T reach = 0x7FFF0000u; - - // We will choose the preferred code region based on the address of clr.dll. The JIT helpers - // in clr.dll are the most heavily called functions. - s_CodeMinAddr = (base + size > reach) ? (BYTE *)(base + size - reach) : (BYTE *)0; - s_CodeMaxAddr = (base + reach > base) ? (BYTE *)(base + reach) : (BYTE *)-1; - - BYTE * pStart; - - if (s_CodeMinAddr <= (BYTE *)CODEHEAP_START_ADDRESS && - (BYTE *)CODEHEAP_START_ADDRESS < s_CodeMaxAddr) - { - // clr.dll got loaded at its preferred base address? (OS without ASLR - pre-Vista) - // Use the code head start address that does not cause collisions with NGen images. - // This logic is coupled with scripts that we use to assign base addresses. - pStart = (BYTE *)CODEHEAP_START_ADDRESS; - } - else - if (base > UINT32_MAX) - { - // clr.dll got address assigned by ASLR? - // Try to occupy the space as far as possible to minimize collisions with other ASLR assigned - // addresses. Do not start at s_CodeMinAddr exactly so that we can also reach common native images - // that can be placed at higher addresses than clr.dll. - pStart = s_CodeMinAddr + (s_CodeMaxAddr - s_CodeMinAddr) / 8; - } - else - { - // clr.dll missed the base address? - // Try to occupy the space right after it. - pStart = (BYTE *)(base + size); - } - - // Randomize the address space - pStart += GetOsPageSize() * randomPageOffset; - - s_CodeAllocStart = pStart; - s_CodeAllocHint = pStart; -#endif -} - -// -// Use this function to reset the s_CodeAllocHint -// after unloading an AppDomain -// -void ResetCodeAllocHint() -{ - LIMITED_METHOD_CONTRACT; -#if USE_UPPER_ADDRESS - s_CodeAllocHint = s_CodeAllocStart; -#endif -} - -// -// Returns TRUE if p is located in near clr.dll that allows us -// to use rel32 IP-relative addressing modes. 
-// -BOOL IsPreferredExecutableRange(void * p) -{ - LIMITED_METHOD_CONTRACT; -#if USE_UPPER_ADDRESS - if (s_CodeMinAddr <= (BYTE *)p && (BYTE *)p < s_CodeMaxAddr) - return TRUE; -#endif - return FALSE; -} - -// -// Allocate free memory that will be used for executable code -// Handles the special requirements that we have on 64-bit platforms -// where we want the executable memory to be located near clr.dll -// -BYTE * ClrVirtualAllocExecutable(SIZE_T dwSize, - DWORD flAllocationType, - DWORD flProtect) -{ - CONTRACTL - { - NOTHROW; - } - CONTRACTL_END; - -#if USE_UPPER_ADDRESS - // - // If we are using the UPPER_ADDRESS space (on Win64) - // then for any heap that will contain executable code - // we will place it in the upper address space - // - // This enables us to avoid having to use JumpStubs - // to reach the code for our ngen-ed images on x64, - // since they are also placed in the UPPER_ADDRESS space. - // - BYTE * pHint = s_CodeAllocHint; - - if (dwSize <= (SIZE_T)(s_CodeMaxAddr - s_CodeMinAddr) && pHint != NULL) - { - // Try to allocate in the preferred region after the hint - BYTE * pResult = ClrVirtualAllocWithinRange(pHint, s_CodeMaxAddr, dwSize, flAllocationType, flProtect); - - if (pResult != NULL) - { - s_CodeAllocHint = pResult + dwSize; - return pResult; - } - - // Try to allocate in the preferred region before the hint - pResult = ClrVirtualAllocWithinRange(s_CodeMinAddr, pHint + dwSize, dwSize, flAllocationType, flProtect); - - if (pResult != NULL) - { - s_CodeAllocHint = pResult + dwSize; - return pResult; - } - - s_CodeAllocHint = NULL; - } - - // Fall through to -#endif // USE_UPPER_ADDRESS - -#ifdef HOST_UNIX - // Tell PAL to use the executable memory allocator to satisfy this request for virtual memory. - // This will allow us to place JIT'ed code close to the coreclr library - // and thus improve performance by avoiding jump stubs in managed code. - flAllocationType |= MEM_RESERVE_EXECUTABLE; -#endif // HOST_UNIX - - return (BYTE *) ClrVirtualAlloc (NULL, dwSize, flAllocationType, flProtect); - -} - // // Allocate free memory with specific alignment. // diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index 1d682d2a428bb..9c2cb3df0b7e9 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -833,7 +833,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_DAC_AND_WKS_ARCH ${ARCH_SOURCES_DIR}/exceparm.cpp ${ARCH_SOURCES_DIR}/stubs.cpp - ${ARCH_SOURCES_DIR}/armsinglestepper.cpp ) set(VM_HEADERS_DAC_AND_WKS_ARCH @@ -844,6 +843,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/profiler.cpp + ${ARCH_SOURCES_DIR}/armsinglestepper.cpp exceptionhandling.cpp gcinfodecoder.cpp ) @@ -868,7 +868,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) ) if(CLR_CMAKE_HOST_UNIX) - list(APPEND VM_SOURCES_DAC_AND_WKS_ARCH + list(APPEND VM_SOURCES_WKS_ARCH ${ARCH_SOURCES_DIR}/arm64singlestepper.cpp ) endif(CLR_CMAKE_HOST_UNIX) diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 82a301bb0cbd1..219597eb350c2 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -51,37 +51,6 @@ endif extern JIT_InternalThrow:proc -; There is an even more optimized version of these helpers possible which takes -; advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 -; that check (this is more significant in the JIT_WriteBarrier case). 
-; -; Additionally we can look into providing helpers which will take the src/dest from -; specific registers (like x86) which _could_ (??) make for easier register allocation -; for the JIT64, however it might lead to having to have some nasty code that treats -; these guys really special like... :(. -; -; Version that does the move, checks whether or not it's in the GC and whether or not -; it needs to have it's card updated -; -; void JIT_CheckedWriteBarrier(Object** dst, Object* src) -LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT - - ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference - ; but if it isn't then it will just return. - ; - ; See if this is in GCHeap - cmp rcx, [g_lowest_address] - jb NotInHeap - cmp rcx, [g_highest_address] - jnb NotInHeap - - jmp JIT_WriteBarrier - - NotInHeap: - ; See comment above about possible AV - mov [rcx], rdx - ret -LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT ; Mark start of the code region that we patch at runtime LEAF_ENTRY JIT_PatchedCodeStart, _TEXT @@ -99,7 +68,8 @@ LEAF_ENTRY JIT_WriteBarrier, _TEXT ifdef _DEBUG ; In debug builds, this just contains jump to the debug version of the write barrier by default - jmp JIT_WriteBarrier_Debug + mov rax, JIT_WriteBarrier_Debug + jmp rax endif ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP @@ -388,6 +358,51 @@ endif ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT +Section segment para 'DATA' + + align 16 + + public JIT_WriteBarrier_Loc +JIT_WriteBarrier_Loc: + dq 0 + +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + ; JIT_WriteBarrier(Object** dst, Object* src) + jmp QWORD PTR [JIT_WriteBarrier_Loc] +LEAF_END JIT_WriteBarrier_Callable, _TEXT + +; There is an even more optimized version of these helpers possible which takes +; advantage of knowledge of which way the ephemeral heap is growing to only do 1/2 +; that check (this is more significant in the JIT_WriteBarrier case). +; +; Additionally we can look into providing helpers which will take the src/dest from +; specific registers (like x86) which _could_ (??) make for easier register allocation +; for the JIT64, however it might lead to having to have some nasty code that treats +; these guys really special like... :(. +; +; Version that does the move, checks whether or not it's in the GC and whether or not +; it needs to have it's card updated +; +; void JIT_CheckedWriteBarrier(Object** dst, Object* src) +LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT + + ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + ; but if it isn't then it will just return. + ; + ; See if this is in GCHeap + cmp rcx, [g_lowest_address] + jb NotInHeap + cmp rcx, [g_highest_address] + jnb NotInHeap + + jmp QWORD PTR [JIT_WriteBarrier_Loc] + + NotInHeap: + ; See comment above about possible AV + mov [rcx], rdx + ret +LEAF_END_MARKED JIT_CheckedWriteBarrier, _TEXT + ; The following helper will access ("probe") a word on each page of the stack ; starting with the page right beneath rsp down to the one pointed to by r11. ; The procedure is needed to make sure that the "guard" page is pushed down below the allocated stack frame. 
diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index a13afb4878511..8109886d0c969 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -32,26 +32,14 @@ LEAF_ENTRY JIT_CheckedWriteBarrier, _TEXT // See if this is in GCHeap PREPARE_EXTERNAL_VAR g_lowest_address, rax cmp rdi, [rax] -#ifdef FEATURE_WRITEBARRIER_COPY // jb NotInHeap .byte 0x72, 0x12 -#else - // jb NotInHeap - .byte 0x72, 0x0e -#endif PREPARE_EXTERNAL_VAR g_highest_address, rax cmp rdi, [rax] -#ifdef FEATURE_WRITEBARRIER_COPY // jnb NotInHeap .byte 0x73, 0x06 jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] -#else - // jnb NotInHeap - .byte 0x73, 0x02 - // jmp C_FUNC(JIT_WriteBarrier) - .byte 0xeb, 0x05 -#endif NotInHeap: // See comment above about possible AV @@ -398,11 +386,17 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT -#ifdef FEATURE_WRITEBARRIER_COPY // When JIT_WriteBarrier is copied into an allocated page, // helpers use this global variable to jump to it. This variable is set in InitThreadManager. - .global _JIT_WriteBarrier_Loc - .zerofill __DATA,__common,_JIT_WriteBarrier_Loc,8,3 + .global C_FUNC(JIT_WriteBarrier_Loc) +#ifdef TARGET_OSX + .zerofill __DATA,__common,C_FUNC(JIT_WriteBarrier_Loc),8,3 +#else + .data + C_FUNC(JIT_WriteBarrier_Loc): + .quad 0 + .text +#endif // ------------------------------------------------------------------ // __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) @@ -412,8 +406,6 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT jmp [rip + C_FUNC(JIT_WriteBarrier_Loc)] LEAF_END JIT_WriteBarrier_Callable, _TEXT -#endif // FEATURE_WRITEBARRIER_COPY - // The following helper will access ("probe") a word on each page of the stack // starting with the page right beneath rsp down to the one pointed to by r11. diff --git a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp index 38bff78a54cb0..02b023777b8a9 100644 --- a/src/coreclr/vm/amd64/jitinterfaceamd64.cpp +++ b/src/coreclr/vm/amd64/jitinterfaceamd64.cpp @@ -293,7 +293,10 @@ int WriteBarrierManager::ChangeWriteBarrierTo(WriteBarrierType newWriteBarrier, // the memcpy must come before the switch statment because the asserts inside the switch // are actually looking into the JIT_WriteBarrier buffer - memcpy(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); + { + ExecutableWriterHolder writeBarrierWriterHolder(GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), GetCurrentWriteBarrierSize()); + memcpy(writeBarrierWriterHolder.GetRW(), (LPVOID)GetCurrentWriteBarrierCode(), GetCurrentWriteBarrierSize()); + } switch (newWriteBarrier) { @@ -544,7 +547,8 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) // Change immediate if different from new g_ephermeral_high. if (*(UINT64*)m_pUpperBoundImmediate != (size_t)g_ephemeral_high) { - *(UINT64*)m_pUpperBoundImmediate = (size_t)g_ephemeral_high; + ExecutableWriterHolder upperBoundWriterHolder((UINT64*)m_pUpperBoundImmediate, sizeof(UINT64)); + *upperBoundWriterHolder.GetRW() = (size_t)g_ephemeral_high; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } } @@ -557,7 +561,8 @@ int WriteBarrierManager::UpdateEphemeralBounds(bool isRuntimeSuspended) // Change immediate if different from new g_ephermeral_low. 
if (*(UINT64*)m_pLowerBoundImmediate != (size_t)g_ephemeral_low) { - *(UINT64*)m_pLowerBoundImmediate = (size_t)g_ephemeral_low; + ExecutableWriterHolder lowerBoundImmediateWriterHolder((UINT64*)m_pLowerBoundImmediate, sizeof(UINT64)); + *lowerBoundImmediateWriterHolder.GetRW() = (size_t)g_ephemeral_low; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } break; @@ -609,7 +614,8 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus #endif // FEATURE_SVR_GC if (*(UINT64*)m_pWriteWatchTableImmediate != (size_t)g_sw_ww_table) { - *(UINT64*)m_pWriteWatchTableImmediate = (size_t)g_sw_ww_table; + ExecutableWriterHolder writeWatchTableImmediateWriterHolder((UINT64*)m_pWriteWatchTableImmediate, sizeof(UINT64)); + *writeWatchTableImmediateWriterHolder.GetRW() = (size_t)g_sw_ww_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } break; @@ -621,14 +627,16 @@ int WriteBarrierManager::UpdateWriteWatchAndCardTableLocations(bool isRuntimeSus if (*(UINT64*)m_pCardTableImmediate != (size_t)g_card_table) { - *(UINT64*)m_pCardTableImmediate = (size_t)g_card_table; + ExecutableWriterHolder cardTableImmediateWriterHolder((UINT64*)m_pCardTableImmediate, sizeof(UINT64)); + *cardTableImmediateWriterHolder.GetRW() = (size_t)g_card_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES if (*(UINT64*)m_pCardBundleTableImmediate != (size_t)g_card_bundle_table) { - *(UINT64*)m_pCardBundleTableImmediate = (size_t)g_card_bundle_table; + ExecutableWriterHolder cardBundleTableImmediateWriterHolder((UINT64*)m_pCardBundleTableImmediate, sizeof(UINT64)); + *cardBundleTableImmediateWriterHolder.GetRW() = (size_t)g_card_bundle_table; stompWBCompleteActions |= SWB_ICACHE_FLUSH; } #endif diff --git a/src/coreclr/vm/arm/armsinglestepper.cpp b/src/coreclr/vm/arm/armsinglestepper.cpp index 79317263b2223..f9e718ae5420e 100644 --- a/src/coreclr/vm/arm/armsinglestepper.cpp +++ b/src/coreclr/vm/arm/armsinglestepper.cpp @@ -97,11 +97,7 @@ ArmSingleStepper::ArmSingleStepper() ArmSingleStepper::~ArmSingleStepper() { #if !defined(DACCESS_COMPILE) -#ifdef TARGET_UNIX SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(WORD)); -#else - DeleteExecutable(m_rgCode); -#endif #endif } @@ -110,11 +106,7 @@ void ArmSingleStepper::Init() #if !defined(DACCESS_COMPILE) if (m_rgCode == NULL) { -#ifdef TARGET_UNIX m_rgCode = (WORD *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(WORD))); -#else - m_rgCode = new (executable) WORD[kMaxCodeBuffer]; -#endif } #endif } @@ -287,6 +279,8 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) DWORD idxNextInstruction = 0; + ExecutableWriterHolder codeWriterHolder(m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); + if (m_originalITState.InITBlock() && !ConditionHolds(pCtx, m_originalITState.CurrentCondition())) { LOG((LF_CORDB, LL_INFO100000, "ArmSingleStepper: Case 1: ITState::Clear;\n")); @@ -295,7 +289,7 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) // to execute. We'll put the correct value back during fixup. ITState::Clear(pCtx); m_fSkipIT = true; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } else if (TryEmulate(pCtx, opcode1, opcode2, false)) { @@ -308,8 +302,8 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) m_fEmulate = true; // Set breakpoints to stop the execution. This will get us right back here. 
- m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } else { @@ -323,24 +317,24 @@ void ArmSingleStepper::Apply(T_CONTEXT *pCtx) // guarantee one of them will be hit (we don't care which one -- the fixup code will update // the PC and IT state to make it look as though the CPU just executed the current // instruction). - m_rgCode[idxNextInstruction++] = opcode1; + codeWriterHolder.GetRW()[idxNextInstruction++] = opcode1; if (Is32BitInstruction(opcode1)) - m_rgCode[idxNextInstruction++] = opcode2; + codeWriterHolder.GetRW()[idxNextInstruction++] = opcode2; - m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; } // Always terminate the redirection buffer with a breakpoint. - m_rgCode[idxNextInstruction++] = kBreakpointOp; + codeWriterHolder.GetRW()[idxNextInstruction++] = kBreakpointOp; _ASSERTE(idxNextInstruction <= kMaxCodeBuffer); // Set the thread up so it will redirect to our buffer when execution resumes. pCtx->Pc = ((DWORD)(DWORD_PTR)m_rgCode) | THUMB_CODE; // Make sure the CPU sees the updated contents of the buffer. - FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + FlushInstructionCache(GetCurrentProcess(), m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); // Done, set the state. m_state = Applied; diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 930395b56dc7e..3faa8fe36846e 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -978,6 +978,16 @@ g_rgWriteBarrierDescriptors: .global g_rgWriteBarrierDescriptors +// ------------------------------------------------------------------ +// __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) + LEAF_ENTRY JIT_WriteBarrier_Callable + + // Branch to the write barrier + ldr r2, =JIT_WriteBarrier_Loc // or R3? See targetarm.h + ldr pc, [r2] + + LEAF_END JIT_WriteBarrier_Callable + #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog, _TEXT, NoHandler diff --git a/src/coreclr/vm/arm/asmhelpers.asm b/src/coreclr/vm/arm/asmhelpers.asm index d20540e62090e..82596e66693dc 100644 --- a/src/coreclr/vm/arm/asmhelpers.asm +++ b/src/coreclr/vm/arm/asmhelpers.asm @@ -1724,6 +1724,18 @@ tempReg SETS "$tmpReg" END_WRITE_BARRIERS + IMPORT JIT_WriteBarrier_Loc + +; ------------------------------------------------------------------ +; __declspec(naked) void F_CALL_CONV JIT_WriteBarrier_Callable(Object **dst, Object* val) + LEAF_ENTRY JIT_WriteBarrier_Callable + + ; Branch to the write barrier + ldr r2, =JIT_WriteBarrier_Loc ; or R3? 
See targetarm.h + ldr pc, [r2] + + LEAF_END + #ifdef FEATURE_READYTORUN NESTED_ENTRY DelayLoad_MethodCall_FakeProlog diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 88d0c6802b69d..425c286558432 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -1069,6 +1069,7 @@ struct StubPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -1095,6 +1096,7 @@ struct StubPrecode { return (TADDR)InterlockedCompareExchange( (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } +#endif // !DACCESS_COMPILE #ifdef FEATURE_PREJIT void Fixup(DataImage *image); @@ -1167,6 +1169,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); PCODE GetTarget() @@ -1175,6 +1184,7 @@ struct FixupPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -1201,6 +1211,7 @@ struct FixupPrecode { return (TADDR)InterlockedCompareExchange( (LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == expected; } +#endif // !DACCESS_COMPILE static BOOL IsFixupPrecodeByASM(PCODE addr) { @@ -1256,6 +1267,7 @@ struct ThisPtrRetBufPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE BOOL SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL @@ -1268,6 +1280,7 @@ struct ThisPtrRetBufPrecode { ExecutableWriterHolder precodeWriterHolder(this, sizeof(ThisPtrRetBufPrecode)); return FastInterlockCompareExchange((LONG*)&precodeWriterHolder.GetRW()->m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected; } +#endif // !DACCESS_COMPILE }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp index aac3e25b18146..b2bf6e0522ea5 100644 --- a/src/coreclr/vm/arm/stubs.cpp +++ b/src/coreclr/vm/arm/stubs.cpp @@ -334,11 +334,19 @@ void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength) void CopyWriteBarrier(PCODE dstCode, PCODE srcCode, PCODE endCode) { - TADDR dst = PCODEToPINSTR(dstCode); + TADDR dst = (TADDR)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation((void*)dstCode)); TADDR src = PCODEToPINSTR(srcCode); TADDR end = PCODEToPINSTR(endCode); size_t size = (PBYTE)end - (PBYTE)src; + + ExecutableWriterHolder writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder = ExecutableWriterHolder((void*)dst, size); + dst = (TADDR)writeBarrierWriterHolder.GetRW(); + } + memcpy((PVOID)dst, (PVOID)src, size); } @@ -419,7 +427,7 @@ void UpdateGCWriteBarriers(bool postGrow = false) } #define GWB_PATCH_OFFSET(_global) \ if (pDesc->m_dw_##_global##_offset != 0xffff) \ - PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset - 1), (UINT32)(dac_cast(_global))); + PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset), (UINT32)(dac_cast(_global))); // Iterate through the write barrier patch table created in the .clrwb section // (see write barrier asm code) @@ -431,6 +439,13 @@ void UpdateGCWriteBarriers(bool postGrow = false) PBYTE to = FindWBMapping(pDesc->m_pFuncStart); if(to) { + to = (PBYTE)PCODEToPINSTR((PCODE)GetWriteBarrierCodeLocation(to)); + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(to, pDesc->m_pFuncEnd - pDesc->m_pFuncStart); + to = 
barrierWriterHolder.GetRW(); + } GWB_PATCH_OFFSET(g_lowest_address); GWB_PATCH_OFFSET(g_highest_address); GWB_PATCH_OFFSET(g_ephemeral_low); diff --git a/src/coreclr/vm/arm64/arm64singlestepper.cpp b/src/coreclr/vm/arm64/arm64singlestepper.cpp index d45925311a33e..6c1764647c9f2 100644 --- a/src/coreclr/vm/arm64/arm64singlestepper.cpp +++ b/src/coreclr/vm/arm64/arm64singlestepper.cpp @@ -46,11 +46,7 @@ Arm64SingleStepper::Arm64SingleStepper() Arm64SingleStepper::~Arm64SingleStepper() { #if !defined(DACCESS_COMPILE) -#ifdef TARGET_UNIX SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->BackoutMem(m_rgCode, kMaxCodeBuffer * sizeof(uint32_t)); -#else - DeleteExecutable(m_rgCode); -#endif #endif } @@ -59,11 +55,7 @@ void Arm64SingleStepper::Init() #if !defined(DACCESS_COMPILE) if (m_rgCode == NULL) { -#ifdef TARGET_UNIX m_rgCode = (uint32_t *)(void *)SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap()->AllocMem(S_SIZE_T(kMaxCodeBuffer * sizeof(uint32_t))); -#else - m_rgCode = new (executable) uint32_t[kMaxCodeBuffer]; -#endif } #endif } @@ -207,7 +199,7 @@ void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) unsigned int idxNextInstruction = 0; - ExecutableWriterHolder codeWriterHolder(m_rgCode, sizeof(m_rgCode)); + ExecutableWriterHolder codeWriterHolder(m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); if (TryEmulate(pCtx, opcode, false)) { @@ -230,7 +222,7 @@ void Arm64SingleStepper::Apply(T_CONTEXT *pCtx) pCtx->Pc = (uint64_t)m_rgCode; // Make sure the CPU sees the updated contents of the buffer. - FlushInstructionCache(GetCurrentProcess(), m_rgCode, sizeof(m_rgCode)); + FlushInstructionCache(GetCurrentProcess(), m_rgCode, kMaxCodeBuffer * sizeof(m_rgCode[0])); // Done, set the state. m_state = Applied; diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index e6b47d07b2b0c..8ef66586cd22c 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -270,13 +270,9 @@ LOCAL_LABEL(EphemeralCheckEnabled): ldr x7, [x12] // Update wbs state -#ifdef FEATURE_WRITEBARRIER_COPY PREPARE_EXTERNAL_VAR JIT_WriteBarrier_Table_Loc, x12 ldr x12, [x12] add x12, x12, x9 -#else // FEATURE_WRITEBARRIER_COPY - adr x12, LOCAL_LABEL(wbs_begin) -#endif // FEATURE_WRITEBARRIER_COPY stp x0, x1, [x12], 16 stp x2, x3, [x12], 16 @@ -295,16 +291,10 @@ LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT mov x14, x0 // x14 = dst mov x15, x1 // x15 = val -#ifdef FEATURE_WRITEBARRIER_COPY -LOCAL_LABEL(Branch_JIT_WriteBarrier_Copy): // Branch to the write barrier PREPARE_EXTERNAL_VAR JIT_WriteBarrier_Loc, x17 ldr x17, [x17] br x17 -#else // FEATURE_WRITEBARRIER_COPY - // Branch to the write barrier - b C_FUNC(JIT_WriteBarrier) -#endif // FEATURE_WRITEBARRIER_COPY LEAF_END JIT_WriteBarrier_Callable, _TEXT .balign 64 // Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index ffbeb9fd1acb3..17d3a676940bd 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -61,6 +61,10 @@ #ifdef FEATURE_COMINTEROP IMPORT CLRToCOMWorker #endif // FEATURE_COMINTEROP + + IMPORT JIT_WriteBarrier_Table_Loc + IMPORT JIT_WriteBarrier_Loc + TEXTAREA ;; LPVOID __stdcall GetCurrentIP(void); @@ -308,6 +312,7 @@ ThePreStubPatchLabel ; x12 will be used for pointers mov x8, x0 + mov x9, x1 adrp x12, g_card_table ldr x0, [x12, g_card_table] @@ -346,7 +351,9 @@ EphemeralCheckEnabled ldr x7, [x12, g_highest_address] 
; Update wbs state - adr x12, wbs_begin + adrp x12, JIT_WriteBarrier_Table_Loc + ldr x12, [x12, JIT_WriteBarrier_Table_Loc] + add x12, x12, x9 stp x0, x1, [x12], 16 stp x2, x3, [x12], 16 stp x4, x5, [x12], 16 @@ -355,9 +362,11 @@ EphemeralCheckEnabled EPILOG_RESTORE_REG_PAIR fp, lr, #16! EPILOG_RETURN + WRITE_BARRIER_END JIT_UpdateWriteBarrierState + ; Begin patchable literal pool ALIGN 64 ; Align to power of two at least as big as patchable literal pool so that it fits optimally in cache line - + WRITE_BARRIER_ENTRY JIT_WriteBarrier_Table wbs_begin wbs_card_table DCQ 0 @@ -375,14 +384,7 @@ wbs_lowest_address DCQ 0 wbs_highest_address DCQ 0 - - WRITE_BARRIER_END JIT_UpdateWriteBarrierState - -; ------------------------------------------------------------------ -; End of the writeable code region - LEAF_ENTRY JIT_PatchedCodeLast - ret lr - LEAF_END + WRITE_BARRIER_END JIT_WriteBarrier_Table ; void JIT_ByRefWriteBarrier ; On entry: @@ -546,6 +548,12 @@ Exit ret lr WRITE_BARRIER_END JIT_WriteBarrier +; ------------------------------------------------------------------ +; End of the writeable code region + LEAF_ENTRY JIT_PatchedCodeLast + ret lr + LEAF_END + #ifdef FEATURE_PREJIT ;------------------------------------------------ ; VirtualMethodFixupStub @@ -1417,9 +1425,10 @@ CallHelper2 mov x14, x0 ; x14 = dst mov x15, x1 ; x15 = val - ; Branch to the write barrier (which is already correctly overwritten with - ; single or multi-proc code based on the current CPU - b JIT_WriteBarrier + ; Branch to the write barrier + adrp x17, JIT_WriteBarrier_Loc + ldr x17, [x17, JIT_WriteBarrier_Loc] + br x17 LEAF_END diff --git a/src/coreclr/vm/arm64/cgencpu.h b/src/coreclr/vm/arm64/cgencpu.h index 83e56cfb9f9b9..0641d89ff1a91 100644 --- a/src/coreclr/vm/arm64/cgencpu.h +++ b/src/coreclr/vm/arm64/cgencpu.h @@ -597,6 +597,7 @@ struct StubPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -623,6 +624,7 @@ struct StubPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE #ifdef FEATURE_PREJIT void Fixup(DataImage *image); @@ -715,6 +717,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); PCODE GetTarget() @@ -723,6 +732,7 @@ struct FixupPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -749,6 +759,7 @@ struct FixupPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE static BOOL IsFixupPrecodeByASM(PCODE addr) { @@ -797,6 +808,7 @@ struct ThisPtrRetBufPrecode { return m_pTarget; } +#ifndef DACCESS_COMPILE BOOL SetTargetInterlocked(TADDR target, TADDR expected) { CONTRACTL @@ -810,6 +822,7 @@ struct ThisPtrRetBufPrecode { return (TADDR)InterlockedCompareExchange64( (LONGLONG*)&precodeWriterHolder.GetRW()->m_pTarget, (TADDR)target, (TADDR)expected) == expected; } +#endif // !DACCESS_COMPILE }; typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode; diff --git a/src/coreclr/vm/arm64/stubs.cpp b/src/coreclr/vm/arm64/stubs.cpp index 54cf1c4927548..12d56ddb9867e 100644 --- a/src/coreclr/vm/arm64/stubs.cpp +++ b/src/coreclr/vm/arm64/stubs.cpp @@ -1067,8 +1067,14 @@ extern "C" void 
STDCALL JIT_PatchedCodeLast(); static void UpdateWriteBarrierState(bool skipEphemeralCheck) { BYTE *writeBarrierCodeStart = GetWriteBarrierCodeLocation((void*)JIT_PatchedCodeStart); - ExecutableWriterHolder writeBarrierWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); - JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierWriterHolder.GetRW() - writeBarrierCodeStart); + BYTE *writeBarrierCodeStartRW = writeBarrierCodeStart; + ExecutableWriterHolder writeBarrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + writeBarrierWriterHolder = ExecutableWriterHolder(writeBarrierCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); + writeBarrierCodeStartRW = writeBarrierWriterHolder.GetRW(); + } + JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap(), writeBarrierCodeStartRW - writeBarrierCodeStart); } void InitJITHelpers1() diff --git a/src/coreclr/vm/ceemain.cpp b/src/coreclr/vm/ceemain.cpp index cdc5925234af9..b60aac924d2e2 100644 --- a/src/coreclr/vm/ceemain.cpp +++ b/src/coreclr/vm/ceemain.cpp @@ -607,6 +607,11 @@ void EESocketCleanupHelper(bool isExecutingOnAltStack) #endif // TARGET_UNIX #endif // CROSSGEN_COMPILE +void FatalErrorHandler(UINT errorCode, LPCWSTR pszMessage) +{ + EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(errorCode, pszMessage); +} + void EEStartupHelper() { CONTRACTL @@ -670,6 +675,8 @@ void EEStartupHelper() // This needs to be done before the EE has started InitializeStartupFlags(); + IfFailGo(ExecutableAllocator::StaticInitialize(FatalErrorHandler)); + ThreadpoolMgr::StaticInitialize(); MethodDescBackpatchInfoTracker::StaticInitialize(); @@ -824,7 +831,7 @@ void EEStartupHelper() g_runtimeLoadedBaseAddress = (SIZE_T)pe.GetBase(); g_runtimeVirtualSize = (SIZE_T)pe.GetVirtualSize(); - InitCodeAllocHint(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); + ExecutableAllocator::InitCodeAllocHint(g_runtimeLoadedBaseAddress, g_runtimeVirtualSize, GetRandomInt(64)); } #endif // !TARGET_UNIX diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index 02feec829a76b..5c5004f56860a 100644 --- a/src/coreclr/vm/class.cpp +++ b/src/coreclr/vm/class.cpp @@ -153,7 +153,9 @@ void EEClass::Destruct(MethodTable * pOwningMT) if (pDelegateEEClass->m_pStaticCallStub) { - BOOL fStubDeleted = pDelegateEEClass->m_pStaticCallStub->DecRef(); + ExecutableWriterHolder stubWriterHolder(pDelegateEEClass->m_pStaticCallStub, sizeof(Stub)); + BOOL fStubDeleted = stubWriterHolder.GetRW()->DecRef(); + if (fStubDeleted) { DelegateInvokeStubManager::g_pManager->RemoveStub(pDelegateEEClass->m_pStaticCallStub); @@ -167,7 +169,6 @@ void EEClass::Destruct(MethodTable * pOwningMT) // it is owned by the m_pMulticastStubCache, not by the class // - it is shared across classes. 
So we don't decrement // its ref count here - delete pDelegateEEClass->m_pUMThunkMarshInfo; } #ifdef FEATURE_COMINTEROP diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 37220786fedda..78721292a3e9f 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -2139,8 +2139,7 @@ VOID EEJitManager::EnsureJumpStubReserve(BYTE * pImageBase, SIZE_T imageSize, SI return; // Unable to allocate the reserve - give up } - pNewReserve->m_ptr = ClrVirtualAllocWithinRange(loAddrCurrent, hiAddrCurrent, - allocChunk, MEM_RESERVE, PAGE_NOACCESS); + pNewReserve->m_ptr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(allocChunk, loAddrCurrent, hiAddrCurrent); if (pNewReserve->m_ptr != NULL) break; @@ -2231,8 +2230,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap if (!pInfo->getThrowOnOutOfMemoryWithinRange() && PEDecoder::GetForceRelocs()) RETURN NULL; #endif - pBaseAddr = ClrVirtualAllocWithinRange(loAddr, hiAddr, - reserveSize, MEM_RESERVE, PAGE_NOACCESS); + pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(reserveSize, loAddr, hiAddr); if (!pBaseAddr) { @@ -2251,7 +2249,7 @@ HeapList* LoaderCodeHeap::CreateCodeHeap(CodeHeapRequestInfo *pInfo, LoaderHeap } else { - pBaseAddr = ClrVirtualAllocExecutable(reserveSize, MEM_RESERVE, PAGE_NOACCESS); + pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->Reserve(reserveSize); if (!pBaseAddr) ThrowOutOfMemory(); } @@ -2686,15 +2684,14 @@ void EEJitManager::allocCode(MethodDesc* pMD, size_t blockSize, size_t reserveFo *pAllocatedSize = sizeof(CodeHeader) + totalSize; -#if defined(HOST_OSX) && defined(HOST_ARM64) -#define FEATURE_WXORX -#endif - -#ifdef FEATURE_WXORX - pCodeHdrRW = (CodeHeader *)new BYTE[*pAllocatedSize]; -#else - pCodeHdrRW = pCodeHdr; -#endif + if (ExecutableAllocator::IsWXORXEnabled()) + { + pCodeHdrRW = (CodeHeader *)new BYTE[*pAllocatedSize]; + } + else + { + pCodeHdrRW = pCodeHdr; + } #ifdef USE_INDIRECT_CODEHEADER if (requestInfo.IsDynamicDomain()) @@ -3347,7 +3344,7 @@ void EEJitManager::Unload(LoaderAllocator *pAllocator) } } - ResetCodeAllocHint(); + ExecutableAllocator::ResetCodeAllocHint(); } EEJitManager::DomainCodeHeapList::DomainCodeHeapList() diff --git a/src/coreclr/vm/comcallablewrapper.cpp b/src/coreclr/vm/comcallablewrapper.cpp index 8b95dac8cdd77..499880dc16dde 100644 --- a/src/coreclr/vm/comcallablewrapper.cpp +++ b/src/coreclr/vm/comcallablewrapper.cpp @@ -3183,12 +3183,11 @@ void ComMethodTable::Cleanup() if (m_pDispatchInfo) delete m_pDispatchInfo; - if (m_pMDescr) - DeleteExecutable(m_pMDescr); if (m_pITypeInfo && !g_fProcessDetach) SafeRelease(m_pITypeInfo); - DeleteExecutable(this); + // The m_pMDescr and the current instance are allocated from the related LoaderAllocator, + // so no cleanup is needed here.
} @@ -3214,7 +3213,7 @@ void ComMethodTable::LayOutClassMethodTable() SLOT *pComVtable; unsigned cbPrevSlots = 0; unsigned cbAlloc = 0; - NewExecutableHolder pMDMemoryPtr = NULL; + AllocMemHolder pMDMemoryPtr; BYTE* pMethodDescMemory = NULL; size_t writeableOffset = 0; unsigned cbNumParentVirtualMethods = 0; @@ -3321,7 +3320,7 @@ void ComMethodTable::LayOutClassMethodTable() cbAlloc = cbMethodDescs; if (cbAlloc > 0) { - pMDMemoryPtr = (BYTE*) new (executable) BYTE[cbAlloc + sizeof(UINT_PTR)]; + pMDMemoryPtr = m_pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbAlloc + sizeof(UINT_PTR))); pMethodDescMemory = pMDMemoryPtr; methodDescMemoryWriteableHolder = ExecutableWriterHolder(pMethodDescMemory, cbAlloc + sizeof(UINT_PTR)); @@ -3703,7 +3702,6 @@ BOOL ComMethodTable::LayOutInterfaceMethodTable(MethodTable* pClsMT) // Method descs are at the end of the vtable // m_cbSlots interfaces methods + IUnk methods pMethodDescMemory = (BYTE *)&pComVtable[m_cbSlots]; - for (i = 0; i < cbSlots; i++) { ComCallMethodDesc* pNewMD = (ComCallMethodDesc *) (pMethodDescMemory + COMMETHOD_PREPAD); @@ -4495,13 +4493,12 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForClass(MethodTable if (cbToAlloc.IsOverflow()) ThrowHR(COR_E_OVERFLOW); - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc.Value()]; + AllocMemHolder pComMT(pClassMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc.Value()))); _ASSERTE(!cbNewSlots.IsOverflow() && !cbTotalSlots.IsOverflow() && !cbVtable.IsOverflow()); ExecutableWriterHolder comMTWriterHolder(pComMT, cbToAlloc.Value()); ComMethodTable* pComMTRW = comMTWriterHolder.GetRW(); - // set up the header pComMTRW->m_ptReserved = (SLOT)(size_t)0xDEADC0FF; // reserved pComMTRW->m_pMT = pClassMT; // pointer to the class method table @@ -4573,7 +4570,7 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForInterface(MethodT if (cbToAlloc.IsOverflow()) ThrowHR(COR_E_OVERFLOW); - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc.Value()]; + AllocMemHolder pComMT(pInterfaceMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc.Value()))); _ASSERTE(!cbVtable.IsOverflow() && !cbMethDescs.IsOverflow()); @@ -4639,7 +4636,8 @@ ComMethodTable* ComCallWrapperTemplate::CreateComMethodTableForBasic(MethodTable unsigned cbVtable = cbExtraSlots * sizeof(SLOT); unsigned cbToAlloc = sizeof(ComMethodTable) + cbVtable; - NewExecutableHolder pComMT = (ComMethodTable*) new (executable) BYTE[cbToAlloc]; + AllocMemHolder pComMT(pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(cbToAlloc))); + ExecutableWriterHolder comMTWriterHolder(pComMT, cbToAlloc); ComMethodTable* pComMTRW = comMTWriterHolder.GetRW(); diff --git a/src/coreclr/vm/comcallablewrapper.h b/src/coreclr/vm/comcallablewrapper.h index 2581ddf832fd5..0f1e4b878e4c9 100644 --- a/src/coreclr/vm/comcallablewrapper.h +++ b/src/coreclr/vm/comcallablewrapper.h @@ -499,6 +499,7 @@ struct ComMethodTable // Accessor for the IDispatch information. 
DispatchInfo* GetDispatchInfo(); +#ifndef DACCESS_COMPILE LONG AddRef() { LIMITED_METHOD_CONTRACT; @@ -527,6 +528,7 @@ struct ComMethodTable return cbRef; } +#endif // DACCESS_COMPILE CorIfaceAttr GetInterfaceType() { @@ -746,6 +748,7 @@ struct ComMethodTable } +#ifndef DACCESS_COMPILE inline REFIID GetIID() { // Cannot use a normal CONTRACT since the return type is ref type which @@ -768,6 +771,7 @@ struct ComMethodTable return m_IID; } +#endif // DACCESS_COMPILE void CheckParentComVisibility(BOOL fForIDispatch) { diff --git a/src/coreclr/vm/comdelegate.cpp b/src/coreclr/vm/comdelegate.cpp index b6c17260a1302..1b61e16dec5d3 100644 --- a/src/coreclr/vm/comdelegate.cpp +++ b/src/coreclr/vm/comdelegate.cpp @@ -1253,7 +1253,7 @@ LPVOID COMDelegate::ConvertToCallback(OBJECTREF pDelegateObj) { GCX_PREEMP(); - pUMThunkMarshInfo = new UMThunkMarshInfo(); + pUMThunkMarshInfo = (UMThunkMarshInfo*)(void*)pMT->GetLoaderAllocator()->GetStubHeap()->AllocMem(S_SIZE_T(sizeof(UMThunkMarshInfo))); ExecutableWriterHolder uMThunkMarshInfoWriterHolder(pUMThunkMarshInfo, sizeof(UMThunkMarshInfo)); uMThunkMarshInfoWriterHolder.GetRW()->LoadTimeInit(pInvokeMeth); diff --git a/src/coreclr/vm/dynamicmethod.cpp b/src/coreclr/vm/dynamicmethod.cpp index 9dae86aca9377..541d88dc16885 100644 --- a/src/coreclr/vm/dynamicmethod.cpp +++ b/src/coreclr/vm/dynamicmethod.cpp @@ -403,8 +403,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) if (pInfo->m_loAddr != NULL || pInfo->m_hiAddr != NULL) { - m_pBaseAddr = ClrVirtualAllocWithinRange(pInfo->m_loAddr, pInfo->m_hiAddr, - ReserveBlockSize, MEM_RESERVE, PAGE_NOACCESS); + m_pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->ReserveWithinRange(ReserveBlockSize, pInfo->m_loAddr, pInfo->m_hiAddr); if (!m_pBaseAddr) { if (pInfo->getThrowOnOutOfMemoryWithinRange()) @@ -417,7 +416,7 @@ HeapList* HostCodeHeap::InitializeHeapList(CodeHeapRequestInfo *pInfo) // top up the ReserveBlockSize to suggested minimum ReserveBlockSize = max(ReserveBlockSize, pInfo->getReserveSize()); - m_pBaseAddr = ClrVirtualAllocExecutable(ReserveBlockSize, MEM_RESERVE, PAGE_NOACCESS); + m_pBaseAddr = (BYTE*)ExecutableAllocator::Instance()->Reserve(ReserveBlockSize); if (!m_pBaseAddr) ThrowOutOfMemory(); } @@ -749,7 +748,7 @@ HostCodeHeap::TrackAllocation* HostCodeHeap::AllocMemory_NoThrow(size_t header, if (m_pLastAvailableCommittedAddr + sizeToCommit <= m_pBaseAddr + m_TotalBytesAvailable) { - if (NULL == ClrVirtualAlloc(m_pLastAvailableCommittedAddr, sizeToCommit, MEM_COMMIT, PAGE_EXECUTE_READWRITE)) + if (NULL == ExecutableAllocator::Instance()->Commit(m_pLastAvailableCommittedAddr, sizeToCommit, true /* isExecutable */)) { LOG((LF_BCL, LL_ERROR, "CodeHeap [0x%p] - VirtualAlloc failed\n", this)); return NULL; diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 1b192e683695a..55828b7c22b86 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6679,14 +6679,12 @@ AdjustContextForJITHelpers( PCODE ip = GetIP(pContext); -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(ip)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame ip = AdjustWriteBarrierIP(ip); SetIP(pContext, ip); } -#endif // FEATURE_WRITEBARRIER_COPY #ifdef FEATURE_DATABREAKPOINT diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 7fff234ca85ef..4af702fab1499 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ 
-4694,14 +4694,12 @@ VOID DECLSPEC_NORETURN UnwindManagedExceptionPass1(PAL_SEHException& ex, CONTEXT break; } -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame controlPc = AdjustWriteBarrierIP(controlPc); SetIP(frameContext, controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY UINT_PTR sp = GetSP(frameContext); @@ -5174,13 +5172,11 @@ BOOL IsSafeToHandleHardwareException(PCONTEXT contextRecord, PEXCEPTION_RECORD e { PCODE controlPc = GetIP(contextRecord); -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location controlPc = AdjustWriteBarrierIP(controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY return g_fEEStarted && ( exceptionRecord->ExceptionCode == STATUS_BREAKPOINT || @@ -5259,14 +5255,12 @@ BOOL HandleHardwareException(PAL_SEHException* ex) { GCX_COOP(); // Must be cooperative to modify frame chain. -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(controlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame controlPc = AdjustWriteBarrierIP(controlPc); SetIP(ex->GetContextRecord(), controlPc); } -#endif // FEATURE_WRITEBARRIER_COPY if (IsIPInMarkedJitHelper(controlPc)) { diff --git a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp index be856dbe1a63a..9ce0cc676f7a7 100644 --- a/src/coreclr/vm/gccover.cpp +++ b/src/coreclr/vm/gccover.cpp @@ -1258,9 +1258,9 @@ void RemoveGcCoverageInterrupt(TADDR instrPtr, BYTE * savedInstrPtr, GCCoverageI { ExecutableWriterHolder instrPtrWriterHolder((void*)instrPtr, 4); #ifdef TARGET_ARM - if (GetARMInstructionLength(savedInstrPtr) == 2) + if (GetARMInstructionLength(savedInstrPtr) == 2) *(WORD *)instrPtrWriterHolder.GetRW() = *(WORD *)savedInstrPtr; - else + else *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; #elif defined(TARGET_ARM64) *(DWORD *)instrPtrWriterHolder.GetRW() = *(DWORD *)savedInstrPtr; diff --git a/src/coreclr/vm/i386/jithelp.S b/src/coreclr/vm/i386/jithelp.S index facce7cacd3ef..dc56da1d1779e 100644 --- a/src/coreclr/vm/i386/jithelp.S +++ b/src/coreclr/vm/i386/jithelp.S @@ -377,10 +377,27 @@ LEAF_ENTRY JIT_WriteBarrierGroup, _TEXT ret LEAF_END JIT_WriteBarrierGroup, _TEXT -#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS -// ******************************************************************************* -// Write barrier wrappers with fcall calling convention -// + .data + .align 4 + .global C_FUNC(JIT_WriteBarrierEAX_Loc) +C_FUNC(JIT_WriteBarrierEAX_Loc): + .word 0 + .text + +LEAF_ENTRY JIT_WriteBarrier_Callable, _TEXT + mov eax, edx + mov edx, ecx + push eax + call 1f +1: + pop eax +2: + add eax, offset _GLOBAL_OFFSET_TABLE_+1 // (2b - 1b) + mov eax, dword ptr [eax + C_FUNC(JIT_WriteBarrierEAX_Loc)@GOT] + xchg eax, dword ptr [esp] + ret +LEAF_END JIT_WriteBarrier_Callable, _TEXT + .macro UniversalWriteBarrierHelper name .align 4 @@ -392,6 +409,11 @@ LEAF_END JIT_\name, _TEXT .endm +#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS +// ******************************************************************************* +// Write barrier wrappers with fcall calling convention +// + // Only define these if we're using the ASM GC write barriers; if this flag is not defined, // we'll use C++ versions of these write barriers. 
UniversalWriteBarrierHelper CheckedWriteBarrier diff --git a/src/coreclr/vm/i386/jithelp.asm b/src/coreclr/vm/i386/jithelp.asm index 3743ac3cbe02f..3650b3f2afd6d 100644 --- a/src/coreclr/vm/i386/jithelp.asm +++ b/src/coreclr/vm/i386/jithelp.asm @@ -411,15 +411,13 @@ ENDM ;******************************************************************************* ; Write barrier wrappers with fcall calling convention ; -UniversalWriteBarrierHelper MACRO name + + .data ALIGN 4 -PUBLIC @JIT_&name&@8 -@JIT_&name&@8 PROC - mov eax,edx - mov edx,ecx - jmp _JIT_&name&EAX@0 -@JIT_&name&@8 ENDP -ENDM + public _JIT_WriteBarrierEAX_Loc +_JIT_WriteBarrierEAX_Loc dd 0 + + .code ; WriteBarrierStart and WriteBarrierEnd are used to determine bounds of ; WriteBarrier functions so can determine if got AV in them. @@ -429,6 +427,25 @@ _JIT_WriteBarrierGroup@0 PROC ret _JIT_WriteBarrierGroup@0 ENDP + ALIGN 4 +PUBLIC @JIT_WriteBarrier_Callable@8 +@JIT_WriteBarrier_Callable@8 PROC + mov eax,edx + mov edx,ecx + jmp DWORD PTR [_JIT_WriteBarrierEAX_Loc] + +@JIT_WriteBarrier_Callable@8 ENDP + +UniversalWriteBarrierHelper MACRO name + ALIGN 4 +PUBLIC @JIT_&name&@8 +@JIT_&name&@8 PROC + mov eax,edx + mov edx,ecx + jmp _JIT_&name&EAX@0 +@JIT_&name&@8 ENDP +ENDM + ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS ; Only define these if we're using the ASM GC write barriers; if this flag is not defined, ; we'll use C++ versions of these write barriers. @@ -1233,6 +1250,8 @@ fremloopd: ; PatchedCodeStart and PatchedCodeEnd are used to determine bounds of patched code. ; + ALIGN 4 + _JIT_PatchedCodeStart@0 proc public ret _JIT_PatchedCodeStart@0 endp diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index cefe7ecadc5e9..c5ebf8e0cf15c 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -1039,10 +1039,18 @@ void InitJITHelpers1() { BYTE * pfunc = (BYTE *) JIT_WriteBarrierReg_PreGrow; - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); int reg = c_rgWriteBarrierRegs[iBarrier]; - memcpy(pBuf, pfunc, 34); + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 34); + pBufRW = barrierWriterHolder.GetRW(); + } + + memcpy(pBufRW, pfunc, 34); // assert the copied code ends in a ret to make sure we got the right length _ASSERTE(pBuf[33] == 0xC3); @@ -1058,24 +1066,24 @@ void InitJITHelpers1() _ASSERTE(pBuf[0] == 0x89); // Update the reg field (bits 3..5) of the ModR/M byte of this instruction - pBuf[1] &= 0xc7; - pBuf[1] |= reg << 3; + pBufRW[1] &= 0xc7; + pBufRW[1] |= reg << 3; // Second instruction to patch is cmp reg, imm32 (low bound) _ASSERTE(pBuf[2] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[3] &= 0xf8; - pBuf[3] |= reg; + pBufRW[3] &= 0xf8; + pBufRW[3] |= reg; #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization just jump to the old one // Use the slow one from time to time in a debug build because // there are some good asserts in the unoptimized one if ((g_pConfig->GetHeapVerifyLevel() & EEConfig::HEAPVERIFY_BARRIERCHECK) || DEBUG_RANDOM_BARRIER_CHECK) { - pfunc = &pBuf[0]; + pfunc = &pBufRW[0]; *pfunc++ = 0xE9; // JMP c_rgDebugWriteBarriers[iBarrier] - *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (pfunc + sizeof(DWORD)); + *((DWORD*) pfunc) = (BYTE*) c_rgDebugWriteBarriers[iBarrier] - (&pBuf[1] + 
sizeof(DWORD)); } #endif // WRITE_BARRIER_CHECK } @@ -1121,7 +1129,7 @@ void ValidateWriteBarrierHelpers() #endif // WRITE_BARRIER_CHECK // first validate the PreGrow helper - BYTE* pWriteBarrierFunc = reinterpret_cast(JIT_WriteBarrierEAX); + BYTE* pWriteBarrierFunc = GetWriteBarrierCodeLocation(reinterpret_cast(JIT_WriteBarrierEAX)); // ephemeral region DWORD* pLocation = reinterpret_cast(&pWriteBarrierFunc[AnyGrow_EphemeralLowerBound]); @@ -1159,7 +1167,7 @@ void ValidateWriteBarrierHelpers() #endif //CODECOVERAGE /*********************************************************************/ -#define WriteBarrierIsPreGrow() (((BYTE *)JIT_WriteBarrierEAX)[10] == 0xc1) +#define WriteBarrierIsPreGrow() ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[10] == 0xc1) /*********************************************************************/ @@ -1177,20 +1185,28 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization if we are checking write barrier - if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + if ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[0] == 0xE9) // we are using slow write barrier return stompWBCompleteActions; #endif // WRITE_BARRIER_CHECK // Update the lower bound. for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); + + BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + pBufRW = barrierWriterHolder.GetRW(); + } // assert there is in fact a cmp r/m32, imm32 there _ASSERTE(pBuf[2] == 0x81); // Update the immediate which is the lower bound of the ephemeral generation - size_t *pfunc = (size_t *) &pBuf[AnyGrow_EphemeralLowerBound]; + size_t *pfunc = (size_t *) &pBufRW[AnyGrow_EphemeralLowerBound]; //avoid trivial self modifying code if (*pfunc != (size_t) g_ephemeral_low) { @@ -1203,7 +1219,7 @@ int StompWriteBarrierEphemeral(bool /* isRuntimeSuspended */) _ASSERTE(pBuf[10] == 0x81); // Update the upper bound if we are using the PostGrow thunk. - pfunc = (size_t *) &pBuf[PostGrow_EphemeralUpperBound]; + pfunc = (size_t *) &pBufRW[PostGrow_EphemeralUpperBound]; //avoid trivial self modifying code if (*pfunc != (size_t) g_ephemeral_high) { @@ -1233,7 +1249,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) #ifdef WRITE_BARRIER_CHECK // Don't do the fancy optimization if we are checking write barrier - if (((BYTE *)JIT_WriteBarrierEAX)[0] == 0xE9) // we are using slow write barrier + if ((GetWriteBarrierCodeLocation((BYTE *)JIT_WriteBarrierEAX))[0] == 0xE9) // we are using slow write barrier return stompWBCompleteActions; #endif // WRITE_BARRIER_CHECK @@ -1242,12 +1258,20 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) for (int iBarrier = 0; iBarrier < NUM_WRITE_BARRIERS; iBarrier++) { - BYTE * pBuf = (BYTE *)c_rgWriteBarriers[iBarrier]; + BYTE * pBuf = GetWriteBarrierCodeLocation((BYTE *)c_rgWriteBarriers[iBarrier]); int reg = c_rgWriteBarrierRegs[iBarrier]; size_t *pfunc; - // Check if we are still using the pre-grow version of the write barrier. 
+ BYTE * pBufRW = pBuf; + ExecutableWriterHolder barrierWriterHolder; + if (IsWriteBarrierCopyEnabled()) + { + barrierWriterHolder = ExecutableWriterHolder(pBuf, 42); + pBufRW = barrierWriterHolder.GetRW(); + } + + // Check if we are still using the pre-grow version of the write barrier. if (bWriteBarrierIsPreGrow) { // Check if we need to use the upper bounds checking barrier stub. @@ -1260,7 +1284,7 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) } pfunc = (size_t *) JIT_WriteBarrierReg_PostGrow; - memcpy(pBuf, pfunc, 42); + memcpy(pBufRW, pfunc, 42); // assert the copied code ends in a ret to make sure we got the right length _ASSERTE(pBuf[41] == 0xC3); @@ -1276,35 +1300,35 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) _ASSERTE(pBuf[0] == 0x89); // Update the reg field (bits 3..5) of the ModR/M byte of this instruction - pBuf[1] &= 0xc7; - pBuf[1] |= reg << 3; + pBufRW[1] &= 0xc7; + pBufRW[1] |= reg << 3; // Second instruction to patch is cmp reg, imm32 (low bound) _ASSERTE(pBuf[2] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[3] &= 0xf8; - pBuf[3] |= reg; + pBufRW[3] &= 0xf8; + pBufRW[3] |= reg; // Third instruction to patch is another cmp reg, imm32 (high bound) _ASSERTE(pBuf[10] == 0x81); // Here the lowest three bits in ModR/M field are the register - pBuf[11] &= 0xf8; - pBuf[11] |= reg; + pBufRW[11] &= 0xf8; + pBufRW[11] |= reg; bStompWriteBarrierEphemeral = true; // What we're trying to update is the offset field of a // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[22] == 0x80); - pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[34] == 0xC6); - pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableSecondLocation]; } else @@ -1313,14 +1337,14 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[14] == 0x80); - pfunc = (size_t *) &pBuf[PreGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PreGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[26] == 0xC6); - pfunc = (size_t *) &pBuf[PreGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PreGrow_CardTableSecondLocation]; } } else @@ -1329,13 +1353,13 @@ int StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck) // cmp offset[edx], 0ffh instruction _ASSERTE(pBuf[22] == 0x80); - pfunc = (size_t *) &pBuf[PostGrow_CardTableFirstLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableFirstLocation]; *pfunc = (size_t) g_card_table; // What we're trying to update is the offset field of a // mov offset[edx], 0ffh instruction _ASSERTE(pBuf[34] == 0xC6); - pfunc = (size_t *) &pBuf[PostGrow_CardTableSecondLocation]; + pfunc = (size_t *) &pBufRW[PostGrow_CardTableSecondLocation]; } // Stick in the adjustment value. 
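The x86 barrier-stomping hunks above all follow the same W^X convention: the code is read and asserted on through its executable (RX) address pBuf, while every store goes through pBufRW, a temporary writable (RW) mapping of the same physical pages that ExecutableWriterHolder creates when the write barrier copy is enabled. A minimal sketch of that convention, assuming the holder's ExecutableWriterHolder<BYTE> template form; PatchBarrierImmediate is a hypothetical helper and the 42-byte size merely mirrors the post-grow barrier patched above, so none of this is code from the change itself:

    // Sketch only: illustrates the pBuf (RX) / pBufRW (RW) split used above.
    void PatchBarrierImmediate(BYTE* pBuf, size_t offset, size_t newValue)
    {
        BYTE* pBufRW = pBuf;
        ExecutableWriterHolder<BYTE> barrierWriterHolder;
        if (IsWriteBarrierCopyEnabled())
        {
            // Map a scratch RW view of the RX page; GetRW() returns the
            // writable alias, which the holder tears down on destruction.
            barrierWriterHolder = ExecutableWriterHolder<BYTE>(pBuf, 42);
            pBufRW = barrierWriterHolder.GetRW();
        }
        _ASSERTE(pBuf[offset - 2] == 0x81);   // reads stay on the RX view
        *(size_t*)&pBufRW[offset] = newValue; // writes go through the RW alias
    }

Reading through pBuf and writing through pBufRW keeps the asserts valid whether or not W^X is enabled, because the two pointers are identical in the non-W^X case.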
diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 61c5dfd90cbfc..564363053fc6a 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -4829,7 +4829,7 @@ VOID StubLinkerCPU::EmitArrayOpStub(const ArrayOpScript* pArrayOpScript) X86EmitOp(0x8d, kEDX, elemBaseReg, elemOfs, elemScaledReg, elemScale); // call JIT_Writeable_Thunks_Buf.WriteBarrierReg[0] (== EAX) - X86EmitCall(NewExternalCodeLabel((LPVOID) &JIT_WriteBarrierEAX), 0); + X86EmitCall(NewExternalCodeLabel((LPVOID) GetWriteBarrierCodeLocation(&JIT_WriteBarrierEAX)), 0); } else #else // TARGET_AMD64 diff --git a/src/coreclr/vm/i386/stublinkerx86.h b/src/coreclr/vm/i386/stublinkerx86.h index af5244d077193..564c999975e7c 100644 --- a/src/coreclr/vm/i386/stublinkerx86.h +++ b/src/coreclr/vm/i386/stublinkerx86.h @@ -536,7 +536,7 @@ struct StubPrecode { return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32)); } - +#ifndef DACCESS_COMPILE void ResetTargetInterlocked() { CONTRACTL @@ -562,6 +562,7 @@ struct StubPrecode { ExecutableWriterHolder rel32Holder(&m_rel32, 4); return rel32SetInterlocked(&m_rel32, rel32Holder.GetRW(), target, expected, (MethodDesc*)GetMethodDesc()); } +#endif // !DACCESS_COMPILE }; IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_movR10) == OFFSETOF_PRECODE_TYPE);) IN_TARGET_64BIT(static_assert_no_msg(offsetof(StubPrecode, m_type) == OFFSETOF_PRECODE_TYPE_MOV_R10);) @@ -646,6 +647,13 @@ struct FixupPrecode { return dac_cast(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode); } + size_t GetSizeRW() + { + LIMITED_METHOD_CONTRACT; + + return GetBase() + sizeof(void*) - dac_cast(this); + } + TADDR GetMethodDesc(); #else // HAS_FIXUP_PRECODE_CHUNKS TADDR GetMethodDesc() diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index a1e4d93d881de..882e2c29cef04 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -11875,7 +11875,7 @@ WORD CEEJitInfo::getRelocTypeHint(void * target) if (m_fAllowRel32) { // The JIT calls this method for data addresses only. It always uses REL32s for direct code targets. - if (IsPreferredExecutableRange(target)) + if (ExecutableAllocator::IsPreferredExecutableRange(target)) return IMAGE_REL_BASED_REL32; } #endif // TARGET_AMD64 diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index ca9d03c2141d3..cf9617a353282 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -238,15 +238,10 @@ extern "C" FCDECL2(Object*, ChkCastAny_NoCacheLookup, CORINFO_CLASS_HANDLE type, extern "C" FCDECL2(Object*, IsInstanceOfAny_NoCacheLookup, CORINFO_CLASS_HANDLE type, Object* obj); extern "C" FCDECL2(LPVOID, Unbox_Helper, CORINFO_CLASS_HANDLE type, Object* obj); -#if defined(TARGET_ARM64) || defined(FEATURE_WRITEBARRIER_COPY) // ARM64 JIT_WriteBarrier uses speciall ABI and thus is not callable directly // Copied write barriers must be called at a different location extern "C" FCDECL2(VOID, JIT_WriteBarrier_Callable, Object **dst, Object *ref); #define WriteBarrier_Helper JIT_WriteBarrier_Callable -#else -// in other cases the regular JIT helper is callable. 
-#define WriteBarrier_Helper JIT_WriteBarrier -#endif extern "C" FCDECL1(void, JIT_InternalThrow, unsigned exceptNum); extern "C" FCDECL1(void*, JIT_InternalThrowFromHelper, unsigned exceptNum); diff --git a/src/coreclr/vm/loaderallocator.cpp b/src/coreclr/vm/loaderallocator.cpp index 4f222be4a2c03..0a77e4445f06f 100644 --- a/src/coreclr/vm/loaderallocator.cpp +++ b/src/coreclr/vm/loaderallocator.cpp @@ -1137,7 +1137,7 @@ void LoaderAllocator::Init(BaseDomain *pDomain, BYTE *pExecutableHeapMemory) _ASSERTE(dwTotalReserveMemSize <= VIRTUAL_ALLOC_RESERVE_GRANULARITY); #endif - BYTE * initReservedMem = ClrVirtualAllocExecutable(dwTotalReserveMemSize, MEM_RESERVE, PAGE_NOACCESS); + BYTE * initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_InitialReservedMemForLoaderHeaps = initReservedMem; @@ -1672,18 +1672,25 @@ void AssemblyLoaderAllocator::SetCollectible() { CONTRACTL { - THROWS; + NOTHROW; } CONTRACTL_END; m_IsCollectible = true; -#ifndef DACCESS_COMPILE - m_pShuffleThunkCache = new ShuffleThunkCache(m_pStubHeap); -#endif } #ifndef DACCESS_COMPILE +void AssemblyLoaderAllocator::Init(AppDomain* pAppDomain) +{ + m_Id.Init(); + LoaderAllocator::Init((BaseDomain *)pAppDomain); + if (IsCollectible()) + { + m_pShuffleThunkCache = new ShuffleThunkCache(m_pStubHeap); + } +} + #ifndef CROSSGEN_COMPILE AssemblyLoaderAllocator::~AssemblyLoaderAllocator() diff --git a/src/coreclr/vm/loaderallocator.inl b/src/coreclr/vm/loaderallocator.inl index a826675ccc93c..993732d4010f8 100644 --- a/src/coreclr/vm/loaderallocator.inl +++ b/src/coreclr/vm/loaderallocator.inl @@ -21,12 +21,6 @@ inline void GlobalLoaderAllocator::Init(BaseDomain *pDomain) LoaderAllocator::Init(pDomain, m_ExecutableHeapInstance); } -inline void AssemblyLoaderAllocator::Init(AppDomain* pAppDomain) -{ - m_Id.Init(); - LoaderAllocator::Init((BaseDomain *)pAppDomain); -} - inline BOOL LoaderAllocatorID::Equals(LoaderAllocatorID *pId) { LIMITED_METHOD_CONTRACT; diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index bd3984d8697cd..db308ab208a8e 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -4188,46 +4188,6 @@ c_CentralJumpCode = { }; #include -#elif defined(TARGET_AMD64) - -#include -static const struct CentralJumpCode { - BYTE m_movzxRAX[4]; - BYTE m_shlEAX[4]; - BYTE m_movRAX[2]; - MethodDesc* m_pBaseMD; - BYTE m_addR10RAX[3]; - BYTE m_jmp[1]; - INT32 m_rel32; - - inline void Setup(CentralJumpCode* pCodeRX, MethodDesc* pMD, PCODE target, LoaderAllocator *pLoaderAllocator) { - WRAPPER_NO_CONTRACT; - m_pBaseMD = pMD; - m_rel32 = rel32UsingJumpStub(&pCodeRX->m_rel32, target, pMD, pLoaderAllocator); - } - - inline BOOL CheckTarget(TADDR target) { - WRAPPER_NO_CONTRACT; - TADDR addr = rel32Decode(PTR_HOST_MEMBER_TADDR(CentralJumpCode, this, m_rel32)); - if (*PTR_BYTE(addr) == 0x48 && - *PTR_BYTE(addr+1) == 0xB8 && - *PTR_BYTE(addr+10) == 0xFF && - *PTR_BYTE(addr+11) == 0xE0) - { - addr = *PTR_TADDR(addr+2); - } - return (addr == target); - } -} -c_CentralJumpCode = { - { 0x48, 0x0F, 0xB6, 0xC0 }, // movzx rax,al - { 0x48, 0xC1, 0xE0, MethodDesc::ALIGNMENT_SHIFT }, // shl rax, MethodDesc::ALIGNMENT_SHIFT - { 0x49, 0xBA }, NULL, // mov r10, pBaseMD - { 0x4C, 0x03, 0xD0 }, // add r10,rax - { 0xE9 }, 0 // jmp PreStub -}; -#include - #elif defined(TARGET_ARM) #include diff --git a/src/coreclr/vm/precode.cpp b/src/coreclr/vm/precode.cpp index 80731c191e737..0bd2bd657f9ad 100644 --- a/src/coreclr/vm/precode.cpp +++ b/src/coreclr/vm/precode.cpp @@ -480,7 +480,9 @@ 
void Precode::Reset() #ifdef HAS_FIXUP_PRECODE_CHUNKS if (t == PRECODE_FIXUP) { - size = sizeof(FixupPrecode) + sizeof(PTR_MethodDesc); + // The writeable size the Init method accesses is dynamic depending on + // the FixupPrecode members. + size = ((FixupPrecode*)this)->GetSizeRW(); } else #endif diff --git a/src/coreclr/vm/stackwalk.cpp b/src/coreclr/vm/stackwalk.cpp index 0971334af4d31..e61802b984950 100644 --- a/src/coreclr/vm/stackwalk.cpp +++ b/src/coreclr/vm/stackwalk.cpp @@ -713,14 +713,12 @@ UINT_PTR Thread::VirtualUnwindToFirstManagedCallFrame(T_CONTEXT* pContext) // get our caller's PSP, or our caller's caller's SP. while (!ExecutionManager::IsManagedCode(uControlPc)) { -#ifdef FEATURE_WRITEBARRIER_COPY if (IsIPInWriteBarrierCodeCopy(uControlPc)) { // Pretend we were executing the barrier function at its original location so that the unwinder can unwind the frame uControlPc = AdjustWriteBarrierIP(uControlPc); SetIP(pContext, uControlPc); } -#endif // FEATURE_WRITEBARRIER_COPY #ifndef TARGET_UNIX uControlPc = VirtualUnwindCallFrame(pContext); diff --git a/src/coreclr/vm/stublink.cpp b/src/coreclr/vm/stublink.cpp index 04a33e3982613..304cb4fb35b44 100644 --- a/src/coreclr/vm/stublink.cpp +++ b/src/coreclr/vm/stublink.cpp @@ -846,7 +846,7 @@ Stub *StubLinker::Link(LoaderHeap *pHeap, DWORD flags) ); ASSERT(pStub != NULL); - bool fSuccess = EmitStub(pStub, globalsize, pHeap); + bool fSuccess = EmitStub(pStub, globalsize, size, pHeap); #ifdef STUBLINKER_GENERATES_UNWIND_INFO if (fSuccess) @@ -1007,13 +1007,13 @@ int StubLinker::CalculateSize(int* pGlobalSize) return globalsize + datasize; } -bool StubLinker::EmitStub(Stub* pStub, int globalsize, LoaderHeap* pHeap) +bool StubLinker::EmitStub(Stub* pStub, int globalsize, int totalSize, LoaderHeap* pHeap) { STANDARD_VM_CONTRACT; BYTE *pCode = (BYTE*)(pStub->GetBlob()); - ExecutableWriterHolder stubWriterHolder(pStub, sizeof(Stub)); + ExecutableWriterHolder stubWriterHolder(pStub, sizeof(Stub) + totalSize); Stub *pStubRW = stubWriterHolder.GetRW(); BYTE *pCodeRW = (BYTE*)(pStubRW->GetBlob()); @@ -2013,11 +2013,7 @@ VOID Stub::DeleteStub() FillMemory(this+1, m_numCodeBytes, 0xcc); #endif -#ifndef TARGET_UNIX - DeleteExecutable((BYTE*)GetAllocationBase()); -#else delete [] (BYTE*)GetAllocationBase(); -#endif } } @@ -2124,11 +2120,7 @@ Stub* Stub::NewStub(PTR_VOID pCode, DWORD flags) BYTE *pBlock; if (pHeap == NULL) { -#ifndef TARGET_UNIX - pBlock = new (executable) BYTE[totalSize]; -#else pBlock = new BYTE[totalSize]; -#endif } else { diff --git a/src/coreclr/vm/stublink.h b/src/coreclr/vm/stublink.h index 94326f9962ea7..9613fd48f687d 100644 --- a/src/coreclr/vm/stublink.h +++ b/src/coreclr/vm/stublink.h @@ -395,7 +395,7 @@ class StubLinker // Writes out the code element into memory following the // stub object. 
- bool EmitStub(Stub* pStub, int globalsize, LoaderHeap* pHeap); + bool EmitStub(Stub* pStub, int globalsize, int totalSize, LoaderHeap* pHeap); CodeRun *GetLastCodeRunIfAny(); diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 4dfa4a22b3fa4..2302617614efd 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -1078,18 +1078,30 @@ DWORD_PTR Thread::OBJREF_HASH = OBJREF_TABSIZE; extern "C" void STDCALL JIT_PatchedCodeStart(); extern "C" void STDCALL JIT_PatchedCodeLast(); -#ifdef FEATURE_WRITEBARRIER_COPY - static void* s_barrierCopy = NULL; BYTE* GetWriteBarrierCodeLocation(VOID* barrier) { - return (BYTE*)s_barrierCopy + ((BYTE*)barrier - (BYTE*)JIT_PatchedCodeStart); + if (IsWriteBarrierCopyEnabled()) + { + return (BYTE*)PINSTRToPCODE((TADDR)s_barrierCopy + ((TADDR)barrier - (TADDR)JIT_PatchedCodeStart)); + } + else + { + return (BYTE*)barrier; + } } BOOL IsIPInWriteBarrierCodeCopy(PCODE controlPc) { - return (s_barrierCopy <= (void*)controlPc && (void*)controlPc < ((BYTE*)s_barrierCopy + ((BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart))); + if (IsWriteBarrierCopyEnabled()) + { + return (s_barrierCopy <= (void*)controlPc && (void*)controlPc < ((BYTE*)s_barrierCopy + ((BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart))); + } + else + { + return FALSE; + } } PCODE AdjustWriteBarrierIP(PCODE controlPc) @@ -1100,14 +1112,21 @@ PCODE AdjustWriteBarrierIP(PCODE controlPc) return (PCODE)JIT_PatchedCodeStart + (controlPc - (PCODE)s_barrierCopy); } +#ifdef TARGET_X86 +extern "C" void *JIT_WriteBarrierEAX_Loc; +#else extern "C" void *JIT_WriteBarrier_Loc; +#endif + #ifdef TARGET_ARM64 extern "C" void (*JIT_WriteBarrier_Table)(); extern "C" void *JIT_WriteBarrier_Loc = 0; extern "C" void *JIT_WriteBarrier_Table_Loc = 0; #endif // TARGET_ARM64 -#endif // FEATURE_WRITEBARRIER_COPY +#ifdef TARGET_ARM +extern "C" void *JIT_WriteBarrier_Loc = 0; +#endif // TARGET_ARM #ifndef TARGET_UNIX // g_TlsIndex is only used by the DAC. Disable optimizations around it to prevent it from getting optimized out. @@ -1138,50 +1157,74 @@ void InitThreadManager() _ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart > (ptrdiff_t)0); _ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart < (ptrdiff_t)GetOsPageSize()); -#ifdef FEATURE_WRITEBARRIER_COPY - s_barrierCopy = ClrVirtualAlloc(NULL, g_SystemInfo.dwAllocationGranularity, MEM_COMMIT, PAGE_EXECUTE_READWRITE); - if (s_barrierCopy == NULL) - { - _ASSERTE(!"ClrVirtualAlloc of GC barrier code page failed"); - COMPlusThrowWin32(); - } - + if (IsWriteBarrierCopyEnabled()) { - size_t writeBarrierSize = (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart; - ExecutableWriterHolder barrierWriterHolder(s_barrierCopy, writeBarrierSize); - memcpy(barrierWriterHolder.GetRW(), (BYTE*)JIT_PatchedCodeStart, writeBarrierSize); - } + s_barrierCopy = ExecutableAllocator::Instance()->Reserve(g_SystemInfo.dwAllocationGranularity); + ExecutableAllocator::Instance()->Commit(s_barrierCopy, g_SystemInfo.dwAllocationGranularity, true); + if (s_barrierCopy == NULL) + { + _ASSERTE(!"Allocation of GC barrier code page failed"); + COMPlusThrowWin32(); + } - // Store the JIT_WriteBarrier copy location to a global variable so that helpers - // can jump to it. 
- JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier); + { + size_t writeBarrierSize = (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart; + ExecutableWriterHolder barrierWriterHolder(s_barrierCopy, writeBarrierSize); + memcpy(barrierWriterHolder.GetRW(), (BYTE*)JIT_PatchedCodeStart, writeBarrierSize); + } - SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); + // Store the JIT_WriteBarrier copy location to a global variable so that helpers + // can jump to it. +#ifdef TARGET_X86 + JIT_WriteBarrierEAX_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EAX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_ECX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierECX)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EBX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEBX)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_ESI, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierESI)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EDI, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEDI)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EBP, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEBP)); +#else // TARGET_X86 + JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier); +#endif // TARGET_X86 + SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier)); #ifdef TARGET_ARM64 - // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. - JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); - - SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); - SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); + // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. + JIT_WriteBarrier_Table_Loc = GetWriteBarrierCodeLocation((void*)&JIT_WriteBarrier_Table); #endif // TARGET_ARM64 -#else // FEATURE_WRITEBARRIER_COPY +#if defined(TARGET_ARM64) || defined(TARGET_ARM) + SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier)); + SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier)); +#endif // TARGET_ARM64 || TARGET_ARM - // I am using virtual protect to cover the entire range that this code falls in. - // + } + else + { + // I am using virtual protect to cover the entire range that this code falls in. + // - // We could reset it to non-writeable inbetween GCs and such, but then we'd have to keep on re-writing back and forth, - // so instead we'll leave it writable from here forward. + // We could reset it to non-writeable inbetween GCs and such, but then we'd have to keep on re-writing back and forth, + // so instead we'll leave it writable from here forward. 
- DWORD oldProt; - if (!ClrVirtualProtect((void *)JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart, - PAGE_EXECUTE_READWRITE, &oldProt)) - { - _ASSERTE(!"ClrVirtualProtect of code page failed"); - COMPlusThrowWin32(); + DWORD oldProt; + if (!ClrVirtualProtect((void *)JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart, + PAGE_EXECUTE_READWRITE, &oldProt)) + { + _ASSERTE(!"ClrVirtualProtect of code page failed"); + COMPlusThrowWin32(); + } + +#ifdef TARGET_X86 + JIT_WriteBarrierEAX_Loc = (void*)JIT_WriteBarrierEAX; +#else + JIT_WriteBarrier_Loc = (void*)JIT_WriteBarrier; +#endif +#ifdef TARGET_ARM64 + // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated. + JIT_WriteBarrier_Table_Loc = (void*)&JIT_WriteBarrier_Table; +#endif // TARGET_ARM64 } -#endif // FEATURE_WRITEBARRIER_COPY #ifndef TARGET_UNIX _ASSERTE(GetThreadNULLOk() == NULL); diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 0aadbf40260ca..8a66c0555129d 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -6272,18 +6272,23 @@ class ThreadStateNCStackHolder BOOL Debug_IsLockedViaThreadSuspension(); -#ifdef FEATURE_WRITEBARRIER_COPY +inline BOOL IsWriteBarrierCopyEnabled() +{ +#ifdef DACCESS_COMPILE + return FALSE; +#else // DACCESS_COMPILE +#ifdef HOST_OSX + return TRUE; +#else + return ExecutableAllocator::IsWXORXEnabled(); +#endif +#endif // DACCESS_COMPILE +} BYTE* GetWriteBarrierCodeLocation(VOID* barrier); BOOL IsIPInWriteBarrierCodeCopy(PCODE controlPc); PCODE AdjustWriteBarrierIP(PCODE controlPc); -#else // FEATURE_WRITEBARRIER_COPY - -#define GetWriteBarrierCodeLocation(barrier) ((BYTE*)(barrier)) - -#endif // FEATURE_WRITEBARRIER_COPY - #if !defined(DACCESS_COMPILE) && !defined(CROSSGEN_COMPILE) extern thread_local Thread* t_pStackWalkerWalkingThread; #define SET_THREAD_TYPE_STACKWALKER(pThread) t_pStackWalkerWalkingThread = pThread diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp index 95d568d641c73..6d4fdcffd62e0 100644 --- a/src/coreclr/vm/virtualcallstub.cpp +++ b/src/coreclr/vm/virtualcallstub.cpp @@ -641,7 +641,7 @@ void VirtualCallStubManager::Init(BaseDomain *pDomain, LoaderAllocator *pLoaderA dwTotalReserveMemSize); } - initReservedMem = ClrVirtualAllocExecutable (dwTotalReserveMemSize, MEM_RESERVE, PAGE_NOACCESS); + initReservedMem = (BYTE*)ExecutableAllocator::Instance()->Reserve(dwTotalReserveMemSize); m_initialReservedMemForHeaps = (BYTE *) initReservedMem; From f7e73635ada7727eb90c1ec65337b90428682805 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Wed, 30 Jun 2021 13:34:24 +0200 Subject: [PATCH 2/8] Fix FreeBSD < ver 13 memfd_create was introduced in FreeBSD 13.0; on older releases, shm_open(SHM_ANON, ...) is the equivalent.
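The hunk below therefore keeps shm_open on all FreeBSD versions, since it works both before and after 13.0. A condensed sketch of what the mapper-creation code does with either descriptor, with the runtime's error handling elided (CreateDoubleMappingFd is an illustrative name, TARGET_FREEBSD is the runtime's platform define, and on Linux memfd_create may additionally need _GNU_SOURCE or a raw syscall, which the runtime handles separately):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    // Create the anonymous shared-memory object that backs the double mapping.
    static int CreateDoubleMappingFd(size_t maxExecutableCodeSize)
    {
    #ifdef TARGET_FREEBSD
        // Pre-13.0 equivalent of memfd_create.
        int fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, S_IRWXU);
    #else
        int fd = memfd_create("doublemapper", MFD_CLOEXEC);
    #endif
        if (fd == -1 || ftruncate(fd, maxExecutableCodeSize) == -1)
            return -1;
        // The same offset of this descriptor can later be mapped twice,
        // once PROT_READ | PROT_WRITE and once PROT_READ | PROT_EXEC,
        // yielding RW and RX views of the same physical memory.
        return fd;
    }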
--- src/coreclr/minipal/Unix/doublemapping.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp index 52f3809efb868..924196551e900 100644 --- a/src/coreclr/minipal/Unix/doublemapping.cpp +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -41,7 +41,11 @@ bool VMToOSInterface::CreateDoubleMemoryMapper(void** pHandle, size_t *pMaxExecu { #ifndef TARGET_OSX +#ifdef TARGET_FREEBSD + int fd = shm_open(SHM_ANON, O_RDWR | O_CREAT, S_IRWXU); +#else // TARGET_FREEBSD int fd = memfd_create("doublemapper", MFD_CLOEXEC); +#endif // TARGET_FREEBSD if (fd == -1) { From 74c15e27d55f62bb70e0b5078676d4295c5496ed Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Thu, 8 Jul 2021 23:39:42 +0200 Subject: [PATCH 3/8] Reflect PR feedback and fix old macOS x64 --- src/coreclr/inc/CrstTypes.def | 2 +- src/coreclr/inc/clrconfigvalues.h | 2 +- src/coreclr/inc/crsttypes.h | 40 +++++++++--------- src/coreclr/minipal/Unix/doublemapping.cpp | 41 ++++++++++++++++++- src/coreclr/minipal/Windows/doublemapping.cpp | 41 ------------------- src/coreclr/utilcode/executableallocator.cpp | 2 +- src/coreclr/vm/jitinterface.h | 37 ++++++++--------- src/coreclr/vm/threads.cpp | 18 +++++--- 8 files changed, 92 insertions(+), 91 deletions(-) diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index 3b67b14834e29..74b8c165ca934 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -202,7 +202,7 @@ Crst Exception End Crst ExecutableAllocatorLock - Unordered + AcquiredAfter LoaderHeap End Crst ExecuteManRangeLock diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 1c57796cb2e39..bfd43629017a3 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -738,7 +738,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If COMPlus_LTTng is // // Executable code // -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWXORX, W("EnableWXORX"), 1, "Enable W^X for executable memory."); +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWriteXorExecute, W("EnableWriteXorExecute"), 1, "Enable W^X for executable memory."); #ifdef FEATURE_GDBJIT /// diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h index 462a654a62c5f..015199f390fd0 100644 --- a/src/coreclr/inc/crsttypes.h +++ b/src/coreclr/inc/crsttypes.h @@ -151,8 +151,8 @@ int g_rgCrstLevelMap[] = 0, // CrstArgBasedStubCache 0, // CrstAssemblyList 12, // CrstAssemblyLoader - 3, // CrstAvailableClass - 4, // CrstAvailableParamTypes + 4, // CrstAvailableClass + 5, // CrstAvailableParamTypes 7, // CrstBaseDomain -1, // CrstCCompRC 13, // CrstClassFactInfoHash @@ -161,7 +161,7 @@ int g_rgCrstLevelMap[] = 6, // CrstCodeFragmentHeap 9, // CrstCodeVersioning 0, // CrstCOMCallWrapper - 4, // CrstCOMWrapperCache + 5, // CrstCOMWrapperCache 3, // CrstDataTest1 0, // CrstDataTest2 0, // CrstDbgTransport @@ -180,10 +180,10 @@ int g_rgCrstLevelMap[] = 18, // CrstEventPipe 0, // CrstEventStore 0, // CrstException - -1, // CrstExecutableAllocatorLock + 0, // CrstExecutableAllocatorLock 0, // CrstExecuteManRangeLock 0, // CrstExternalObjectContextCache - 3, // CrstFCall + 4, // CrstFCall 7, // CrstFuncPtrStubs 10, // CrstFusionAppCtx 10, // CrstGCCover @@ -198,25 +198,25 @@ int g_rgCrstLevelMap[] = 3, // CrstInlineTrackingMap 17, // CrstInstMethodHashTable 20, // CrstInterop - 4, // CrstInteropData + 5, // CrstInteropData 0, // CrstIsJMCMethod 7, // CrstISymUnmanagedReader 11, // CrstJit 0, // 
CrstJitGenericHandleCache 16, // CrstJitInlineTrackingMap - 3, // CrstJitPatchpoint + 4, // CrstJitPatchpoint -1, // CrstJitPerf 6, // CrstJumpStubCache 0, // CrstLeafLock -1, // CrstListLock 15, // CrstLoaderAllocator 16, // CrstLoaderAllocatorReferences - 0, // CrstLoaderHeap + 3, // CrstLoaderHeap 3, // CrstManagedObjectWrapperMap 14, // CrstMethodDescBackpatchInfoTracker - 4, // CrstModule + 5, // CrstModule 15, // CrstModuleFixup - 3, // CrstModuleLookupTable + 4, // CrstModuleLookupTable 0, // CrstMulticoreJitHash 13, // CrstMulticoreJitManager 0, // CrstNativeImageEagerFixups @@ -224,22 +224,22 @@ int g_rgCrstLevelMap[] = 0, // CrstNls 0, // CrstNotifyGdb 2, // CrstObjectList - 4, // CrstPEImage + 5, // CrstPEImage 19, // CrstPendingTypeLoadEntry - 3, // CrstPgoData + 4, // CrstPgoData 0, // CrstPinnedByrefValidation 0, // CrstProfilerGCRefDataFreeList 0, // CrstProfilingAPIStatus - 3, // CrstRCWCache + 4, // CrstRCWCache 0, // CrstRCWCleanupList 10, // CrstReadyToRunEntryPointToMethodDescMap 8, // CrstReflection 17, // CrstReJITGlobalRequest - 3, // CrstRetThunkCache + 4, // CrstRetThunkCache 3, // CrstSavedExceptionInfo 0, // CrstSaveModuleProfileData 0, // CrstSecurityStackwalkCache - 3, // CrstSigConvert + 4, // CrstSigConvert 5, // CrstSingleUseLock 0, // CrstSpecialStatics 0, // CrstStackSampler @@ -249,7 +249,7 @@ int g_rgCrstLevelMap[] = 4, // CrstStubUnwindInfoHeapSegments 3, // CrstSyncBlockCache 0, // CrstSyncHashLock - 4, // CrstSystemBaseDomain + 5, // CrstSystemBaseDomain 13, // CrstSystemDomain 0, // CrstSystemDomainDelayedUnloadList 0, // CrstThreadIdDispenser @@ -258,13 +258,13 @@ int g_rgCrstLevelMap[] = 13, // CrstThreadpoolWorker 12, // CrstThreadStore 8, // CrstTieredCompilation - 3, // CrstTypeEquivalenceMap + 4, // CrstTypeEquivalenceMap 10, // CrstTypeIDMap - 3, // CrstUMEntryThunkCache - 3, // CrstUniqueStack + 4, // CrstUMEntryThunkCache + 4, // CrstUniqueStack 7, // CrstUnresolvedClassLock 3, // CrstUnwindInfoTableLock - 3, // CrstVSDIndirectionCellLock + 4, // CrstVSDIndirectionCellLock 3, // CrstWrapperTemplate }; diff --git a/src/coreclr/minipal/Unix/doublemapping.cpp b/src/coreclr/minipal/Unix/doublemapping.cpp index 924196551e900..a50b326861aad 100644 --- a/src/coreclr/minipal/Unix/doublemapping.cpp +++ b/src/coreclr/minipal/Unix/doublemapping.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef TARGET_LINUX #include #include // __NR_memfd_create @@ -77,6 +78,39 @@ void VMToOSInterface::DestroyDoubleMemoryMapper(void *mapperHandle) extern "C" void* PAL_VirtualReserveFromExecutableMemoryAllocatorWithinRange(const void* lpBeginAddress, const void* lpEndAddress, size_t dwSize); +#ifdef TARGET_OSX +bool IsMapJitFlagNeeded() +{ + static volatile int isMapJitFlagNeeded = -1; + + if (isMapJitFlagNeeded == -1) + { + int mapJitFlagCheckResult = 0; + int pageSize = sysconf(_SC_PAGE_SIZE); + // Try to map a page with read-write-execute protection. It should fail on Mojave hardened runtime and higher. 
+ void* testPage = mmap(NULL, pageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (testPage == MAP_FAILED && (errno == EACCES)) + { + // The mapping has failed with EACCES, check if making the same mapping with MAP_JIT flag works + testPage = mmap(NULL, pageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE | MAP_JIT, -1, 0); + if (testPage != MAP_FAILED) + { + mapJitFlagCheckResult = 1; + } + } + + if (testPage != MAP_FAILED) + { + munmap(testPage, pageSize); + } + + isMapJitFlagNeeded = mapJitFlagCheckResult; + } + + return (bool)isMapJitFlagNeeded; +} +#endif // TARGET_OSX + void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offset, size_t size, const void *rangeStart, const void* rangeEnd) { int fd = (int)(size_t)mapperHandle; @@ -103,7 +137,12 @@ void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offs #ifndef TARGET_OSX void* result = mmap(NULL, size, PROT_NONE, MAP_SHARED, fd, offset); #else - void* result = mmap(NULL, size, PROT_NONE, MAP_JIT | MAP_ANON | MAP_PRIVATE, -1, 0); + int mmapFlags = MAP_ANON | MAP_PRIVATE; + if (IsMapJitFlagNeeded()) + { + mmapFlags |= MAP_JIT; + } + void* result = mmap(NULL, size, PROT_NONE, mmapFlags, -1, 0); #endif if (result == MAP_FAILED) { diff --git a/src/coreclr/minipal/Windows/doublemapping.cpp b/src/coreclr/minipal/Windows/doublemapping.cpp index 5edda681f2598..e265f1d139ad0 100644 --- a/src/coreclr/minipal/Windows/doublemapping.cpp +++ b/src/coreclr/minipal/Windows/doublemapping.cpp @@ -163,19 +163,6 @@ void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offs break; } -#ifdef _DEBUG - // if (ShouldInjectFaultInRange()) - // { - // // return nullptr (failure) - // faultInjected = true; - // break; - // } -#endif // _DEBUG - - // On UNIX we can also fail if our request size 'dwSize' is larger than 64K and - // and our tryAddr is pointing at a small MEM_FREE region (smaller than 'dwSize') - // However we can't distinguish between this and the race case. - // We might fail in a race. So just move on to next region and continue trying tryAddr = tryAddr + VIRTUAL_ALLOC_RESERVE_GRANULARITY; } @@ -187,35 +174,7 @@ void* VMToOSInterface::ReserveDoubleMappedMemory(void *mapperHandle, size_t offs } } - // STRESS_LOG7(LF_JIT, LL_INFO100, - // "ClrVirtualAllocWithinRange request #%u for %08x bytes in [ %p .. %p ], query count was %u - returned %s: %p\n", - // countOfCalls, (DWORD)dwSize, pMinAddr, pMaxAddr, - // virtualQueryCount, (pResult != nullptr) ? "success" : "failure", pResult); - - // If we failed this call the process will typically be terminated - // so we log any additional reason for failing this call. 
- // - if (pResult == nullptr) - { - // if ((tryAddr + dwSize) > (BYTE *)pMaxAddr) - // { - // // Our tryAddr reached pMaxAddr - // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: Address space exhausted.\n"); - // } - - // if (virtualQueryFailed) - // { - // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: VirtualQuery operation failed.\n"); - // } - - // if (faultInjected) - // { - // STRESS_LOG0(LF_JIT, LL_INFO100, "Additional reason: fault injected.\n"); - // } - } - return pResult; - } void *VMToOSInterface::CommitDoubleMappedMemory(void* pStart, size_t size, bool isExecutable) diff --git a/src/coreclr/utilcode/executableallocator.cpp b/src/coreclr/utilcode/executableallocator.cpp index 4d461e66e7e51..ac4c326c83784 100644 --- a/src/coreclr/utilcode/executableallocator.cpp +++ b/src/coreclr/utilcode/executableallocator.cpp @@ -154,7 +154,7 @@ HRESULT ExecutableAllocator::StaticInitialize(FatalErrorHandler fatalErrorHandle LIMITED_METHOD_CONTRACT; g_fatalErrorHandler = fatalErrorHandler; - g_isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWXORX) != 0; + g_isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWriteXorExecute) != 0; g_instance = new (nothrow) ExecutableAllocator(); if (g_instance == NULL) { diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index cf9617a353282..e071d0717d179 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -339,28 +339,25 @@ EXTERN_C FCDECL2_VV(UINT64, JIT_LRsz, UINT64 num, int shift); #ifdef TARGET_X86 +#define ENUM_X86_WRITE_BARRIER_REGISTERS() \ + X86_WRITE_BARRIER_REGISTER(EAX) \ + X86_WRITE_BARRIER_REGISTER(ECX) \ + X86_WRITE_BARRIER_REGISTER(EBX) \ + X86_WRITE_BARRIER_REGISTER(ESI) \ + X86_WRITE_BARRIER_REGISTER(EDI) \ + X86_WRITE_BARRIER_REGISTER(EBP) + extern "C" { - void STDCALL JIT_CheckedWriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_CheckedWriteBarrierEBP(); // JIThelp.asm/JIThelp.s - - void STDCALL JIT_DebugWriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_DebugWriteBarrierEBP(); // JIThelp.asm/JIThelp.s - - void STDCALL JIT_WriteBarrierEAX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEBX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierECX(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierESI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEDI(); // JIThelp.asm/JIThelp.s - void STDCALL JIT_WriteBarrierEBP(); // JIThelp.asm/JIThelp.s + +// JIThelp.asm/JIThelp.s +#define X86_WRITE_BARRIER_REGISTER(reg) \ + void STDCALL JIT_CheckedWriteBarrier##reg(); \ + void STDCALL JIT_DebugWriteBarrier##reg(); \ + void STDCALL JIT_WriteBarrier##reg(); + + ENUM_X86_WRITE_BARRIER_REGISTERS() +#undef X86_WRITE_BARRIER_REGISTER void STDCALL JIT_WriteBarrierGroup(); void STDCALL JIT_WriteBarrierGroup_End(); diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index 2302617614efd..c4a16d0b04484 100644 --- a/src/coreclr/vm/threads.cpp 
diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp
index 2302617614efd..c4a16d0b04484 100644
--- a/src/coreclr/vm/threads.cpp
+++ b/src/coreclr/vm/threads.cpp
@@ -1177,16 +1177,20 @@ void InitThreadManager()
     // can jump to it.
 #ifdef TARGET_X86
     JIT_WriteBarrierEAX_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX);
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EAX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEAX));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_ECX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierECX));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EBX, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEBX));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_ESI, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierESI));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EDI, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEDI));
-    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_EBP, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrierEBP));
+
+#define X86_WRITE_BARRIER_REGISTER(reg) \
+    SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF_##reg, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier##reg)); \
+    ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier##reg), W("@WriteBarrier" #reg));
+
+    ENUM_X86_WRITE_BARRIER_REGISTERS()
+
+#undef X86_WRITE_BARRIER_REGISTER
+
 #else // TARGET_X86
     JIT_WriteBarrier_Loc = GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier);
 #endif // TARGET_X86
 
     SetJitHelperFunction(CORINFO_HELP_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier));
+    ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_WriteBarrier), W("@WriteBarrier"));
 
 #ifdef TARGET_ARM64
     // Store the JIT_WriteBarrier_Table copy location to a global variable so that it can be updated.
@@ -1195,7 +1199,9 @@ void InitThreadManager()
 
 #if defined(TARGET_ARM64) || defined(TARGET_ARM)
     SetJitHelperFunction(CORINFO_HELP_CHECKED_ASSIGN_REF, GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier));
+    ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_CheckedWriteBarrier), W("@CheckedWriteBarrier"));
     SetJitHelperFunction(CORINFO_HELP_ASSIGN_BYREF, GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier));
+    ETW::MethodLog::StubInitialized((ULONGLONG)GetWriteBarrierCodeLocation((void*)JIT_ByRefWriteBarrier), W("@ByRefWriteBarrier"));
 #endif // TARGET_ARM64 || TARGET_ARM
 }
 
From 52a83d47ebfcf0d34a5b61931dcaf0882e5127ba Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Fri, 9 Jul 2021 09:40:25 +0200
Subject: [PATCH 4/8] Fix ExecutableAllocatorLock vs LeafLock ordering

---
 src/coreclr/inc/CrstTypes.def | 2 +-
 src/coreclr/inc/crsttypes.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def
index 74b8c165ca934..04e3de46146bb 100644
--- a/src/coreclr/inc/CrstTypes.def
+++ b/src/coreclr/inc/CrstTypes.def
@@ -202,7 +202,7 @@ Crst Exception
 End
 
 Crst ExecutableAllocatorLock
-    AcquiredAfter LoaderHeap
+    AcquiredAfter LoaderHeap LeafLock
 End
 
 Crst ExecuteManRangeLock
diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h
index 015199f390fd0..b7809731e58c5 100644
--- a/src/coreclr/inc/crsttypes.h
+++ b/src/coreclr/inc/crsttypes.h
@@ -207,7 +207,7 @@ int g_rgCrstLevelMap[] =
     4,          // CrstJitPatchpoint
     -1,         // CrstJitPerf
     6,          // CrstJumpStubCache
-    0,          // CrstLeafLock
+    3,          // CrstLeafLock
     -1,         // CrstListLock
     15,         // CrstLoaderAllocator
     16,         // CrstLoaderAllocatorReferences
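The level numbers in g_rgCrstLevelMap encode the deadlock-avoidance invariant these lock-ordering commits adjust: roughly, a thread may only acquire a Crst whose level is strictly below the levels of the Crsts it already holds, with -1 marking unordered locks that are exempt. A sketch of that rule as a hypothetical helper (IsAcquisitionOrdered is not a runtime function):

    // Would acquiring 'requested' while holding 'held' respect the ordering?
    static bool IsAcquisitionOrdered(CrstType held, CrstType requested)
    {
        int heldLevel      = g_rgCrstLevelMap[held];
        int requestedLevel = g_rgCrstLevelMap[requested];
        if (heldLevel == -1 || requestedLevel == -1)
            return true; // unordered locks skip the check
        return requestedLevel < heldLevel;
    }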
From 605d67e9b40f219e3ccf59f5200e9f578bb8eb83 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Fri, 9 Jul 2021 16:12:37 +0200
Subject: [PATCH 5/8] Replace LeafLock in UMEntryThunkFreeList with a new lock

Also update the ordering list of the ExecutableAllocatorLock
---
 src/coreclr/inc/CrstTypes.def        |  5 ++++-
 src/coreclr/inc/crsttypes.h          | 19 +++++++++++--------
 src/coreclr/vm/dllimportcallback.cpp |  2 +-
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def
index 04e3de46146bb..c7266df7dbb01 100644
--- a/src/coreclr/inc/CrstTypes.def
+++ b/src/coreclr/inc/CrstTypes.def
@@ -202,7 +202,7 @@ Crst Exception
 End
 
 Crst ExecutableAllocatorLock
-    AcquiredAfter LoaderHeap LeafLock
+    AcquiredAfter LoaderHeap ArgBasedStubCache UMEntryThunkFreeListLock
 End
 
 Crst ExecuteManRangeLock
@@ -509,6 +509,9 @@ Crst TypeEquivalenceMap
     AcquiredBefore LoaderHeap
 End
 
+Crst UMEntryThunkFreeListLock
+End
+
 Crst UniqueStack
     AcquiredBefore LoaderHeap
 End
diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h
index b7809731e58c5..7be482c48bb55 100644
--- a/src/coreclr/inc/crsttypes.h
+++ b/src/coreclr/inc/crsttypes.h
@@ -130,12 +130,13 @@ enum CrstType
     CrstTypeEquivalenceMap = 112,
     CrstTypeIDMap = 113,
     CrstUMEntryThunkCache = 114,
-    CrstUniqueStack = 115,
-    CrstUnresolvedClassLock = 116,
-    CrstUnwindInfoTableLock = 117,
-    CrstVSDIndirectionCellLock = 118,
-    CrstWrapperTemplate = 119,
-    kNumberOfCrstTypes = 120
+    CrstUMEntryThunkFreeListLock = 115,
+    CrstUniqueStack = 116,
+    CrstUnresolvedClassLock = 117,
+    CrstUnwindInfoTableLock = 118,
+    CrstVSDIndirectionCellLock = 119,
+    CrstWrapperTemplate = 120,
+    kNumberOfCrstTypes = 121
 };
 
 #endif // __CRST_TYPES_INCLUDED
@@ -148,7 +149,7 @@ int g_rgCrstLevelMap[] =
 {
     10,         // CrstAppDomainCache
     14,         // CrstAppDomainHandleTable
-    0,          // CrstArgBasedStubCache
+    3,          // CrstArgBasedStubCache
     0,          // CrstAssemblyList
     12,         // CrstAssemblyLoader
     4,          // CrstAvailableClass
@@ -207,7 +208,7 @@ int g_rgCrstLevelMap[] =
     4,          // CrstJitPatchpoint
     -1,         // CrstJitPerf
     6,          // CrstJumpStubCache
-    3,          // CrstLeafLock
+    0,          // CrstLeafLock
     -1,         // CrstListLock
     15,         // CrstLoaderAllocator
     16,         // CrstLoaderAllocatorReferences
@@ -261,6 +262,7 @@ int g_rgCrstLevelMap[] =
     4,          // CrstTypeEquivalenceMap
     10,         // CrstTypeIDMap
     4,          // CrstUMEntryThunkCache
+    3,          // CrstUMEntryThunkFreeListLock
     4,          // CrstUniqueStack
     7,          // CrstUnresolvedClassLock
     3,          // CrstUnwindInfoTableLock
@@ -386,6 +388,7 @@ LPCSTR g_rgCrstNameMap[] =
     "CrstTypeEquivalenceMap",
     "CrstTypeIDMap",
     "CrstUMEntryThunkCache",
+    "CrstUMEntryThunkFreeListLock",
     "CrstUniqueStack",
     "CrstUnresolvedClassLock",
     "CrstUnwindInfoTableLock",
diff --git a/src/coreclr/vm/dllimportcallback.cpp b/src/coreclr/vm/dllimportcallback.cpp
index 4a88f81df5210..4f3cf879d10a4 100644
--- a/src/coreclr/vm/dllimportcallback.cpp
+++ b/src/coreclr/vm/dllimportcallback.cpp
@@ -41,7 +41,7 @@ class UMEntryThunkFreeList
     {
         WRAPPER_NO_CONTRACT;
 
-        m_crst.Init(CrstLeafLock, CRST_UNSAFE_ANYMODE);
+        m_crst.Init(CrstUMEntryThunkFreeListLock, CRST_UNSAFE_ANYMODE);
     }
 
     UMEntryThunk *GetUMEntryThunk()
 
From 27252037fdd59b4b5444c2ac58c6829801b34922 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Sat, 10 Jul 2021 03:19:08 +0200
Subject: [PATCH 6/8] Fix host test, minor cleanup

---
 src/coreclr/utilcode/loaderheap.cpp | 89 +++++++++++++++++++----------
 src/coreclr/vm/virtualcallstub.cpp  | 12 ++--
 2 files changed, 64 insertions(+), 37 deletions(-)

diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp
index 5828763f512f2..90fc0c125c1f7 100644
--- a/src/coreclr/utilcode/loaderheap.cpp
+++ b/src/coreclr/utilcode/loaderheap.cpp
@@ -699,7 +699,7 @@ struct LoaderHeapFreeBlock
     size_t               m_dwSize;   // Total size of this block (including this header)
 //! Try not to grow the size of this structure. It places a minimum size on LoaderHeap allocations.
 
-    static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap)
+    static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMemRX, void *pMemRW, size_t dwTotalSize, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
         STATIC_CONTRACT_GC_NOTRIGGER;
@@ -722,18 +722,19 @@ struct LoaderHeapFreeBlock
     }
 #endif
 
-        INDEBUG(memset(pMem, 0xcc, dwTotalSize);)
-        LoaderHeapFreeBlock *pNewBlock = (LoaderHeapFreeBlock*)pMem;
-        pNewBlock->m_pNext  = *ppHead;
-        pNewBlock->m_dwSize = dwTotalSize;
-        *ppHead = pNewBlock;
+        INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);)
+        LoaderHeapFreeBlock *pNewBlockRX = (LoaderHeapFreeBlock*)pMemRX;
+        LoaderHeapFreeBlock *pNewBlockRW = (LoaderHeapFreeBlock*)pMemRW;
+        pNewBlockRW->m_pNext  = *ppHead;
+        pNewBlockRW->m_dwSize = dwTotalSize;
+        *ppHead = pNewBlockRX;
 
-        MergeBlock(pNewBlock, pHeap);
+        MergeBlock(pNewBlockRX, pNewBlockRW, pHeap);
 
         LOADER_HEAP_END_TRAP_FAULT
     }
 
-
+#ifndef DACCESS_COMPILE
     static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, BOOL fRemoveFromFreeList, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
@@ -755,7 +756,14 @@ struct LoaderHeapFreeBlock
                 // Exact match.  Hooray!
                 if (fRemoveFromFreeList)
                 {
-                    *ppWalk = pCur->m_pNext;
+                    ExecutableWriterHolder<LoaderHeapFreeBlock *> walkWriterHolder;
+                    LoaderHeapFreeBlock **ppWalkRW = ppWalk;
+                    if (pHeap->IsExecutable() && (ppWalk != ppHead))
+                    {
+                        walkWriterHolder = ExecutableWriterHolder<LoaderHeapFreeBlock *>(ppWalk, sizeof(LoaderHeapFreeBlock **));
+                        ppWalkRW = walkWriterHolder.GetRW();
+                    }
+                    *ppWalkRW = pCur->m_pNext;
                 }
                 break;
             }
@@ -765,8 +773,24 @@ struct LoaderHeapFreeBlock
                 pResult = pCur;
                 if (fRemoveFromFreeList)
                 {
-                    *ppWalk = pCur->m_pNext;
-                    InsertFreeBlock(ppWalk, ((BYTE*)pCur) + dwSize, dwCurSize - dwSize, pHeap );
+                    ExecutableWriterHolder<LoaderHeapFreeBlock *> walkWriterHolder;
+                    LoaderHeapFreeBlock **ppWalkRW = ppWalk;
+                    if (pHeap->IsExecutable() && (ppWalk != ppHead))
+                    {
+                        walkWriterHolder = ExecutableWriterHolder<LoaderHeapFreeBlock *>(ppWalk, sizeof(LoaderHeapFreeBlock **));
+                        ppWalkRW = walkWriterHolder.GetRW();
+                    }
+                    *ppWalkRW = pCur->m_pNext;
+
+                    void* pMem = (BYTE*)pCur + dwSize;
+                    void* pMemRW = pMem;
+                    ExecutableWriterHolder<void> memWriterHolder;
+                    if (pHeap->IsExecutable())
+                    {
+                        memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
+                        pMemRW = memWriterHolder.GetRW();
+                    }
+                    InsertFreeBlock(ppWalkRW, pMem, pMemRW, dwCurSize - dwSize, pHeap );
                 }
                 break;
             }
@@ -779,8 +803,15 @@ struct LoaderHeapFreeBlock
 
         if (pResult && fRemoveFromFreeList)
         {
+            void *pResultRW = pResult;
+            ExecutableWriterHolder<void> resultWriterHolder;
+            if (pHeap->IsExecutable())
+            {
+                resultWriterHolder = ExecutableWriterHolder<void>(pResult, dwSize);
+                pResultRW = resultWriterHolder.GetRW();
+            }
             // Callers of loaderheap assume allocated memory is zero-inited so we must preserve this invariant!
-            memset(pResult, 0, dwSize);
+            memset(pResultRW, 0, dwSize);
         }
         LOADER_HEAP_END_TRAP_FAULT
         return pResult;
@@ -788,11 +819,11 @@ struct LoaderHeapFreeBlock
 
     }
 
-
+#endif // DACCESS_COMPILE
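    // A distilled sketch of the RX/RW aliasing idiom the holders above use,
    // assuming only what this series shows of ExecutableWriterHolder (the
    // (addressRX, size) constructor and GetRW()); PatchCodeByte is hypothetical:
    //
    //     void PatchCodeByte(BYTE* pRX, BYTE value)
    //     {
    //         ExecutableWriterHolder<BYTE> writerHolder(pRX, sizeof(BYTE));
    //         *writerHolder.GetRW() = value; // write through the RW alias
    //     } // the scratch RW mapping goes away with the holder; pRX stays RX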
 
 private:
     // Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened.
-    static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap)
+    static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlockRX, LoaderHeapFreeBlock *pFreeBlockRW, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
@@ -800,10 +831,10 @@ struct LoaderHeapFreeBlock
 
         LOADER_HEAP_BEGIN_TRAP_FAULT
 
-        LoaderHeapFreeBlock *pNextBlock = pFreeBlock->m_pNext;
-        size_t               dwSize     = pFreeBlock->m_dwSize;
+        LoaderHeapFreeBlock *pNextBlock = pFreeBlockRX->m_pNext;
+        size_t               dwSize     = pFreeBlockRX->m_dwSize;
 
-        if (pNextBlock == NULL || ((BYTE*)pNextBlock) != (((BYTE*)pFreeBlock) + dwSize))
+        if (pNextBlock == NULL || ((BYTE*)pNextBlock) != (((BYTE*)pFreeBlockRX) + dwSize))
         {
             result = FALSE;
         }
@@ -811,9 +842,9 @@ struct LoaderHeapFreeBlock
         {
             size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize;
             LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext;
-            INDEBUG(memset(pFreeBlock, 0xcc, dwCombinedSize);)
-            pFreeBlock->m_pNext  = pNextNextBlock;
-            pFreeBlock->m_dwSize = dwCombinedSize;
+            INDEBUG(memset(pFreeBlockRW, 0xcc, dwCombinedSize);)
+            pFreeBlockRW->m_pNext  = pNextNextBlock;
+            pFreeBlockRW->m_dwSize = dwCombinedSize;
 
             result = TRUE;
         }
@@ -1514,25 +1545,25 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem,
     }
 #endif
 
+    void *pMemRW = pMem;
+    ExecutableWriterHolder<void> memWriterHolder;
+    if (m_Options & LHF_EXECUTABLE)
+    {
+        memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
+        pMemRW = memWriterHolder.GetRW();
+    }
+
     if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize ))
     {
         // Cool. This was the last block allocated. We can just undo the allocation instead
         // of going to the freelist.
-        void *pMemRW = pMem;
-        ExecutableWriterHolder<void> memWriterHolder;
-        if (m_Options & LHF_EXECUTABLE)
-        {
-            memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
-            pMemRW = memWriterHolder.GetRW();
-        }
         memset(pMemRW, 0x00, dwSize); // Fill freed region with 0
         m_pAllocPtr = (BYTE*)pMem;
     }
     else
     {
-        LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, dwSize, this);
+        LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, pMemRW, dwSize, this);
     }
-
 }
diff --git a/src/coreclr/vm/virtualcallstub.cpp b/src/coreclr/vm/virtualcallstub.cpp
index 6d4fdcffd62e0..3af4c52afc9bb 100644
--- a/src/coreclr/vm/virtualcallstub.cpp
+++ b/src/coreclr/vm/virtualcallstub.cpp
@@ -2766,11 +2766,7 @@ DispatchHolder *VirtualCallStubManager::GenerateDispatchStub(PCODE ad
     }
 #endif
 
-    ExecutableWriterHolder<DispatchHolder> dispatchWriterHolder(holder, sizeof(DispatchHolder)
-#ifdef TARGET_AMD64
-                                                                + sizeof(DispatchStubShort)
-#endif
-                                                                );
+    ExecutableWriterHolder<DispatchHolder> dispatchWriterHolder(holder, dispatchHolderSize);
     dispatchWriterHolder.GetRW()->Initialize(holder, addrOfCode,
                                addrOfFail,
                                (size_t)pMTExpected
@@ -2833,9 +2829,9 @@ DispatchHolder *VirtualCallStubManager::GenerateDispatchStubLong(PCODE
     }
     CONTRACT_END;
 
     //allocate from the requisite heap and copy the template over it.
-    DispatchHolder * holder = (DispatchHolder*) (void*)
-        dispatch_heap->AllocAlignedMem(DispatchHolder::GetHolderSize(DispatchStub::e_TYPE_LONG), CODE_SIZE_ALIGN);
-    ExecutableWriterHolder<DispatchHolder> dispatchWriterHolder(holder, sizeof(DispatchHolder) + sizeof(DispatchStubLong));
+    size_t dispatchHolderSize = DispatchHolder::GetHolderSize(DispatchStub::e_TYPE_LONG);
+    DispatchHolder * holder = (DispatchHolder*) (void*)dispatch_heap->AllocAlignedMem(dispatchHolderSize, CODE_SIZE_ALIGN);
+    ExecutableWriterHolder<DispatchHolder> dispatchWriterHolder(holder, dispatchHolderSize);
     dispatchWriterHolder.GetRW()->Initialize(holder, addrOfCode,
                                addrOfFail,
 
From 357c2831663c73b978eef2bd03e0005df2ddb70e Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Sun, 11 Jul 2021 01:56:23 +0200
Subject: [PATCH 7/8] Fix ARM write barrier icache flushing

Also allocate LoaderHeapFreeBlock from regular heap.
---
 src/coreclr/utilcode/loaderheap.cpp | 140 +++++++++++++---------------
 src/coreclr/vm/arm/stubs.cpp        |   4 +
 2 files changed, 68 insertions(+), 76 deletions(-)

diff --git a/src/coreclr/utilcode/loaderheap.cpp b/src/coreclr/utilcode/loaderheap.cpp
index 90fc0c125c1f7..b3b381b2f9bef 100644
--- a/src/coreclr/utilcode/loaderheap.cpp
+++ b/src/coreclr/utilcode/loaderheap.cpp
@@ -695,15 +695,21 @@ size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHeap);
 struct LoaderHeapFreeBlock
 {
 public:
-    LoaderHeapFreeBlock *m_pNext;    // Pointer to next block on free list
-    size_t               m_dwSize;   // Total size of this block (including this header)
-//! Try not to grow the size of this structure. It places a minimum size on LoaderHeap allocations.
+    LoaderHeapFreeBlock *m_pNext;         // Pointer to next block on free list
+    size_t               m_dwSize;        // Total size of this block
+    void                *m_pBlockAddress; // Virtual address of the block
 
-    static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMemRX, void *pMemRW, size_t dwTotalSize, UnlockedLoaderHeap *pHeap)
+#ifndef DACCESS_COMPILE
+    static void InsertFreeBlock(LoaderHeapFreeBlock **ppHead, void *pMem, size_t dwTotalSize, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
         STATIC_CONTRACT_GC_NOTRIGGER;
 
+        // The new "nothrow" below failure is handled in a non-fault way, so
+        // make sure that callers with FORBID_FAULT can call this method without
+        // firing the contract violation assert.
+        PERMANENT_CONTRACT_VIOLATION(FaultViolation, ReasonContractInfrastructure);
+
         LOADER_HEAP_BEGIN_TRAP_FAULT
 
         // It's illegal to insert a free block that's smaller than the minimum sized allocation -
@@ -722,20 +728,30 @@ struct LoaderHeapFreeBlock
     }
 #endif
 
-        INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);)
-        LoaderHeapFreeBlock *pNewBlockRX = (LoaderHeapFreeBlock*)pMemRX;
-        LoaderHeapFreeBlock *pNewBlockRW = (LoaderHeapFreeBlock*)pMemRW;
-        pNewBlockRW->m_pNext = *ppHead;
-        pNewBlockRW->m_dwSize = dwTotalSize;
-        *ppHead = pNewBlockRX;
+        void* pMemRW = pMem;
+        ExecutableWriterHolder<void> memWriterHolder;
+        if (pHeap->IsExecutable())
+        {
+            memWriterHolder = ExecutableWriterHolder<void>(pMem, dwTotalSize);
+            pMemRW = memWriterHolder.GetRW();
+        }
 
-        MergeBlock(pNewBlockRX, pNewBlockRW, pHeap);
+        INDEBUG(memset(pMemRW, 0xcc, dwTotalSize);)
+        LoaderHeapFreeBlock *pNewBlock = new (nothrow) LoaderHeapFreeBlock;
+        // If we fail allocating the LoaderHeapFreeBlock, ignore the failure and don't insert the free block at all.
+        if (pNewBlock != NULL)
+        {
+            pNewBlock->m_pNext         = *ppHead;
+            pNewBlock->m_dwSize        = dwTotalSize;
+            pNewBlock->m_pBlockAddress = pMem;
+            *ppHead = pNewBlock;
+            MergeBlock(pNewBlock, pHeap);
+        }
 
         LOADER_HEAP_END_TRAP_FAULT
     }
 
-#ifndef DACCESS_COMPILE
-    static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, BOOL fRemoveFromFreeList, UnlockedLoaderHeap *pHeap)
+    static void *AllocFromFreeList(LoaderHeapFreeBlock **ppHead, size_t dwSize, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
         STATIC_CONTRACT_GC_NOTRIGGER;
@@ -752,46 +768,19 @@ struct LoaderHeapFreeBlock
             size_t dwCurSize = pCur->m_dwSize;
             if (dwCurSize == dwSize)
            {
-                pResult = pCur;
+                pResult = pCur->m_pBlockAddress;
                 // Exact match.  Hooray!
-                if (fRemoveFromFreeList)
-                {
-                    ExecutableWriterHolder<LoaderHeapFreeBlock *> walkWriterHolder;
-                    LoaderHeapFreeBlock **ppWalkRW = ppWalk;
-                    if (pHeap->IsExecutable() && (ppWalk != ppHead))
-                    {
-                        walkWriterHolder = ExecutableWriterHolder<LoaderHeapFreeBlock *>(ppWalk, sizeof(LoaderHeapFreeBlock **));
-                        ppWalkRW = walkWriterHolder.GetRW();
-                    }
-                    *ppWalkRW = pCur->m_pNext;
-                }
+                *ppWalk = pCur->m_pNext;
+                delete pCur;
                 break;
             }
             else if (dwCurSize > dwSize && (dwCurSize - dwSize) >= AllocMem_TotalSize(1, pHeap))
             {
                 // Partial match. Ok...
-                pResult = pCur;
-                if (fRemoveFromFreeList)
-                {
-                    ExecutableWriterHolder<LoaderHeapFreeBlock *> walkWriterHolder;
-                    LoaderHeapFreeBlock **ppWalkRW = ppWalk;
-                    if (pHeap->IsExecutable() && (ppWalk != ppHead))
-                    {
-                        walkWriterHolder = ExecutableWriterHolder<LoaderHeapFreeBlock *>(ppWalk, sizeof(LoaderHeapFreeBlock **));
-                        ppWalkRW = walkWriterHolder.GetRW();
-                    }
-                    *ppWalkRW = pCur->m_pNext;
-
-                    void* pMem = (BYTE*)pCur + dwSize;
-                    void* pMemRW = pMem;
-                    ExecutableWriterHolder<void> memWriterHolder;
-                    if (pHeap->IsExecutable())
-                    {
-                        memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
-                        pMemRW = memWriterHolder.GetRW();
-                    }
-                    InsertFreeBlock(ppWalkRW, pMem, pMemRW, dwCurSize - dwSize, pHeap );
-                }
+                pResult = pCur->m_pBlockAddress;
+                *ppWalk = pCur->m_pNext;
+                InsertFreeBlock(ppWalk, ((BYTE*)pCur->m_pBlockAddress) + dwSize, dwCurSize - dwSize, pHeap );
+                delete pCur;
                 break;
             }
@@ -801,7 +790,7 @@ struct LoaderHeapFreeBlock
             ppWalk = &( pCur->m_pNext );
         }
 
-        if (pResult && fRemoveFromFreeList)
+        if (pResult)
         {
             void *pResultRW = pResult;
             ExecutableWriterHolder<void> resultWriterHolder;
@@ -815,15 +804,11 @@ struct LoaderHeapFreeBlock
         }
         LOADER_HEAP_END_TRAP_FAULT
         return pResult;
-
-
-
     }
-#endif // DACCESS_COMPILE
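    // After this commit the bookkeeping nodes live on the regular heap and only
    // point at the free regions via m_pBlockAddress, so the list can be walked
    // and maintained without ever mapping executable pages writeable. The
    // adjacency test MergeBlock performs, as a hypothetical standalone helper
    // (illustration only):
    //
    //     static bool AreAdjacent(const LoaderHeapFreeBlock* pFirst,
    //                             const LoaderHeapFreeBlock* pSecond)
    //     {
    //         return ((BYTE*)pFirst->m_pBlockAddress) + pFirst->m_dwSize ==
    //                (BYTE*)pSecond->m_pBlockAddress;
    //     }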
 
 private:
     // Try to merge pFreeBlock with its immediate successor. Return TRUE if a merge happened. FALSE if no merge happened.
-    static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlockRX, LoaderHeapFreeBlock *pFreeBlockRW, UnlockedLoaderHeap *pHeap)
+    static BOOL MergeBlock(LoaderHeapFreeBlock *pFreeBlock, UnlockedLoaderHeap *pHeap)
     {
         STATIC_CONTRACT_NOTHROW;
@@ -831,10 +816,10 @@ struct LoaderHeapFreeBlock
 
         LOADER_HEAP_BEGIN_TRAP_FAULT
 
-        LoaderHeapFreeBlock *pNextBlock = pFreeBlockRX->m_pNext;
-        size_t               dwSize     = pFreeBlockRX->m_dwSize;
+        LoaderHeapFreeBlock *pNextBlock = pFreeBlock->m_pNext;
+        size_t               dwSize     = pFreeBlock->m_dwSize;
 
-        if (pNextBlock == NULL || ((BYTE*)pNextBlock) != (((BYTE*)pFreeBlockRX) + dwSize))
+        if (pNextBlock == NULL || ((BYTE*)pNextBlock->m_pBlockAddress) != (((BYTE*)pFreeBlock->m_pBlockAddress) + dwSize))
         {
             result = FALSE;
         }
@@ -842,9 +827,17 @@ struct LoaderHeapFreeBlock
         {
             size_t dwCombinedSize = dwSize + pNextBlock->m_dwSize;
             LoaderHeapFreeBlock *pNextNextBlock = pNextBlock->m_pNext;
-            INDEBUG(memset(pFreeBlockRW, 0xcc, dwCombinedSize);)
-            pFreeBlockRW->m_pNext  = pNextNextBlock;
-            pFreeBlockRW->m_dwSize = dwCombinedSize;
+            void *pMemRW = pFreeBlock->m_pBlockAddress;
+            ExecutableWriterHolder<void> memWriterHolder;
+            if (pHeap->IsExecutable())
+            {
+                memWriterHolder = ExecutableWriterHolder<void>(pFreeBlock->m_pBlockAddress, dwCombinedSize);
+                pMemRW = memWriterHolder.GetRW();
+            }
+            INDEBUG(memset(pMemRW, 0xcc, dwCombinedSize);)
+            pFreeBlock->m_pNext  = pNextNextBlock;
+            pFreeBlock->m_dwSize = dwCombinedSize;
+            delete pNextBlock;
 
             result = TRUE;
         }
@@ -853,7 +846,7 @@
 
         return result;
     }
-
+#endif // DACCESS_COMPILE
 };
@@ -871,8 +864,7 @@ struct LoaderHeapFreeBlock
 //     - z bytes of pad  (DEBUG-ONLY) (where "z" is just enough to pointer-align the following byte)
 //     - a bytes of tag  (DEBUG-ONLY) (where "a" is sizeof(LoaderHeapValidationTag)
 //
-//     - b bytes of pad  (if total size after all this < sizeof(LoaderHeapFreeBlock), pad enough to make it the size of LoaderHeapFreeBlock)
-//     - c bytes of pad  (where "c" is just enough to pointer-align the following byte)
+//     - b bytes of pad  (where "b" is just enough to pointer-align the following byte)
 //
 // ==> Following address is always pointer-aligned
 //=====================================================================================
@@ -893,10 +885,6 @@ inline size_t AllocMem_TotalSize(size_t dwRequestedSize, UnlockedLoaderHeap *pHe
 #ifdef _DEBUG
         dwSize += sizeof(LoaderHeapValidationTag);
 #endif
-        if (dwSize < sizeof(LoaderHeapFreeBlock))
-        {
-            dwSize = sizeof(LoaderHeapFreeBlock);
-        }
     }
     dwSize = ((dwSize + ALLOC_ALIGN_CONSTANT) & (~ALLOC_ALIGN_CONSTANT));
@@ -1345,7 +1333,7 @@ void *UnlockedLoaderHeap::UnlockedAllocMem_NoThrow(size_t dwSize
     {
         // Any memory available on the free list?
-        void *pData = LoaderHeapFreeBlock::AllocFromFreeList(&m_pFirstFreeBlock, dwSize, TRUE /*fRemoveFromFreeList*/, this);
+        void *pData = LoaderHeapFreeBlock::AllocFromFreeList(&m_pFirstFreeBlock, dwSize, this);
         if (!pData)
         {
             // Enough bytes available in committed region?
@@ -1545,16 +1533,16 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem,
     }
 #endif
 
-    void *pMemRW = pMem;
-    ExecutableWriterHolder<void> memWriterHolder;
-    if (m_Options & LHF_EXECUTABLE)
-    {
-        memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
-        pMemRW = memWriterHolder.GetRW();
-    }
-
     if (m_pAllocPtr == ( ((BYTE*)pMem) + dwSize ))
     {
+        void *pMemRW = pMem;
+        ExecutableWriterHolder<void> memWriterHolder;
+        if (m_Options & LHF_EXECUTABLE)
+        {
+            memWriterHolder = ExecutableWriterHolder<void>(pMem, dwSize);
+            pMemRW = memWriterHolder.GetRW();
+        }
+
         // Cool. This was the last block allocated. We can just undo the allocation instead
         // of going to the freelist.
         memset(pMemRW, 0x00, dwSize); // Fill freed region with 0
@@ -1562,7 +1550,7 @@ void UnlockedLoaderHeap::UnlockedBackoutMem(void *pMem,
     }
     else
     {
-        LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, pMemRW, dwSize, this);
+        LoaderHeapFreeBlock::InsertFreeBlock(&m_pFirstFreeBlock, pMem, dwSize, this);
     }
 }
diff --git a/src/coreclr/vm/arm/stubs.cpp b/src/coreclr/vm/arm/stubs.cpp
index b2bf6e0522ea5..6e62df2370338 100644
--- a/src/coreclr/vm/arm/stubs.cpp
+++ b/src/coreclr/vm/arm/stubs.cpp
@@ -329,6 +329,10 @@ void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength)
 {
     DWORD size = (PBYTE)JIT_PatchedWriteBarrierLast - (PBYTE)JIT_PatchedWriteBarrierStart;
     *ppbStart = (PBYTE)JIT_PatchedWriteBarrierStart;
+    if (IsWriteBarrierCopyEnabled())
+    {
+        *ppbStart = GetWriteBarrierCodeLocation(*ppbStart);
+    }
     *pcbLength = size;
 }
 
From 8be6e1948793dd09a1d17f717043c86c54c2e6ca Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Sun, 11 Jul 2021 14:38:35 +0200
Subject: [PATCH 8/8] Set the W^X default to disabled

---
 src/coreclr/inc/clrconfigvalues.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index bfd43629017a3..40da0cd2c7396 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -738,7 +738,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_LTTng, W("LTTng"), 1, "If COMPlus_LTTng is
 //
 // Executable code
 //
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWriteXorExecute, W("EnableWriteXorExecute"), 1, "Enable W^X for executable memory.");
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableWriteXorExecute, W("EnableWriteXorExecute"), 0, "Enable W^X for executable memory.");
 
 #ifdef FEATURE_GDBJIT
 ///
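With the default flipped to 0, W^X is now opt-in. A sketch of how the switch is consumed, mirroring the StaticInitialize change earlier in the series; to exercise the W^X code paths, set COMPlus_EnableWriteXorExecute=1 in the environment (assuming the standard COMPlus_ prefix for EXTERNAL settings):

    // Evaluates to false by default after this commit; true only when the
    // EnableWriteXorExecute knob is explicitly set to a non-zero value.
    bool isWXorXEnabled = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableWriteXorExecute) != 0;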