
Commit cea477c

Tiered Compilation step 1
Tiered compilation is a new feature we are experimenting with that aims to improve startup times. Initially we jit methods without optimization, then switch to an optimized version once a method has been called a number of times. More details about how the feature currently operates are in the comments of TieredCompilation.cpp.

This is only the first step in a longer process of building out the feature. The primary goal for now is to avoid regressing any runtime behavior in the shipping configuration, in which the COMPlus config variable is OFF, while putting enough code in place that we can measure performance in the daily builds and make incremental progress visible to collaborators and reviewers. The design of the TieredCompilationManager is likely to change substantively, and the call counter may also change.
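For orientation, a condensed sketch of the flow the message describes follows. This is illustrative pseudocode under stated assumptions, not code from the commit: OnPrestubCallSketch, JitMethod, and BackpatchStub are hypothetical stand-ins for the prestub's jit and backpatch steps, while GetCallCounter() and OnMethodCalled() match the APIs added in the diff below.

// Illustrative pseudocode only; the real logic is split across the prestub,
// CallCounter, and TieredCompilationManager.
PCODE OnPrestubCallSketch(MethodDesc* pMD)
{
// Tier 0: the first jit is quick and unoptimized so startup stays fast.
PCODE pCode = JitMethod(pMD, FALSE /* optimize */);

// Count the call. TRUE means counting is finished (the threshold was hit and
// a background optimizing re-jit has been queued), so the prestub can be
// backpatched and this method stops reporting calls.
if (GetAppDomain()->GetCallCounter().OnMethodCalled(pMD))
{
BackpatchStub(pMD, pCode); // stop routing future calls through the prestub
}
return pCode;
}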
noahfalk committed Mar 30, 2017
1 parent e3eecaa commit cea477c
Showing 23 changed files with 870 additions and 51 deletions.
1 change: 1 addition & 0 deletions clr.coreclr.props
@@ -13,6 +13,7 @@
<FeatureDbiOopDebugging_HostOneCorex86 Condition="'$(TargetArch)' == 'i386' or '$(TargetArch)' == 'arm'">true</FeatureDbiOopDebugging_HostOneCorex86>
<FeatureDbiOopDebugging_HostOneCoreamd64 Condition="'$(TargetArch)' == 'amd64'">true</FeatureDbiOopDebugging_HostOneCoreamd64>
<FeatureEventTrace>true</FeatureEventTrace>
+<FeatureFitJit>true</FeatureFitJit>
<FeatureFrameworkInternal>true</FeatureFrameworkInternal>
<FeatureHijack>true</FeatureHijack>
<FeatureInteropDebugging Condition="('$(TargetArch)' == 'i386') or ('$(TargetArch)' == 'amd64')">true</FeatureInteropDebugging>
1 change: 1 addition & 0 deletions clr.defines.targets
Expand Up @@ -20,6 +20,7 @@
<CDefines Condition="'$(FeatureDbgPublish)' == 'true'">$(CDefines);FEATURE_DBG_PUBLISH</CDefines>
<CDefines Condition="'$(FeatureEventTrace)' == 'true'">$(CDefines);FEATURE_EVENT_TRACE</CDefines>
<CDefines Condition="'$(FeatureXplatEventSource)' == 'true'">$(CDefines);FEATURE_EVENTSOURCE_XPLAT</CDefines>
+<CDefines Condition="'$(FeatureFitJit)' == 'true'">$(CDefines);FEATURE_FITJIT</CDefines>
<CDefines Condition="'$(FeatureFullNGen)' == 'true'">$(CDefines);FEATURE_FULL_NGEN</CDefines>
<CDefines Condition="'$(FeatureHijack)' == 'true'">$(CDefines);FEATURE_HIJACK</CDefines>
<CDefines Condition="'$(FeatureInteropDebugging)' == 'true'">$(CDefines);FEATURE_INTEROP_DEBUGGING</CDefines>
1 change: 1 addition & 0 deletions clrdefinitions.cmake
@@ -125,6 +125,7 @@ endif(FEATURE_EVENT_TRACE)
if(CLR_CMAKE_PLATFORM_UNIX)
add_definitions(-DFEATURE_EVENTSOURCE_XPLAT=1)
endif(CLR_CMAKE_PLATFORM_UNIX)
+add_definitions(-DFEATURE_FITJIT)
# NetBSD doesn't implement this feature
if(NOT CMAKE_SYSTEM_NAME STREQUAL NetBSD)
add_definitions(-DFEATURE_HIJACK)
2 changes: 1 addition & 1 deletion src/debug/daccess/daccess.cpp
@@ -5235,7 +5235,7 @@ ClrDataAccess::FollowStubStep(
// this and redirect to the actual code.
methodDesc = trace.GetMethodDesc();
if (methodDesc->IsPreImplemented() &&
-!methodDesc->IsPointingToNativeCode() &&
+!methodDesc->IsPointingToStableNativeCode() &&
!methodDesc->IsGenericMethodDefinition() &&
methodDesc->HasNativeCode())
{
9 changes: 9 additions & 0 deletions src/inc/clrconfigvalues.h
@@ -969,6 +969,15 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_SampleIntervalLow,
RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_SampleIntervalHigh, W("HillClimbing_SampleIntervalHigh"), 200, "");
RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent, W("HillClimbing_GainExponent"), 200, "The exponent to apply to the gain, times 100. 100 means to use linear gain, higher values will enhance large moves and damp small ones.");


+//
+// Tiered Compilation
+//
+#ifdef FEATURE_FITJIT
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation, W("EXPERIMENTAL_TieredCompilation"), 0, "Enables tiered compilation")
+#endif


//
// TypeLoader
//
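The new knob follows the standard CLRConfig pattern, so it can be flipped without a custom build: end users would set the COMPlus_EXPERIMENTAL_TieredCompilation environment variable (or the equivalent registry value) to 1. Below is a minimal sketch of how the runtime side would typically read it; this is an assumption based on the usual clrconfigvalues.h machinery, not code from this diff.

#ifdef FEATURE_FITJIT
// CLRConfig::UNSUPPORTED_TieredCompilation is the descriptor generated from the
// RETAIL_CONFIG_DWORD_INFO entry above; 0 (the default) leaves the feature off.
BOOL fTieredCompilationEnabled =
CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation) != 0;
#endif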
66 changes: 33 additions & 33 deletions src/inc/loglf.h
@@ -4,39 +4,39 @@
// The code in sos.DumpLog depends on the first 32 facility codes
being bit flags sorted in increasing order.

-DEFINE_LOG_FACILITY(LF_GC ,0x00000001)
-DEFINE_LOG_FACILITY(LF_GCINFO ,0x00000002)
-DEFINE_LOG_FACILITY(LF_STUBS ,0x00000004)
-DEFINE_LOG_FACILITY(LF_JIT ,0x00000008)
-DEFINE_LOG_FACILITY(LF_LOADER ,0x00000010)
-DEFINE_LOG_FACILITY(LF_METADATA ,0x00000020)
-DEFINE_LOG_FACILITY(LF_SYNC ,0x00000040)
-DEFINE_LOG_FACILITY(LF_EEMEM ,0x00000080)
-DEFINE_LOG_FACILITY(LF_GCALLOC ,0x00000100)
-DEFINE_LOG_FACILITY(LF_CORDB ,0x00000200)
-DEFINE_LOG_FACILITY(LF_CLASSLOADER ,0x00000400)
-DEFINE_LOG_FACILITY(LF_CORPROF ,0x00000800)
-DEFINE_LOG_FACILITY(LF_REMOTING ,0x00001000)
-DEFINE_LOG_FACILITY(LF_DBGALLOC ,0x00002000)
-DEFINE_LOG_FACILITY(LF_EH ,0x00004000)
-DEFINE_LOG_FACILITY(LF_ENC ,0x00008000)
-DEFINE_LOG_FACILITY(LF_ASSERT ,0x00010000)
-DEFINE_LOG_FACILITY(LF_VERIFIER ,0x00020000)
-DEFINE_LOG_FACILITY(LF_THREADPOOL ,0x00040000)
-DEFINE_LOG_FACILITY(LF_GCROOTS ,0x00080000)
-DEFINE_LOG_FACILITY(LF_INTEROP ,0x00100000)
-DEFINE_LOG_FACILITY(LF_MARSHALER ,0x00200000)
-DEFINE_LOG_FACILITY(LF_IJW ,0x00400000)
-DEFINE_LOG_FACILITY(LF_ZAP ,0x00800000)
-DEFINE_LOG_FACILITY(LF_STARTUP ,0x01000000) // Log startup and shutdown failures
-DEFINE_LOG_FACILITY(LF_APPDOMAIN ,0x02000000)
-DEFINE_LOG_FACILITY(LF_CODESHARING ,0x04000000)
-DEFINE_LOG_FACILITY(LF_STORE ,0x08000000)
-DEFINE_LOG_FACILITY(LF_SECURITY ,0x10000000)
-DEFINE_LOG_FACILITY(LF_LOCKS ,0x20000000)
-DEFINE_LOG_FACILITY(LF_BCL ,0x40000000)
-// LF_ALWAYS 0x80000000 // make certain you don't try to use this bit for a real facility
-// LF_ALL 0xFFFFFFFF
+DEFINE_LOG_FACILITY(LF_GC ,0x00000001)
+DEFINE_LOG_FACILITY(LF_GCINFO ,0x00000002)
+DEFINE_LOG_FACILITY(LF_STUBS ,0x00000004)
+DEFINE_LOG_FACILITY(LF_JIT ,0x00000008)
+DEFINE_LOG_FACILITY(LF_LOADER ,0x00000010)
+DEFINE_LOG_FACILITY(LF_METADATA ,0x00000020)
+DEFINE_LOG_FACILITY(LF_SYNC ,0x00000040)
+DEFINE_LOG_FACILITY(LF_EEMEM ,0x00000080)
+DEFINE_LOG_FACILITY(LF_GCALLOC ,0x00000100)
+DEFINE_LOG_FACILITY(LF_CORDB ,0x00000200)
+DEFINE_LOG_FACILITY(LF_CLASSLOADER ,0x00000400)
+DEFINE_LOG_FACILITY(LF_CORPROF ,0x00000800)
+DEFINE_LOG_FACILITY(LF_REMOTING ,0x00001000)
+DEFINE_LOG_FACILITY(LF_DBGALLOC ,0x00002000)
+DEFINE_LOG_FACILITY(LF_EH ,0x00004000)
+DEFINE_LOG_FACILITY(LF_ENC ,0x00008000)
+DEFINE_LOG_FACILITY(LF_ASSERT ,0x00010000)
+DEFINE_LOG_FACILITY(LF_VERIFIER ,0x00020000)
+DEFINE_LOG_FACILITY(LF_THREADPOOL ,0x00040000)
+DEFINE_LOG_FACILITY(LF_GCROOTS ,0x00080000)
+DEFINE_LOG_FACILITY(LF_INTEROP ,0x00100000)
+DEFINE_LOG_FACILITY(LF_MARSHALER ,0x00200000)
+DEFINE_LOG_FACILITY(LF_TIEREDCOMPILATION ,0x00400000) // This used to be IJW, but now repurposed for tiered compilation
+DEFINE_LOG_FACILITY(LF_ZAP ,0x00800000)
+DEFINE_LOG_FACILITY(LF_STARTUP ,0x01000000) // Log startup and shutdown failures
+DEFINE_LOG_FACILITY(LF_APPDOMAIN ,0x02000000)
+DEFINE_LOG_FACILITY(LF_CODESHARING ,0x04000000)
+DEFINE_LOG_FACILITY(LF_STORE ,0x08000000)
+DEFINE_LOG_FACILITY(LF_SECURITY ,0x10000000)
+DEFINE_LOG_FACILITY(LF_LOCKS ,0x20000000)
+DEFINE_LOG_FACILITY(LF_BCL ,0x40000000)
+// LF_ALWAYS 0x80000000 // make certain you don't try to use this bit for a real facility
+// LF_ALL 0xFFFFFFFF
//
#undef DEFINE_LOG_FACILITY
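Since LF_TIEREDCOMPILATION plugs into the CLR's existing facility bits, the feature's code can trace through the stock LOG macro. A hypothetical example follows (active only in builds with logging compiled in; the level, message text, and pMethodDesc argument are illustrative, not taken from this diff):

LOG((LF_TIEREDCOMPILATION, LL_INFO10,
"Queuing method %p for background optimized re-jit\n", pMethodDesc));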

4 changes: 3 additions & 1 deletion src/vm/CMakeLists.txt
@@ -97,6 +97,7 @@ set(VM_SOURCES_DAC_AND_WKS_COMMON
securitydescriptorassembly.cpp
sigformat.cpp
siginfo.cpp
+spinlock.cpp
stackwalk.cpp
stublink.cpp
stubmgr.cpp
@@ -137,6 +138,7 @@ set(VM_SOURCES_WKS
assemblynative.cpp
assemblyspec.cpp
cachelinealloc.cpp
+callcounter.cpp
callhelpers.cpp
ceemain.cpp
clrconfignative.cpp
@@ -220,7 +222,6 @@ set(VM_SOURCES_WKS
sha1.cpp
simplerwlock.cpp
sourceline.cpp
-spinlock.cpp
stackingallocator.cpp
stringliteralmap.cpp
stubcache.cpp
@@ -230,6 +231,7 @@
synch.cpp
synchronizationcontextnative.cpp
testhookmgr.cpp
+tieredcompilation.cpp
threaddebugblockinginfo.cpp
threadsuspend.cpp
typeparse.cpp
16 changes: 16 additions & 0 deletions src/vm/appdomain.cpp
@@ -4333,6 +4333,10 @@ void AppDomain::Init()
}
#endif //FEATURE_COMINTEROP

+#ifdef FEATURE_FITJIT
+m_callCounter.SetTieredCompilationManager(&GetTieredCompilationManager());
+m_tieredCompilationManager.Init(GetId());
+#endif
#endif // CROSSGEN_COMPILE
} // AppDomain::Init

@@ -8254,6 +8258,18 @@ void AppDomain::Exit(BOOL fRunFinalizers, BOOL fAsyncExit)
}
}

+// Tell the tiered compilation manager to stop initiating any new work for background
+// jit optimization. It's possible the standard thread unwind mechanisms would pre-emptively
+// evacuate the jit threadpool worker threads from the domain on their own, but I see no reason
+// to take the risk of relying on them when we can easily augment them with a cooperative
+// shutdown check. This notification only initiates the process of evacuating the threads,
+// and the UnwindThreads() call below is where blocking will occur to ensure the threads
+// have exited the domain.
+//
+#ifdef FEATURE_FITJIT
+m_tieredCompilationManager.OnAppDomainShutdown();
+#endif

//
// Set up blocks so no threads can enter except for the finalizer and the thread
// doing the unload.
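A plausible shape for the manager's half of this cooperative handshake, sketched under assumptions: the real body lives in tieredcompilation.cpp, which this excerpt does not show, and m_isAppDomainShuttingDown and m_lock are presumed members rather than names confirmed by the diff.

// Hedged sketch, not code from this commit.
void TieredCompilationManager::OnAppDomainShutdown()
{
SpinLockHolder holder(&m_lock);
// Background jit workers would test this flag before dequeuing more methods,
// so no new optimization work starts while UnwindThreads() drains the domain.
m_isAppDomainShuttingDown = TRUE;
}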
28 changes: 28 additions & 0 deletions src/vm/appdomain.hpp
@@ -43,6 +43,11 @@

#include "appxutil.h"

+#ifdef FEATURE_FITJIT
+#include "tieredcompilation.h"
+#include "callcounter.h"
+#endif

class BaseDomain;
class SystemDomain;
class SharedDomain;
@@ -3823,6 +3828,29 @@ class AppDomain : public BaseDomain

#endif

+#if defined(FEATURE_FITJIT)
+
+public:
+TieredCompilationManager & GetTieredCompilationManager()
+{
+LIMITED_METHOD_CONTRACT;
+return m_tieredCompilationManager;
+}
+
+private:
+TieredCompilationManager m_tieredCompilationManager;
+
+public:
+CallCounter & GetCallCounter()
+{
+LIMITED_METHOD_CONTRACT;
+return m_callCounter;
+}
+
+private:
+CallCounter m_callCounter;
+#endif

#ifdef FEATURE_COMINTEROP

private:
101 changes: 101 additions & 0 deletions src/vm/callcounter.cpp
@@ -0,0 +1,101 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// ===========================================================================
// File: CallCounter.CPP
//
// ===========================================================================



#include "common.h"
#include "excep.h"
#include "log.h"
#include "tieredcompilation.h"
#include "callcounter.h"

#ifdef FEATURE_FITJIT

CallCounter::CallCounter()
{
LIMITED_METHOD_CONTRACT;

m_lock.Init(LOCK_TYPE_DEFAULT);
}

// Init our connection to the tiered compilation manager during
// AppDomain startup. This pointer will remain valid for the lifetime
// of the AppDomain.
void CallCounter::SetTieredCompilationManager(TieredCompilationManager* pTieredCompilationManager)
{
CONTRACTL
{
NOTHROW;
GC_NOTRIGGER;
CAN_TAKE_LOCK;
MODE_PREEMPTIVE;
}
CONTRACTL_END;

SpinLockHolder holder(&m_lock);
m_pTieredCompilationManager = pTieredCompilationManager;
}

// This is called by the prestub each time the method is invoked in a particular
// AppDomain (the AppDomain for which AppDomain::GetCallCounter() == this). These
// calls continue until we backpatch the prestub to avoid future calls. This allows
// us to track the number of calls to each method and use it as a trigger for tiered
// compilation.
//
// Returns TRUE if no future invocations are needed (we reached the count we cared
// about), and FALSE otherwise. It is permissible to keep calling even after TRUE has
// been returned; multi-threaded race conditions will surely cause this to occur.
BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
{
STANDARD_VM_CONTRACT;

_ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());

// PERF: This is a simple-to-implement, but not especially performant, call counter.
// Currently it is only called until we reach a fixed call count, and then it is
// disabled. It's likely we'll want to improve this at some point, but
// it's not as bad as you might expect. Allocating a counter inline in the
// MethodDesc, or at some location computable from the MethodDesc, would
// eliminate one pointer per method (the MethodDesc* key) and the CPU
// overhead of acquiring the lock and searching the dictionary. Depending on where
// the counter lives, we may also be able to shrink it to one byte without wasting
// the following bytes on alignment. Further work to inline the OnMethodCalled
// callback directly into the jitted code would eliminate the CPU overhead of
// leaving the prestub unpatched, but may not be a good tradeoff overall, as it
// increases the size of the jitted code.


TieredCompilationManager* pCallCounterSink = NULL;
int callCount;
{
// Be careful, if you convert this to something fully lock/interlocked-free, that
// you correctly handle what happens when some N simultaneous calls don't
// all increment the counter. The slight drift is probably negligible for tuning,
// but TieredCompilationManager::OnMethodCalled() doesn't expect multiple calls
// each claiming to be exactly the threshold call count needed to trigger
// optimization.
SpinLockHolder holder(&m_lock);
pCallCounterSink = m_pTieredCompilationManager;

CallCounterEntry* pEntry = const_cast<CallCounterEntry*>(m_methodToCallCount.LookupPtr(pMethodDesc));
if (pEntry == NULL)
{
callCount = 1;
m_methodToCallCount.Add(CallCounterEntry(pMethodDesc, callCount));
}
else
{
pEntry->callCount++;
callCount = pEntry->callCount;
}
}

return pCallCounterSink->OnMethodCalled(pMethodDesc, callCount);
}

#endif // FEATURE_FITJIT
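To make the contract above concrete, here is a hedged sketch of what the manager's side of OnMethodCalled might look like. The real implementation is in tieredcompilation.cpp, which this excerpt does not include; m_tier1CallCountThreshold and AsyncPromoteMethodToTier1 are assumptions made for illustration. The sketch also shows why the counter must not report the same threshold value twice: the equality test queues the optimizing re-jit exactly once.

// Hypothetical sketch, not code from this commit.
BOOL TieredCompilationManager::OnMethodCalled(MethodDesc* pMethodDesc, int callCount)
{
if (callCount < m_tier1CallCountThreshold)
{
return FALSE; // keep counting; the prestub stays unpatched
}
if (callCount == m_tier1CallCountThreshold)
{
// Queue a background re-jit at full optimization on a threadpool thread.
AsyncPromoteMethodToTier1(pMethodDesc);
}
return TRUE; // no more counting needed; the prestub may be backpatched
}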
